OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NvidiaKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <sstream>
18 
19 #include "NvidiaKernel.h"
20 
21 #include "Logger/Logger.h"
23 
24 #include <boost/filesystem/operations.hpp>
25 
26 #ifdef HAVE_CUDA
27 namespace {
28 
29 #define JIT_LOG_SIZE 8192
30 
31 void fill_options(std::vector<CUjit_option>& option_keys,
32  std::vector<void*>& option_values,
33  char* info_log,
34  char* error_log,
35  const unsigned block_size_x) {
36  option_keys.push_back(CU_JIT_LOG_VERBOSE);
37  option_values.push_back(reinterpret_cast<void*>(1));
38  option_keys.push_back(CU_JIT_THREADS_PER_BLOCK);
39  option_values.push_back(reinterpret_cast<void*>(block_size_x));
40  option_keys.push_back(CU_JIT_WALL_TIME);
41  option_values.push_back(reinterpret_cast<void*>(0));
42  option_keys.push_back(CU_JIT_INFO_LOG_BUFFER);
43  option_values.push_back(reinterpret_cast<void*>(info_log));
44  option_keys.push_back(CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES);
45  option_values.push_back(reinterpret_cast<void*>((long)JIT_LOG_SIZE));
46  option_keys.push_back(CU_JIT_ERROR_LOG_BUFFER);
47  option_values.push_back(reinterpret_cast<void*>(error_log));
48  option_keys.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
49  option_values.push_back(reinterpret_cast<void*>((long)JIT_LOG_SIZE));
50 }
51 
52 boost::filesystem::path get_gpu_rt_path() {
53  boost::filesystem::path gpu_rt_path{heavyai::get_root_abs_path()};
54  gpu_rt_path /= "QueryEngine";
55  gpu_rt_path /= "cuda_mapd_rt.fatbin";
56  if (!boost::filesystem::exists(gpu_rt_path)) {
57  throw std::runtime_error("HeavyDB GPU runtime library not found at " +
58  gpu_rt_path.string());
59  }
60  return gpu_rt_path;
61 }
62 
63 boost::filesystem::path get_cuda_table_functions_path() {
64  boost::filesystem::path cuda_table_functions_path{heavyai::get_root_abs_path()};
65  cuda_table_functions_path /= "QueryEngine";
66  cuda_table_functions_path /= "CudaTableFunctions.a";
67  if (!boost::filesystem::exists(cuda_table_functions_path)) {
68  throw std::runtime_error("HeavyDB GPU table functions module not found at " +
69  cuda_table_functions_path.string());
70  }
71 
72  return cuda_table_functions_path;
73 }
74 
75 } // namespace
76 
77 void nvidia_jit_warmup() {
78  std::vector<CUjit_option> option_keys;
79  std::vector<void*> option_values;
80  char info_log[JIT_LOG_SIZE];
81  char error_log[JIT_LOG_SIZE];
82  fill_options(option_keys, option_values, info_log, error_log, /*block_size=*/1024);
83  CHECK_EQ(option_values.size(), option_keys.size());
84  unsigned num_options = option_keys.size();
85  CUlinkState link_state;
87  cuLinkCreate(num_options, &option_keys[0], &option_values[0], &link_state))
88  << ": " << std::string(error_log);
89  VLOG(1) << "CUDA JIT time to create link: "
90  << *reinterpret_cast<float*>(&option_values[2]);
91  boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
92  boost::filesystem::path cuda_table_functions_path = get_cuda_table_functions_path();
93  CHECK(!gpu_rt_path.empty());
94  CHECK(!cuda_table_functions_path.empty());
95  checkCudaErrors(cuLinkAddFile(
96  link_state, CU_JIT_INPUT_FATBINARY, gpu_rt_path.c_str(), 0, nullptr, nullptr))
97  << ": " << std::string(error_log);
98  VLOG(1) << "CUDA JIT time to add RT fatbinary: "
99  << *reinterpret_cast<float*>(&option_values[2]);
100  checkCudaErrors(cuLinkAddFile(link_state,
101  CU_JIT_INPUT_LIBRARY,
102  cuda_table_functions_path.c_str(),
103  0,
104  nullptr,
105  nullptr))
106  << ": " << std::string(error_log);
107  VLOG(1) << "CUDA JIT time to add GPU table functions library: "
108  << *reinterpret_cast<float*>(&option_values[2]);
109  checkCudaErrors(cuLinkDestroy(link_state)) << ": " << std::string(error_log);
110 }
111 
/**
 * Prefixes every line of `text` with its 1-based line number.
 *
 * Each input line becomes "<number>: <line>\n" in the result. A trailing
 * newline in `text` terminates the last line rather than starting a new one,
 * and empty input produces an empty result.
 *
 * @param text  newline-separated text
 * @return the numbered text
 */
std::string add_line_numbers(const std::string& text) {
  std::stringstream iss(text);
  std::string result;
  std::string line;
  size_t count = 1;
  // Loop on the outcome of the read itself. Checking the stream state before
  // reading would let the final, failed getline() contribute a spurious
  // numbered empty line.
  while (std::getline(iss, line)) {
    result += std::to_string(count);
    result += ": ";
    result += line;
    result += '\n';
    ++count;
  }
  return result;
}
124 
125 CubinResult ptx_to_cubin(const std::string& ptx,
126  const unsigned block_size,
127  const CudaMgr_Namespace::CudaMgr* cuda_mgr) {
128  auto timer = DEBUG_TIMER(__func__);
129  CHECK(!ptx.empty());
130  CHECK(cuda_mgr && cuda_mgr->getDeviceCount() > 0);
131  cuda_mgr->setContext(0);
132  std::vector<CUjit_option> option_keys;
133  std::vector<void*> option_values;
134  char info_log[JIT_LOG_SIZE];
135  char error_log[JIT_LOG_SIZE];
136  fill_options(option_keys, option_values, info_log, error_log, block_size);
137  CHECK_EQ(option_values.size(), option_keys.size());
138  unsigned num_options = option_keys.size();
139  CUlinkState link_state;
141  cuLinkCreate(num_options, &option_keys[0], &option_values[0], &link_state))
142  << ": " << std::string(error_log);
143  VLOG(1) << "CUDA JIT time to create link: "
144  << *reinterpret_cast<float*>(&option_values[2]);
145 
146  boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
147  boost::filesystem::path cuda_table_functions_path = get_cuda_table_functions_path();
148  CHECK(!gpu_rt_path.empty());
149  CHECK(!cuda_table_functions_path.empty());
150  // How to create a static CUDA library:
151  // 1. nvcc -std=c++11 -arch=sm_35 --device-link -c [list of .cu files]
152  // 2. nvcc -std=c++11 -arch=sm_35 -lib [list of .o files generated by step 1] -o
153  // [library_name.a]
154  checkCudaErrors(cuLinkAddFile(
155  link_state, CU_JIT_INPUT_FATBINARY, gpu_rt_path.c_str(), 0, nullptr, nullptr))
156  << ": " << std::string(error_log);
157  VLOG(1) << "CUDA JIT time to add RT fatbinary: "
158  << *reinterpret_cast<float*>(&option_values[2]);
159  checkCudaErrors(cuLinkAddFile(link_state,
160  CU_JIT_INPUT_LIBRARY,
161  cuda_table_functions_path.c_str(),
162  0,
163  nullptr,
164  nullptr))
165  << ": " << std::string(error_log);
166  VLOG(1) << "CUDA JIT time to add GPU table functions library: "
167  << *reinterpret_cast<float*>(&option_values[2]);
168  checkCudaErrors(cuLinkAddData(link_state,
169  CU_JIT_INPUT_PTX,
170  static_cast<void*>(const_cast<char*>(ptx.c_str())),
171  ptx.length() + 1,
172  0,
173  0,
174  nullptr,
175  nullptr))
176  << ": " << std::string(error_log) << "\nPTX:\n"
177  << add_line_numbers(ptx) << "\nEOF PTX";
178  VLOG(1) << "CUDA JIT time to add generated code: "
179  << *reinterpret_cast<float*>(&option_values[2]);
180  void* cubin{nullptr};
181  size_t cubinSize{0};
182  checkCudaErrors(cuLinkComplete(link_state, &cubin, &cubinSize))
183  << ": " << std::string(error_log);
184  VLOG(1) << "CUDA Linker completed: " << info_log;
185  CHECK(cubin);
186  CHECK_GT(cubinSize, size_t(0));
187  VLOG(1) << "Generated GPU binary code size: " << cubinSize << " bytes";
188  return {cubin, option_keys, option_values, link_state};
189 }
190 #endif
191 
192 #ifdef HAVE_CUDA
194  const std::string& kernel_name,
195  const int device_id,
196  const void* cuda_mgr,
197  unsigned int num_options,
198  CUjit_option* options,
199  void** option_vals)
200  : module_(nullptr)
201  , kernel_(nullptr)
202  , device_id_(device_id)
203  , cuda_mgr_(static_cast<const CudaMgr_Namespace::CudaMgr*>(cuda_mgr)) {
204  LOG_IF(FATAL, cuda_mgr_ == nullptr)
205  << "Unable to initialize GPU compilation context without CUDA manager";
206  cuda_mgr_->loadGpuModuleData(
207  &module_, image, num_options, options, option_vals, device_id_);
208  CHECK(module_);
209  checkCudaErrors(cuModuleGetFunction(&kernel_, module_, kernel_name.c_str()));
210 }
211 #endif // HAVE_CUDA
212 
// NOTE(review): the line that opens this definition is absent from this
// listing. The body hands module_ back to the CUDA manager, so this is
// presumably the GpuDeviceCompilationContext destructor — confirm against the
// original file.
214 #ifdef HAVE_CUDA
// The CUDA manager pointer is checked before it is dereferenced below.
215  CHECK(cuda_mgr_);
// Passes module_ and device_id_ to CudaMgr::unloadGpuModuleData.
216  cuda_mgr_->unloadGpuModuleData(&module_, device_id_);
217 #endif
218 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
int CUjit_option
Definition: nocuda.h:26
std::string get_root_abs_path()
void checkCudaErrors(CUresult err)
Definition: sample.cpp:38
void nvidia_jit_warmup()
void setContext(const int device_num) const
Definition: CudaMgr.cpp:405
#define CHECK_GT(x, y)
Definition: Logger.h:234
std::string to_string(char const *&&v)
#define LOG_IF(severity, condition)
Definition: Logger.h:312
int getDeviceCount() const
Definition: CudaMgr.h:87
GpuDeviceCompilationContext(const void *image, const std::string &kernel_name, const int device_id, const void *cuda_mgr, unsigned int num_options, CUjit_option *options, void **option_vals)
int CUlinkState
Definition: nocuda.h:27
tuple line
Definition: parse_ast.py:10
#define CHECK(condition)
Definition: Logger.h:222
#define DEBUG_TIMER(name)
Definition: Logger.h:371
CubinResult ptx_to_cubin(const std::string &ptx, const unsigned block_size, const CudaMgr_Namespace::CudaMgr *cuda_mgr)
#define VLOG(n)
Definition: Logger.h:316