OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
NvidiaKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <sstream>
18 
19 #include "NvidiaKernel.h"
20 
21 #include "Logger/Logger.h"
23 
24 #include <boost/filesystem/operations.hpp>
25 
26 #ifdef HAVE_CUDA
27 namespace {
28 
29 #define JIT_LOG_SIZE 8192
30 
31 void fill_options(std::vector<CUjit_option>& option_keys,
32  std::vector<void*>& option_values,
33  char* info_log,
34  char* error_log,
35  const unsigned block_size_x) {
36  option_keys.push_back(CU_JIT_LOG_VERBOSE);
37  option_values.push_back(reinterpret_cast<void*>(1));
38  option_keys.push_back(CU_JIT_THREADS_PER_BLOCK);
39  option_values.push_back(reinterpret_cast<void*>(block_size_x));
40  option_keys.push_back(CU_JIT_WALL_TIME);
41  option_values.push_back(reinterpret_cast<void*>(0));
42  option_keys.push_back(CU_JIT_INFO_LOG_BUFFER);
43  option_values.push_back(reinterpret_cast<void*>(info_log));
44  option_keys.push_back(CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES);
45  option_values.push_back(reinterpret_cast<void*>((long)JIT_LOG_SIZE));
46  option_keys.push_back(CU_JIT_ERROR_LOG_BUFFER);
47  option_values.push_back(reinterpret_cast<void*>(error_log));
48  option_keys.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
49  option_values.push_back(reinterpret_cast<void*>((long)JIT_LOG_SIZE));
50 }
51 
52 boost::filesystem::path get_gpu_rt_path() {
53  boost::filesystem::path gpu_rt_path{omnisci::get_root_abs_path()};
54  gpu_rt_path /= "QueryEngine";
55  gpu_rt_path /= "cuda_mapd_rt.fatbin";
56  if (!boost::filesystem::exists(gpu_rt_path)) {
57  throw std::runtime_error("OmniSci GPU runtime library not found at " +
58  gpu_rt_path.string());
59  }
60  return gpu_rt_path;
61 }
62 
63 } // namespace
64 
65 void nvidia_jit_warmup() {
66  std::vector<CUjit_option> option_keys;
67  std::vector<void*> option_values;
68  char info_log[JIT_LOG_SIZE];
69  char error_log[JIT_LOG_SIZE];
70  fill_options(option_keys, option_values, info_log, error_log, /*block_size=*/1024);
71  CHECK_EQ(option_values.size(), option_keys.size());
72  unsigned num_options = option_keys.size();
73  CUlinkState link_state;
75  cuLinkCreate(num_options, &option_keys[0], &option_values[0], &link_state))
76  << std::string(error_log);
77  VLOG(1) << "CUDA JIT time to create link: "
78  << *reinterpret_cast<float*>(&option_values[2]);
79  boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
80  CHECK(!gpu_rt_path.empty());
81  checkCudaErrors(cuLinkAddFile(
82  link_state, CU_JIT_INPUT_FATBINARY, gpu_rt_path.c_str(), 0, nullptr, nullptr))
83  << std::string(error_log);
84  VLOG(1) << "CUDA JIT time to add RT fatbinary: "
85  << *reinterpret_cast<float*>(&option_values[2]);
86  checkCudaErrors(cuLinkDestroy(link_state)) << std::string(error_log);
87 }
88 
// Prefixes every line of `text` with its 1-based line number.
//
// Each input line is emitted as "<number>: <line>\n". A final line without a
// trailing newline is still emitted and gets a newline appended. A trailing
// newline in `text` does not produce an extra numbered empty line, and empty
// input yields an empty string.
std::string add_line_numbers(const std::string& text) {
  std::istringstream iss(text);
  std::string result;
  std::string line;
  std::size_t count = 1;
  // Testing getline itself (rather than the stream state before the read) stops
  // exactly when no further line could be extracted.
  while (std::getline(iss, line)) {
    result += std::to_string(count);
    result += ": ";
    result += line;
    result += '\n';
    ++count;
  }
  return result;
}
101 
102 CubinResult ptx_to_cubin(const std::string& ptx,
103  const unsigned block_size,
104  const CudaMgr_Namespace::CudaMgr* cuda_mgr) {
105  CHECK(!ptx.empty());
106  CHECK(cuda_mgr && cuda_mgr->getDeviceCount() > 0);
107  cuda_mgr->setContext(0);
108  std::vector<CUjit_option> option_keys;
109  std::vector<void*> option_values;
110  char info_log[JIT_LOG_SIZE];
111  char error_log[JIT_LOG_SIZE];
112  fill_options(option_keys, option_values, info_log, error_log, block_size);
113  CHECK_EQ(option_values.size(), option_keys.size());
114  unsigned num_options = option_keys.size();
115  CUlinkState link_state;
117  cuLinkCreate(num_options, &option_keys[0], &option_values[0], &link_state))
118  << std::string(error_log);
119  VLOG(1) << "CUDA JIT time to create link: "
120  << *reinterpret_cast<float*>(&option_values[2]);
121 
122  boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
123  CHECK(!gpu_rt_path.empty());
124  // How to create a static CUDA library:
125  // 1. nvcc -std=c++11 -arch=sm_35 --device-link -c [list of .cu files]
126  // 2. nvcc -std=c++11 -arch=sm_35 -lib [list of .o files generated by step 1] -o
127  // [library_name.a]
128  checkCudaErrors(cuLinkAddFile(
129  link_state, CU_JIT_INPUT_FATBINARY, gpu_rt_path.c_str(), 0, nullptr, nullptr))
130  << std::string(error_log);
131  VLOG(1) << "CUDA JIT time to add RT fatbinary: "
132  << *reinterpret_cast<float*>(&option_values[2]);
133  checkCudaErrors(cuLinkAddData(link_state,
134  CU_JIT_INPUT_PTX,
135  static_cast<void*>(const_cast<char*>(ptx.c_str())),
136  ptx.length() + 1,
137  0,
138  0,
139  nullptr,
140  nullptr))
141  << std::string(error_log) << "\nPTX:\n"
142  << add_line_numbers(ptx) << "\nEOF PTX";
143  VLOG(1) << "CUDA JIT time to add generated code: "
144  << *reinterpret_cast<float*>(&option_values[2]);
145  void* cubin{nullptr};
146  size_t cubinSize{0};
147  checkCudaErrors(cuLinkComplete(link_state, &cubin, &cubinSize))
148  << std::string(error_log);
149  VLOG(1) << "CUDA Linker completed: " << info_log;
150  CHECK(cubin);
151  CHECK_GT(cubinSize, size_t(0));
152  VLOG(1) << "Generated GPU binary code size: " << cubinSize << " bytes";
153  return {cubin, option_keys, option_values, link_state};
154 }
155 #endif
156 
157 #ifdef HAVE_CUDA
159  const std::string& kernel_name,
160  const int device_id,
161  const void* cuda_mgr,
162  unsigned int num_options,
163  CUjit_option* options,
164  void** option_vals)
165  : module_(nullptr)
166  , kernel_(nullptr)
167  , device_id_(device_id)
168  , cuda_mgr_(static_cast<const CudaMgr_Namespace::CudaMgr*>(cuda_mgr)) {
169  LOG_IF(FATAL, cuda_mgr_ == nullptr)
170  << "Unable to initialize GPU compilation context without CUDA manager";
171  cuda_mgr_->loadGpuModuleData(
172  &module_, image, num_options, options, option_vals, device_id_);
173  CHECK(module_);
174  checkCudaErrors(cuModuleGetFunction(&kernel_, module_, kernel_name.c_str()));
175 }
176 #endif // HAVE_CUDA
177 
179 #ifdef HAVE_CUDA
180  CHECK(cuda_mgr_);
181  cuda_mgr_->unloadGpuModuleData(&module_, device_id_);
182 #endif
183 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
int CUjit_option
Definition: nocuda.h:25
tuple line
Definition: parse_ast.py:10
void checkCudaErrors(CUresult err)
Definition: sample.cpp:38
void nvidia_jit_warmup()
void setContext(const int device_num) const
Definition: CudaMgr.cpp:362
#define CHECK_GT(x, y)
Definition: Logger.h:209
std::string to_string(char const *&&v)
#define LOG_IF(severity, condition)
Definition: Logger.h:287
int getDeviceCount() const
Definition: CudaMgr.h:86
GpuDeviceCompilationContext(const void *image, const std::string &kernel_name, const int device_id, const void *cuda_mgr, unsigned int num_options, CUjit_option *options, void **option_vals)
int CUlinkState
Definition: nocuda.h:26
#define CHECK(condition)
Definition: Logger.h:197
std::string get_root_abs_path()
CubinResult ptx_to_cubin(const std::string &ptx, const unsigned block_size, const CudaMgr_Namespace::CudaMgr *cuda_mgr)
#define VLOG(n)
Definition: Logger.h:291