OmniSciDB  8a228a1076
NvidiaKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "NvidiaKernel.h"

#include "../Shared/mapdpath.h"
#include "Logger/Logger.h"

#include <boost/filesystem/operations.hpp>

#include <cstring>
23 
24 #ifdef HAVE_CUDA
25 namespace {
26 
27 #define JIT_LOG_SIZE 8192
28 
29 void fill_options(std::vector<CUjit_option>& option_keys,
30  std::vector<void*>& option_values,
31  char* info_log,
32  char* error_log,
33  const unsigned block_size_x) {
34  option_keys.push_back(CU_JIT_LOG_VERBOSE);
35  option_values.push_back(reinterpret_cast<void*>(1));
36  option_keys.push_back(CU_JIT_THREADS_PER_BLOCK);
37  option_values.push_back(reinterpret_cast<void*>(block_size_x));
38  option_keys.push_back(CU_JIT_WALL_TIME);
39  option_values.push_back(reinterpret_cast<void*>(0));
40  option_keys.push_back(CU_JIT_INFO_LOG_BUFFER);
41  option_values.push_back(reinterpret_cast<void*>(info_log));
42  option_keys.push_back(CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES);
43  option_values.push_back(reinterpret_cast<void*>((long)JIT_LOG_SIZE));
44  option_keys.push_back(CU_JIT_ERROR_LOG_BUFFER);
45  option_values.push_back(reinterpret_cast<void*>(error_log));
46  option_keys.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
47  option_values.push_back(reinterpret_cast<void*>((long)JIT_LOG_SIZE));
48 }
49 
50 boost::filesystem::path get_gpu_rt_path() {
51  boost::filesystem::path gpu_rt_path{mapd_root_abs_path()};
52  gpu_rt_path /= "QueryEngine";
53  gpu_rt_path /= "cuda_mapd_rt.fatbin";
54  if (!boost::filesystem::exists(gpu_rt_path)) {
55  throw std::runtime_error("OmniSci GPU runtime library not found at " +
56  gpu_rt_path.string());
57  }
58  return gpu_rt_path;
59 }
60 
61 } // namespace
62 
63 void nvidia_jit_warmup() {
64  std::vector<CUjit_option> option_keys;
65  std::vector<void*> option_values;
66  char info_log[JIT_LOG_SIZE];
67  char error_log[JIT_LOG_SIZE];
68  fill_options(option_keys, option_values, info_log, error_log, /*block_size=*/1024);
69  CHECK_EQ(option_values.size(), option_keys.size());
70  unsigned num_options = option_keys.size();
71  CUlinkState link_state;
73  cuLinkCreate(num_options, &option_keys[0], &option_values[0], &link_state))
74  << std::string(error_log);
75  VLOG(1) << "CUDA JIT time to create link: "
76  << *reinterpret_cast<float*>(&option_values[2]);
77  boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
78  CHECK(!gpu_rt_path.empty());
79  checkCudaErrors(cuLinkAddFile(
80  link_state, CU_JIT_INPUT_FATBINARY, gpu_rt_path.c_str(), 0, nullptr, nullptr))
81  << std::string(error_log);
82  VLOG(1) << "CUDA JIT time to add RT fatbinary: "
83  << *reinterpret_cast<float*>(&option_values[2]);
84  checkCudaErrors(cuLinkDestroy(link_state)) << std::string(error_log);
85 }
86 
87 CubinResult ptx_to_cubin(const std::string& ptx,
88  const unsigned block_size,
89  const CudaMgr_Namespace::CudaMgr* cuda_mgr) {
90  CHECK(!ptx.empty());
91  CHECK(cuda_mgr && cuda_mgr->getDeviceCount() > 0);
92  cuda_mgr->setContext(0);
93  std::vector<CUjit_option> option_keys;
94  std::vector<void*> option_values;
95  char info_log[JIT_LOG_SIZE];
96  char error_log[JIT_LOG_SIZE];
97  fill_options(option_keys, option_values, info_log, error_log, block_size);
98  CHECK_EQ(option_values.size(), option_keys.size());
99  unsigned num_options = option_keys.size();
100  CUlinkState link_state;
102  cuLinkCreate(num_options, &option_keys[0], &option_values[0], &link_state))
103  << std::string(error_log);
104  VLOG(1) << "CUDA JIT time to create link: "
105  << *reinterpret_cast<float*>(&option_values[2]);
106 
107  boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
108  CHECK(!gpu_rt_path.empty());
109  // How to create a static CUDA library:
110  // 1. nvcc -std=c++11 -arch=sm_35 --device-link -c [list of .cu files]
111  // 2. nvcc -std=c++11 -arch=sm_35 -lib [list of .o files generated by step 1] -o
112  // [library_name.a]
113  checkCudaErrors(cuLinkAddFile(
114  link_state, CU_JIT_INPUT_FATBINARY, gpu_rt_path.c_str(), 0, nullptr, nullptr))
115  << std::string(error_log);
116  VLOG(1) << "CUDA JIT time to add RT fatbinary: "
117  << *reinterpret_cast<float*>(&option_values[2]);
118 
119  checkCudaErrors(cuLinkAddData(link_state,
120  CU_JIT_INPUT_PTX,
121  static_cast<void*>(const_cast<char*>(ptx.c_str())),
122  ptx.length() + 1,
123  0,
124  0,
125  nullptr,
126  nullptr))
127  << std::string(error_log);
128  VLOG(1) << "CUDA JIT time to add generated code: "
129  << *reinterpret_cast<float*>(&option_values[2]);
130  void* cubin{nullptr};
131  size_t cubinSize{0};
132  checkCudaErrors(cuLinkComplete(link_state, &cubin, &cubinSize))
133  << std::string(error_log);
134  VLOG(1) << "CUDA Linker completed: " << info_log;
135  CHECK(cubin);
136  CHECK_GT(cubinSize, size_t(0));
137  VLOG(1) << "Generated GPU binary code size: " << cubinSize << " bytes";
138  return {cubin, option_keys, option_values, link_state};
139 }
140 #endif
141 
142 #ifdef HAVE_CUDA
144  const std::string& kernel_name,
145  const int device_id,
146  const void* cuda_mgr,
147  unsigned int num_options,
148  CUjit_option* options,
149  void** option_vals)
150  : module_(nullptr)
151  , kernel_(nullptr)
152  , device_id_(device_id)
153  , cuda_mgr_(static_cast<const CudaMgr_Namespace::CudaMgr*>(cuda_mgr)) {
154  LOG_IF(FATAL, cuda_mgr_ == nullptr)
155  << "Unable to initialize GPU compilation context without CUDA manager";
156  cuda_mgr_->loadGpuModuleData(
157  &module_, image, num_options, options, option_vals, device_id_);
158  CHECK(module_);
159  checkCudaErrors(cuModuleGetFunction(&kernel_, module_, kernel_name.c_str()));
160 }
161 #endif // HAVE_CUDA
162 
164 #ifdef HAVE_CUDA
165  CHECK(cuda_mgr_);
166  cuda_mgr_->unloadGpuModuleData(&module_, device_id_);
167 #endif
168 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
int CUjit_option
Definition: nocuda.h:25
void checkCudaErrors(CUresult err)
Definition: sample.cpp:38
std::string mapd_root_abs_path()
Definition: mapdpath.h:30
void nvidia_jit_warmup()
int getDeviceCount() const
Definition: CudaMgr.h:86
#define CHECK_GT(x, y)
Definition: Logger.h:209
#define LOG_IF(severity, condition)
Definition: Logger.h:287
GpuDeviceCompilationContext(const void *image, const std::string &kernel_name, const int device_id, const void *cuda_mgr, unsigned int num_options, CUjit_option *options, void **option_vals)
int CUlinkState
Definition: nocuda.h:26
#define CHECK(condition)
Definition: Logger.h:197
void setContext(const int device_num) const
Definition: CudaMgr.cpp:362
CubinResult ptx_to_cubin(const std::string &ptx, const unsigned block_size, const CudaMgr_Namespace::CudaMgr *cuda_mgr)
#define VLOG(n)
Definition: Logger.h:291