24 #include <boost/filesystem/operations.hpp>
// Capacity, in bytes, of each JIT log buffer: it sizes the local info_log / error_log
// char arrays and is the value registered for the *_LOG_BUFFER_SIZE_BYTES JIT options.
29 #define JIT_LOG_SIZE 8192
// Appends the CUDA JIT option keys and their values, pairwise and in the same order,
// to option_keys and option_values. Callers in this file pass, in addition to the two
// vectors, two char buffers of JIT_LOG_SIZE bytes (info_log, error_log); their
// addresses are stored in option_values, so the buffers must stay alive for as long as
// the option arrays are handed to the driver.
// NOTE(review): the remaining parameter declarations and the closing brace of this
// function are not visible in this excerpt.
31 void fill_options(std::vector<CUjit_option>& option_keys,
32 std::vector<void*>& option_values,
// Index 0: verbose JIT logging, enabled (value 1). Scalar option values are smuggled
// through the void* slot by casting the integer itself, not by pointing at it.
35 option_keys.push_back(CU_JIT_LOG_VERBOSE);
36 option_values.push_back(reinterpret_cast<void*>(1));
// Index 1: threads-per-block setting, 1024.
37 option_keys.push_back(CU_JIT_THREADS_PER_BLOCK);
41 option_values.push_back(reinterpret_cast<void*>(1024));
// Index 2: wall-time slot, seeded with 0. Callers read option_values[2] back as a float
// after each link step, so this position must remain the third entry.
42 option_keys.push_back(CU_JIT_WALL_TIME);
43 option_values.push_back(reinterpret_cast<void*>(0));
// Indices 3-4: info log buffer and its capacity in bytes.
44 option_keys.push_back(CU_JIT_INFO_LOG_BUFFER);
45 option_values.push_back(reinterpret_cast<void*>(info_log));
46 option_keys.push_back(CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES);
47 option_values.push_back(reinterpret_cast<void*>((
long)JIT_LOG_SIZE));
// Indices 5-6: error log buffer and its capacity in bytes.
48 option_keys.push_back(CU_JIT_ERROR_LOG_BUFFER);
49 option_values.push_back(reinterpret_cast<void*>(error_log));
50 option_keys.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
51 option_values.push_back(reinterpret_cast<void*>((
long)JIT_LOG_SIZE));
// Builds the location of the GPU runtime fatbinary by appending "QueryEngine" and
// "cuda_mapd_rt.fatbin" to a base path, and throws std::runtime_error (with the full
// path in the message) when that file does not exist.
// NOTE(review): the statement that initialises gpu_rt_path and the function's return
// are not visible in this excerpt; the base is presumably get_root_abs_path() — TODO
// confirm.
54 boost::filesystem::path get_gpu_rt_path() {
56 gpu_rt_path /=
"QueryEngine";
57 gpu_rt_path /=
"cuda_mapd_rt.fatbin";
// Fail early with a descriptive message instead of letting the linker report a
// missing input later.
58 if (!boost::filesystem::exists(gpu_rt_path)) {
59 throw std::runtime_error(
"HeavyDB GPU runtime library not found at " +
60 gpu_rt_path.string());
// Builds the location of the GPU table-functions static library by appending
// "QueryEngine" and "CudaTableFunctions.a" to a base path. Throws std::runtime_error
// (with the full path in the message) when the file does not exist; otherwise returns
// the path.
// NOTE(review): the statement that initialises cuda_table_functions_path is not visible
// in this excerpt; the base is presumably get_root_abs_path() — TODO confirm.
65 boost::filesystem::path get_cuda_table_functions_path() {
67 cuda_table_functions_path /=
"QueryEngine";
68 cuda_table_functions_path /=
"CudaTableFunctions.a";
69 if (!boost::filesystem::exists(cuda_table_functions_path)) {
70 throw std::runtime_error(
"HeavyDB GPU table functions module not found at " +
71 cuda_table_functions_path.string());
74 return cuda_table_functions_path;
// Body of a routine that creates a CUDA link state, adds the runtime fatbinary and the
// table-functions library to it, logs the JIT wall time after each step, and then
// destroys the link state without producing a binary.
// NOTE(review): the signature line of this routine and the declaration of link_state
// are not visible in this excerpt.
80 std::vector<CUjit_option> option_keys;
81 std::vector<void*> option_values;
// NOTE(review): both log buffers are left uninitialised. Every error path below builds
// std::string(error_log), which scans for a NUL terminator, so it relies on the driver
// having written a terminated string first — consider zero-initialising; TODO confirm.
82 char info_log[JIT_LOG_SIZE];
83 char error_log[JIT_LOG_SIZE];
84 fill_options(option_keys, option_values, info_log, error_log);
// Keys and values are passed to the driver as two parallel arrays, so their sizes
// must match.
85 CHECK_EQ(option_values.size(), option_keys.size());
86 unsigned num_options = option_keys.size();
// Create the link state with the options assembled above; on failure the error log
// text is appended to the diagnostic.
89 cuLinkCreate(num_options, &option_keys[0], &option_values[0], &link_state))
90 <<
": " << std::string(error_log);
// option_values[2] is the CU_JIT_WALL_TIME slot (third entry pushed by fill_options).
// The void* element's storage is reinterpreted as a float to read the reported time.
// NOTE(review): this reads a void* object through a float lvalue; confirm against the
// CUDA option-type documentation and consider a memcpy-based read.
91 VLOG(1) <<
"CUDA JIT time to create link: "
92 << *
reinterpret_cast<float*
>(&option_values[2]);
// Both helpers throw if the file is missing, so the emptiness checks are a second
// line of defence.
93 boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
94 boost::filesystem::path cuda_table_functions_path = get_cuda_table_functions_path();
95 CHECK(!gpu_rt_path.empty());
96 CHECK(!cuda_table_functions_path.empty());
// Add the runtime fatbinary file to the link. The opening line of this call is not
// visible here (presumably cuLinkAddFile — TODO confirm).
98 link_state, CU_JIT_INPUT_FATBINARY, gpu_rt_path.c_str(), 0,
nullptr,
nullptr))
99 <<
": " << std::string(error_log);
100 VLOG(1) <<
"CUDA JIT time to add RT fatbinary: "
101 << *
reinterpret_cast<float*
>(&option_values[2]);
// Add the table-functions static library to the link (call opening and trailing
// arguments are not visible in this excerpt).
103 CU_JIT_INPUT_LIBRARY,
104 cuda_table_functions_path.c_str(),
108 <<
": " << std::string(error_log);
109 VLOG(1) <<
"CUDA JIT time to add GPU table functions library: "
110 << *
reinterpret_cast<float*
>(&option_values[2]);
// The link state is released here; no link-completion call appears in the visible
// lines of this routine.
111 checkCudaErrors(cuLinkDestroy(link_state)) <<
": " << std::string(error_log);
// Takes a block of text and returns a std::string built from it; the text is consumed
// one '\n'-delimited line at a time through a stringstream. It is used further down to
// render the PTX source inside a linker error message.
// NOTE(review): most of the body is not visible in this excerpt; judging by the name it
// presumably prefixes each line with its line number — TODO confirm.
114 std::string add_line_numbers(
const std::string& text) {
115 std::stringstream iss(text);
120 std::getline(iss, line,
'\n');
// Body of the routine that links generated PTX (the `ptx` string) together with the
// runtime fatbinary and the table-functions library, then returns the resulting binary
// pointer along with the option arrays and the link state.
// NOTE(review): the signature line and several statements (including the declarations
// of link_state and cubinSize) are not visible in this excerpt; presumably this is
// ptx_to_cubin(const std::string& ptx, const CudaMgr_Namespace::CudaMgr* cuda_mgr) —
// TODO confirm.
133 std::vector<CUjit_option> option_keys;
134 std::vector<void*> option_values;
// NOTE(review): both log buffers are uninitialised locals, yet they are read via
// std::string(error_log) / operator<< below, which rely on a NUL terminator being
// present — consider zero-initialising; TODO confirm the driver always terminates them.
135 char info_log[JIT_LOG_SIZE];
136 char error_log[JIT_LOG_SIZE];
137 fill_options(option_keys, option_values, info_log, error_log);
// Keys and values are passed to the driver as two parallel arrays, so their sizes
// must match.
138 CHECK_EQ(option_values.size(), option_keys.size());
139 unsigned num_options = option_keys.size();
// Create the link state with the options assembled above.
142 cuLinkCreate(num_options, &option_keys[0], &option_values[0], &link_state))
143 <<
": " << std::string(error_log);
// option_values[2] is the CU_JIT_WALL_TIME slot (third entry pushed by fill_options);
// its storage is reinterpreted as a float to read the reported time.
144 VLOG(1) <<
"CUDA JIT time to create link: "
145 << *
reinterpret_cast<float*
>(&option_values[2]);
147 boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
148 boost::filesystem::path cuda_table_functions_path = get_cuda_table_functions_path();
149 CHECK(!gpu_rt_path.empty());
150 CHECK(!cuda_table_functions_path.empty());
// Add the runtime fatbinary file to the link. The opening line of this call is not
// visible here (presumably cuLinkAddFile — TODO confirm).
156 link_state, CU_JIT_INPUT_FATBINARY, gpu_rt_path.c_str(), 0,
nullptr,
nullptr))
157 <<
": " << std::string(error_log);
158 VLOG(1) <<
"CUDA JIT time to add RT fatbinary: "
159 << *
reinterpret_cast<float*
>(&option_values[2]);
// Add the table-functions static library to the link (call opening and trailing
// arguments are not visible in this excerpt).
161 CU_JIT_INPUT_LIBRARY,
162 cuda_table_functions_path.c_str(),
166 <<
": " << std::string(error_log);
167 VLOG(1) <<
"CUDA JIT time to add GPU table functions library: "
168 << *
reinterpret_cast<float*
>(&option_values[2]);
// Add the generated PTX text itself as an in-memory input. The const_cast only adapts
// the read-only c_str() pointer to the void* parameter of the call.
171 static_cast<void*>(const_cast<char*>(ptx.c_str())),
// On failure the diagnostic also carries the PTX source, rendered by add_line_numbers.
177 <<
": " << std::string(error_log) <<
"\nPTX:\n"
178 << add_line_numbers(ptx) <<
"\nEOF PTX";
179 VLOG(1) <<
"CUDA JIT time to add generated code: "
180 << *
reinterpret_cast<float*
>(&option_values[2]);
// Receives the pointer to the linked binary (the call that fills it is not visible
// in this excerpt).
181 void* cubin{
nullptr};
184 <<
": " << std::string(error_log);
185 VLOG(1) <<
"CUDA Linker completed: " << info_log;
188 VLOG(1) <<
"Generated GPU binary code size: " << cubinSize <<
" bytes";
// link_state is handed back to the caller rather than destroyed here, so releasing it
// is the caller's job.
// NOTE(review): the returned option_values still contains the addresses of info_log and
// error_log, which are locals of this function. If the caller forwards these options to
// a later driver call (the compilation-context constructor passes option_vals to
// loadGpuModuleData), those pointers would dangle — TODO confirm and move the buffers
// into the returned object if so.
189 return {cubin, option_keys, option_values, link_state};
// Constructor fragment: stores the device id and the CUDA manager, loads the module
// image on that device through the manager, and looks up the kernel entry point named
// kernel_name in the loaded module.
// NOTE(review): the constructor's opening line, some parameters and some member
// initialisers are not visible in this excerpt.
195 const std::string& kernel_name,
// The manager arrives as an untyped pointer and is cast back to CudaMgr in the
// initialiser list below.
197 const void* cuda_mgr,
198 unsigned int num_options,
203 , device_id_(device_id)
204 , cuda_mgr_(static_cast<const CudaMgr_Namespace::CudaMgr*>(cuda_mgr)) {
// Message for a check whose condition line is not visible (presumably that cuda_mgr_
// is non-null — TODO confirm).
206 <<
"Unable to initialize GPU compilation context without CUDA manager";
// Load the image into module_ on device_id_, forwarding the JIT options unchanged.
207 cuda_mgr_->loadGpuModuleData(
208 &module_, image, num_options, options, option_vals, device_id_);
// Resolve the kernel function handle from the freshly loaded module.
210 checkCudaErrors(cuModuleGetFunction(&kernel_, module_, kernel_name.c_str()));
// Destructor fragment: asks the CUDA manager to unload module_ from device_id_,
// mirroring the loadGpuModuleData call made by the constructor.
// NOTE(review): the destructor's opening and closing lines are not visible here.
217 cuda_mgr_->unloadGpuModuleData(&
module_, device_id_);
std::string get_root_abs_path()
void checkCudaErrors(CUresult err)
void setContext(const int device_num) const
#define LOG_IF(severity, condition)
int getDeviceCount() const
GpuDeviceCompilationContext(const void *image, const std::string &kernel_name, const int device_id, const void *cuda_mgr, unsigned int num_options, CUjit_option *options, void **option_vals)
CubinResult ptx_to_cubin(const std::string &ptx, const CudaMgr_Namespace::CudaMgr *cuda_mgr)
#define DEBUG_TIMER(name)
~GpuDeviceCompilationContext()