24 #include <boost/filesystem/operations.hpp>
// Size used for each of the info/error log character buffers declared in this file;
// the same value is also passed to the CUDA JIT as the buffer-size option.
29 #define JIT_LOG_SIZE 8192
// Appends the CUDA JIT options used by this file to `option_keys`, and appends the
// value for each key at the matching position in `option_values`. Every value is
// stored as a void* (integers are cast to pointer-sized values).
//
// Entries are appended in this order: CU_JIT_LOG_VERBOSE, CU_JIT_THREADS_PER_BLOCK,
// CU_JIT_WALL_TIME, info log buffer + size, error log buffer + size. When the vectors
// are empty on entry, the CU_JIT_WALL_TIME value therefore sits at index 2.
//
// NOTE(review): the declarations of `info_log` and `error_log` are not visible in this
// excerpt; the call sites pass `char[JIT_LOG_SIZE]` arrays for them.
31 void fill_options(std::vector<CUjit_option>& option_keys,
32 std::vector<void*>& option_values,
35 const unsigned block_size_x) {
// Verbose log flag, passed as the integer 1.
36 option_keys.push_back(CU_JIT_LOG_VERBOSE);
37 option_values.push_back(reinterpret_cast<void*>(1));
// Threads-per-block value taken from the caller's block_size_x.
38 option_keys.push_back(CU_JIT_THREADS_PER_BLOCK);
39 option_values.push_back(reinterpret_cast<void*>(block_size_x));
// Wall-time slot, seeded with 0. Code in this file later reinterprets the storage of
// this slot as a float when logging JIT timings.
40 option_keys.push_back(CU_JIT_WALL_TIME);
41 option_values.push_back(reinterpret_cast<void*>(0));
// Info log buffer and its size in bytes (JIT_LOG_SIZE).
42 option_keys.push_back(CU_JIT_INFO_LOG_BUFFER);
43 option_values.push_back(reinterpret_cast<void*>(info_log));
44 option_keys.push_back(CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES);
45 option_values.push_back(reinterpret_cast<void*>((
long)JIT_LOG_SIZE));
// Error log buffer and its size in bytes (JIT_LOG_SIZE).
46 option_keys.push_back(CU_JIT_ERROR_LOG_BUFFER);
47 option_values.push_back(reinterpret_cast<void*>(error_log));
48 option_keys.push_back(CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
49 option_values.push_back(reinterpret_cast<void*>((
long)JIT_LOG_SIZE));
// Builds the path "<base>/QueryEngine/cuda_mapd_rt.fatbin" and throws
// std::runtime_error (with the full path in the message) if nothing exists there.
// NOTE(review): the line that initialises `gpu_rt_path` and the return statement are
// not visible in this excerpt; presumably the base is the install root and the
// function returns `gpu_rt_path` -- confirm.
52 boost::filesystem::path get_gpu_rt_path() {
54 gpu_rt_path /=
"QueryEngine";
55 gpu_rt_path /=
"cuda_mapd_rt.fatbin";
// Fail early with a descriptive message rather than letting the linker report a
// missing input later.
56 if (!boost::filesystem::exists(gpu_rt_path)) {
57 throw std::runtime_error(
"HeavyDB GPU runtime library not found at " +
58 gpu_rt_path.string());
// Builds the path "<base>/QueryEngine/CudaTableFunctions.a", throws
// std::runtime_error (with the full path in the message) if nothing exists there,
// and otherwise returns the path.
// NOTE(review): the line that initialises `cuda_table_functions_path` is not visible
// in this excerpt; presumably the base is the install root -- confirm.
63 boost::filesystem::path get_cuda_table_functions_path() {
65 cuda_table_functions_path /=
"QueryEngine";
66 cuda_table_functions_path /=
"CudaTableFunctions.a";
// Existence check before the path is handed to any consumer.
67 if (!boost::filesystem::exists(cuda_table_functions_path)) {
68 throw std::runtime_error(
"HeavyDB GPU table functions module not found at " +
69 cuda_table_functions_path.string());
72 return cuda_table_functions_path;
// Body of a link sequence that creates a CUDA link state, adds the GPU runtime
// fatbinary and the table-functions library to it, and then destroys the link state
// without producing a binary.
// NOTE(review): the enclosing function header and the names of the add-file calls are
// not visible in this excerpt.
78 std::vector<CUjit_option> option_keys;
79 std::vector<void*> option_values;
// Log buffers handed to the JIT through fill_options; they are not initialised here.
80 char info_log[JIT_LOG_SIZE];
81 char error_log[JIT_LOG_SIZE];
// Fixed threads-per-block value of 1024 for this sequence.
82 fill_options(option_keys, option_values, info_log, error_log, 1024);
83 CHECK_EQ(option_values.size(), option_keys.size());
84 unsigned num_options = option_keys.size();
87 cuLinkCreate(num_options, &option_keys[0], &option_values[0], &link_state))
88 <<
": " << std::string(error_log);
// option_values[2] is the CU_JIT_WALL_TIME slot filled in by fill_options; the
// storage of that void* slot is read back as a float for logging.
// NOTE(review): this reads a void* object through a float*; presumably the JIT writes
// the elapsed time into the slot -- confirm against the CUDA driver API docs.
89 VLOG(1) <<
"CUDA JIT time to create link: "
90 << *
reinterpret_cast<float*
>(&option_values[2]);
// Both helpers throw if their file is missing, so the empty checks below guard
// against an unexpectedly empty path rather than a missing file.
91 boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
92 boost::filesystem::path cuda_table_functions_path = get_cuda_table_functions_path();
93 CHECK(!gpu_rt_path.empty());
94 CHECK(!cuda_table_functions_path.empty());
96 link_state, CU_JIT_INPUT_FATBINARY, gpu_rt_path.c_str(), 0,
nullptr,
nullptr))
97 <<
": " << std::string(error_log);
98 VLOG(1) <<
"CUDA JIT time to add RT fatbinary: "
99 << *
reinterpret_cast<float*
>(&option_values[2]);
101 CU_JIT_INPUT_LIBRARY,
102 cuda_table_functions_path.c_str(),
106 <<
": " << std::string(error_log);
107 VLOG(1) <<
"CUDA JIT time to add GPU table functions library: "
108 << *
reinterpret_cast<float*
>(&option_values[2]);
// The link state is released here; nothing from it is kept.
109 checkCudaErrors(cuLinkDestroy(link_state)) <<
": " << std::string(error_log);
// Takes `text` and reads it through a stringstream one '\n'-delimited line at a time.
// NOTE(review): the rest of the body is not visible in this excerpt; judging by the
// name and by its use when dumping PTX next to an error log, it presumably returns
// the text with a line number prefixed to each line -- confirm.
112 std::string add_line_numbers(
const std::string& text) {
113 std::stringstream iss(text);
118 std::getline(iss, line,
'\n');
// Links the GPU runtime fatbinary, the table-functions library and the generated PTX
// text (`ptx`) in one CUDA link state, using `block_size` as the threads-per-block
// JIT option, and returns the resulting binary pointer together with the option
// vectors and the link state.
// NOTE(review): parts of the signature and the heads of several calls are not visible
// in this excerpt.
126 const unsigned block_size,
132 std::vector<CUjit_option> option_keys;
133 std::vector<void*> option_values;
// Log buffers handed to the JIT through fill_options; they are not initialised here.
134 char info_log[JIT_LOG_SIZE];
135 char error_log[JIT_LOG_SIZE];
136 fill_options(option_keys, option_values, info_log, error_log, block_size);
137 CHECK_EQ(option_values.size(), option_keys.size());
138 unsigned num_options = option_keys.size();
141 cuLinkCreate(num_options, &option_keys[0], &option_values[0], &link_state))
142 <<
": " << std::string(error_log);
// option_values[2] is the CU_JIT_WALL_TIME slot filled in by fill_options; the
// storage of that void* slot is read back as a float for logging.
143 VLOG(1) <<
"CUDA JIT time to create link: "
144 << *
reinterpret_cast<float*
>(&option_values[2]);
// Both helpers throw if their file is missing, so the empty checks below guard
// against an unexpectedly empty path rather than a missing file.
146 boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
147 boost::filesystem::path cuda_table_functions_path = get_cuda_table_functions_path();
148 CHECK(!gpu_rt_path.empty());
149 CHECK(!cuda_table_functions_path.empty());
155 link_state, CU_JIT_INPUT_FATBINARY, gpu_rt_path.c_str(), 0,
nullptr,
nullptr))
156 <<
": " << std::string(error_log);
157 VLOG(1) <<
"CUDA JIT time to add RT fatbinary: "
158 << *
reinterpret_cast<float*
>(&option_values[2]);
160 CU_JIT_INPUT_LIBRARY,
161 cuda_table_functions_path.c_str(),
165 <<
": " << std::string(error_log);
166 VLOG(1) <<
"CUDA JIT time to add GPU table functions library: "
167 << *
reinterpret_cast<float*
>(&option_values[2]);
// The PTX text is passed as a mutable void*; const is cast away from ptx.c_str().
// NOTE(review): presumably the callee does not modify the buffer -- confirm.
170 static_cast<void*>(const_cast<char*>(ptx.c_str())),
// The message for this step also carries the whole PTX with line numbers, so that
// line references in the error log can be looked up.
176 <<
": " << std::string(error_log) <<
"\nPTX:\n"
177 << add_line_numbers(ptx) <<
"\nEOF PTX";
178 VLOG(1) <<
"CUDA JIT time to add generated code: "
179 << *
reinterpret_cast<float*
>(&option_values[2]);
180 void* cubin{
nullptr};
183 <<
": " << std::string(error_log);
184 VLOG(1) <<
"CUDA Linker completed: " << info_log;
187 VLOG(1) <<
"Generated GPU binary code size: " << cubinSize <<
" bytes";
// NOTE(review): option_values holds the addresses of the local arrays info_log and
// error_log (stored by fill_options above) and is returned by value here. Those two
// entries refer to storage that ends with this function, so any later use of them
// (for example, forwarding these values as module-load options) would read or write
// dead stack memory -- confirm how callers use the returned option values.
// The link state is returned rather than destroyed; releasing it is left to whoever
// receives the result.
188 return {cubin, option_keys, option_values, link_state};
// Constructor: records the device id and the CUDA manager, loads the module image on
// that device with the supplied JIT options, and looks up the kernel named
// `kernel_name` in the loaded module.
// `cuda_mgr` arrives as an untyped pointer and is cast to CudaMgr_Namespace::CudaMgr.
// NOTE(review): parts of the signature and the check that precedes the
// "Unable to initialize..." message are not visible in this excerpt; presumably the
// check requires a non-null CUDA manager -- confirm.
194 const std::string& kernel_name,
196 const void* cuda_mgr,
197 unsigned int num_options,
202 , device_id_(device_id)
203 , cuda_mgr_(static_cast<const CudaMgr_Namespace::CudaMgr*>(cuda_mgr)) {
205 <<
"Unable to initialize GPU compilation context without CUDA manager";
// Loads `image` into module_ for device_id_, forwarding the JIT option keys/values.
206 cuda_mgr_->loadGpuModuleData(
207 &module_, image, num_options, options, option_vals, device_id_);
// Resolves the kernel entry point from the module that was just loaded.
209 checkCudaErrors(cuModuleGetFunction(&kernel_, module_, kernel_name.c_str()));
// Unloads the module held in module_ for device_id_ through the CUDA manager.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is the destructor body -- confirm.
216 cuda_mgr_->unloadGpuModuleData(&
module_, device_id_);
std::string get_root_abs_path()
void checkCudaErrors(CUresult err)
void setContext(const int device_num) const
#define LOG_IF(severity, condition)
int getDeviceCount() const
GpuDeviceCompilationContext(const void *image, const std::string &kernel_name, const int device_id, const void *cuda_mgr, unsigned int num_options, CUjit_option *options, void **option_vals)
#define DEBUG_TIMER(name)
CubinResult ptx_to_cubin(const std::string &ptx, const unsigned block_size, const CudaMgr_Namespace::CudaMgr *cuda_mgr)
~GpuDeviceCompilationContext()