OmniSciDB  f632821e96
anonymous_namespace{NativeCodegen.cpp} Namespace Reference

Functions

void eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function *> &live_funcs)
 
void optimize_ir (llvm::Function *query_func, llvm::Module *module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function *> &live_funcs, const CompilationOptions &co)
 
std::string assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *module)
 
std::string cpp_to_llvm_name (const std::string &s)
 
std::string gen_array_any_all_sigs ()
 
std::string gen_translate_null_key_sigs ()
 
void bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *module)
 
void set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *module, llvm::LLVMContext &context)
 
void bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *module)
 
std::vector< std::string > get_agg_fnames (const std::vector< Analyzer::Expr *> &target_exprs, const bool is_group_by)
 
template<typename InstType >
llvm::Value * find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned gpu_blocksize, const unsigned num_blocks_per_mp)
 
std::string serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 

Variables

const std::string cuda_rt_decls
 

Function Documentation

◆ assemblyForCPU()

std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU ( ExecutionEngineWrapper &  execution_engine,
llvm::Module *  module 
)

Definition at line 235 of file NativeCodegen.cpp.

References CHECK.

Referenced by CodeGenerator::generateNativeCPUCode().

236  {
237  llvm::legacy::PassManager pass_manager;
238  auto cpu_target_machine = execution_engine->getTargetMachine();
239  CHECK(cpu_target_machine);
240  llvm::SmallString<256> code_str;
241  llvm::raw_svector_ostream os(code_str);
242 #if LLVM_VERSION_MAJOR >= 10
243  cpu_target_machine->addPassesToEmitFile(
244  pass_manager, os, nullptr, llvm::CGFT_AssemblyFile);
245 #elif LLVM_VERSION_MAJOR >= 7
246  cpu_target_machine->addPassesToEmitFile(
247  pass_manager, os, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
248 #else
249  cpu_target_machine->addPassesToEmitFile(
250  pass_manager, os, llvm::TargetMachine::CGFT_AssemblyFile);
251 #endif
252  pass_manager.run(*module);
253  return "Assembly for the CPU:\n" + std::string(code_str.str()) + "\nEnd of assembly";
254 }
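
A minimal usage sketch (hypothetical wiring; in this translation unit the only caller is CodeGenerator::generateNativeCPUCode). The preprocessor ladder tracks LLVM API drift: LLVM 10 moved CGFT_AssemblyFile out of llvm::TargetMachine into the llvm namespace, and LLVM 7 added the DWO output stream parameter (passed as nullptr here) to addPassesToEmitFile.

  // Sketch: obtain the host assembly for a JIT-compiled module, e.g. for logging.
  // `execution_engine` and `module` are assumed to come from the surrounding
  // compilation pipeline.
  const std::string asm_text = assemblyForCPU(execution_engine, module);
  std::cout << asm_text << '\n';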

◆ bind_pos_placeholders()

void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders ( const std::string &  pos_fn_name,
const bool  use_resume_param,
llvm::Function *  query_func,
llvm::Module *  module 
)

Definition at line 1092 of file NativeCodegen.cpp.

References get_arg_by_name().

Referenced by Executor::compileWorkUnit().

1095  {
1096  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1097  ++it) {
1098  if (!llvm::isa<llvm::CallInst>(*it)) {
1099  continue;
1100  }
1101  auto& pos_call = llvm::cast<llvm::CallInst>(*it);
1102  if (std::string(pos_call.getCalledFunction()->getName()) == pos_fn_name) {
1103  if (use_resume_param) {
1104  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1105  llvm::ReplaceInstWithInst(
1106  &pos_call,
1107  llvm::CallInst::Create(module->getFunction(pos_fn_name + "_impl"),
1108  error_code_arg));
1109  } else {
1110  llvm::ReplaceInstWithInst(
1111  &pos_call,
1112  llvm::CallInst::Create(module->getFunction(pos_fn_name + "_impl")));
1113  }
1114  break;
1115  }
1116  }
1117 }
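
Illustration (a sketch; "pos_start" as the placeholder name and the i32* type of error_code are assumptions, not taken from this page): a call inside query_func such as

  %pos = call i64 @pos_start()

is rewritten in place to call the real implementation,

  %pos = call i64 @pos_start_impl(i32* %error_code)

when use_resume_param is true, or to @pos_start_impl() with no arguments otherwise. Note that only the first matching call is rewritten before the loop breaks.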

◆ bind_query()

void anonymous_namespace{NativeCodegen.cpp}::bind_query ( llvm::Function *  query_func,
const std::string &  query_fname,
llvm::Function *  multifrag_query_func,
llvm::Module *  module 
)

Definition at line 1231 of file NativeCodegen.cpp.

References run_benchmark_import::args.

Referenced by Executor::compileWorkUnit().

1234  {
1235  std::vector<llvm::CallInst*> query_stubs;
1236  for (auto it = llvm::inst_begin(multifrag_query_func),
1237  e = llvm::inst_end(multifrag_query_func);
1238  it != e;
1239  ++it) {
1240  if (!llvm::isa<llvm::CallInst>(*it)) {
1241  continue;
1242  }
1243  auto& query_call = llvm::cast<llvm::CallInst>(*it);
1244  if (std::string(query_call.getCalledFunction()->getName()) == query_fname) {
1245  query_stubs.push_back(&query_call);
1246  }
1247  }
1248  for (auto& S : query_stubs) {
1249  std::vector<llvm::Value*> args;
1250  for (size_t i = 0; i < S->getNumArgOperands(); ++i) {
1251  args.push_back(S->getArgOperand(i));
1252  }
1253  llvm::ReplaceInstWithInst(S, llvm::CallInst::Create(query_func, args, ""));
1254  }
1255 }
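
In effect, every call to the stub named query_fname inside multifrag_query_func is replaced with a direct call to the freshly generated query_func, forwarding all operands unchanged. Sketch (function name and operands are illustrative only):

  call i32 @query_stub(i8** %col_buffers, i64* %num_rows)   ; before
  call i32 @query_func(i8** %col_buffers, i64* %num_rows)   ; after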

◆ cpp_to_llvm_name()

std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name ( const std::string &  s)

Definition at line 404 of file NativeCodegen.cpp.

References CHECK.

Referenced by gen_array_any_all_sigs(), and gen_translate_null_key_sigs().

404  {
405  if (s == "int8_t") {
406  return "i8";
407  }
408  if (s == "int16_t") {
409  return "i16";
410  }
411  if (s == "int32_t") {
412  return "i32";
413  }
414  if (s == "int64_t") {
415  return "i64";
416  }
417  CHECK(s == "float" || s == "double");
418  return s;
419 }

◆ create_row_function()

llvm::Function* anonymous_namespace{NativeCodegen.cpp}::create_row_function ( const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals,
llvm::Module *  module,
llvm::LLVMContext &  context 
)

Definition at line 1168 of file NativeCodegen.cpp.

References agg_col_count, get_int_type(), and set_row_func_argnames().

Referenced by Executor::compileWorkUnit().

1172  {
1173  std::vector<llvm::Type*> row_process_arg_types;
1174 
1175  if (agg_col_count) {
1176  // output (aggregate) arguments
1177  for (size_t i = 0; i < agg_col_count; ++i) {
1178  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1179  }
1180  } else {
1181  // group by buffer
1182  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1183  // current match count
1184  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1185  // total match count passed from the caller
1186  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1187  // old total match count returned to the caller
1188  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1189  // max matched (total number of slots in the output buffer)
1190  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1191  }
1192 
1193  // aggregate init values
1194  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1195 
1196  // position argument
1197  row_process_arg_types.push_back(llvm::Type::getInt64Ty(context));
1198 
1199  // fragment row offset argument
1200  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1201 
1202  // number of rows for each scan
1203  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1204 
1205  // literals buffer argument
1206  if (hoist_literals) {
1207  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
1208  }
1209 
1210  // column buffer arguments
1211  for (size_t i = 0; i < in_col_count; ++i) {
1212  row_process_arg_types.emplace_back(llvm::Type::getInt8PtrTy(context));
1213  }
1214 
1215  // join hash table argument
1216  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1217 
1218  // generate the function
1219  auto ft =
1220  llvm::FunctionType::get(get_int_type(32, context), row_process_arg_types, false);
1221 
1222  auto row_func =
1223  llvm::Function::Create(ft, llvm::Function::ExternalLinkage, "row_func", module);
1224 
1225  // set the row function argument names; for debugging purposes only
1226  set_row_func_argnames(row_func, in_col_count, agg_col_count, hoist_literals);
1227 
1228  return row_func;
1229 }
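
For example, with agg_col_count = 1, in_col_count = 2, and hoist_literals = true, the generated function has the signature below (argument names as assigned by set_row_func_argnames):

  i32 @row_func(i64* %out, i64* %agg_init_val, i64 %pos, i64* %frag_row_off,
                i64* %num_rows_per_scan, i8* %literals, i8* %col_buf0,
                i8* %col_buf1, i64* %join_hash_tables)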

◆ eliminate_dead_self_recursive_funcs()

void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs ( llvm::Module &  M,
const std::unordered_set< llvm::Function *> &  live_funcs 
)

Definition at line 122 of file NativeCodegen.cpp.

Referenced by optimize_ir().

124  {
125  std::vector<llvm::Function*> dead_funcs;
126  for (auto& F : M) {
127  bool bAlive = false;
128  if (live_funcs.count(&F)) {
129  continue;
130  }
131  for (auto U : F.users()) {
132  auto* C = llvm::dyn_cast<const llvm::CallInst>(U);
133  if (!C || C->getParent()->getParent() != &F) {
134  bAlive = true;
135  break;
136  }
137  }
138  if (!bAlive) {
139  dead_funcs.push_back(&F);
140  }
141  }
142  for (auto pFn : dead_funcs) {
143  pFn->eraseFromParent();
144  }
145 }

◆ find_variable_in_basic_block()

template<typename InstType >
llvm::Value* anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block ( llvm::Function *  func,
std::string  bb_name,
std::string  variable_name 
)

Definition at line 1462 of file NativeCodegen.cpp.

References run_benchmark_import::result.

1464  {
1465  llvm::Value* result = nullptr;
1466  if (func == nullptr || variable_name.empty()) {
1467  return result;
1468  }
1469  bool is_found = false;
1470  for (auto bb_it = func->begin(); bb_it != func->end() && !is_found; ++bb_it) {
1471  if (!bb_name.empty() && bb_it->getName() != bb_name) {
1472  continue;
1473  }
1474  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); inst_it++) {
1475  if (llvm::isa<InstType>(*inst_it)) {
1476  if (inst_it->getName() == variable_name) {
1477  result = &*inst_it;
1478  is_found = true;
1479  break;
1480  }
1481  }
1482  }
1483  }
1484  return result;
1485 }
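
A minimal usage sketch (the function, block, and variable names are illustrative assumptions):

  // Look for a PHI node named "pos" in the basic block ".entry" of row_func;
  // returns nullptr if no such instruction exists. Passing an empty bb_name
  // searches every basic block in the function.
  llvm::Value* pos_val =
      find_variable_in_basic_block<llvm::PHINode>(row_func, ".entry", "pos");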

◆ gen_array_any_all_sigs()

std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ( )

Definition at line 421 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

421  {
422  std::string result;
423  for (const std::string any_or_all : {"any", "all"}) {
424  for (const std::string elem_type :
425  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
426  for (const std::string needle_type :
427  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
428  for (const std::string op_name : {"eq", "ne", "lt", "le", "gt", "ge"}) {
429  result += ("declare i1 @array_" + any_or_all + "_" + op_name + "_" + elem_type +
430  "_" + needle_type + "(i8*, i64, " + cpp_to_llvm_name(needle_type) +
431  ", " + cpp_to_llvm_name(elem_type) + ");\n");
432  }
433  }
434  }
435  }
436  return result;
437 }
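
The four nested loops emit 2 x 6 x 6 x 6 = 432 declarations. For example, with any_or_all = "any", op_name = "eq", elem_type = "int32_t", and needle_type = "int64_t", the emitted line is:

  declare i1 @array_any_eq_int32_t_int64_t(i8*, i64, i64, i32);

Note the argument order: the needle's LLVM type (i64) precedes the element's (i32), matching the concatenation order above.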

◆ gen_translate_null_key_sigs()

std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ( )

Definition at line 439 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

439  {
440  std::string result;
441  for (const std::string key_type : {"int8_t", "int16_t", "int32_t", "int64_t"}) {
442  const auto key_llvm_type = cpp_to_llvm_name(key_type);
443  result += "declare i64 @translate_null_key_" + key_type + "(" + key_llvm_type + ", " +
444  key_llvm_type + ", i64);\n";
445  }
446  return result;
447 }
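
For example, for key_type = "int16_t" the emitted line is:

  declare i64 @translate_null_key_int16_t(i16, i16, i64);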

◆ get_agg_fnames()

std::vector<std::string> anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames ( const std::vector< Analyzer::Expr *> &  target_exprs,
const bool  is_group_by 
)

Definition at line 1257 of file NativeCodegen.cpp.

References CHECK, g_rt_module, Analyzer::AggExpr::get_aggtype(), getGlobalLLVMContext(), kAPPROX_COUNT_DISTINCT, kAVG, kCOUNT, kENCODING_NONE, kMAX, kMIN, kSAMPLE, kSINGLE_VALUE, kSUM, read_template_module(), and run_benchmark_import::result.

Referenced by Executor::compileWorkUnit().

1258  {
1259  std::vector<std::string> result;
1260  for (size_t target_idx = 0, agg_col_idx = 0; target_idx < target_exprs.size();
1261  ++target_idx, ++agg_col_idx) {
1262  const auto target_expr = target_exprs[target_idx];
1263  CHECK(target_expr);
1264  const auto target_type_info = target_expr->get_type_info();
1265  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
1266  const bool is_varlen =
1267  (target_type_info.is_string() &&
1268  target_type_info.get_compression() == kENCODING_NONE) ||
1269  target_type_info.is_array(); // TODO: should it use is_varlen_array() ?
1270  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
1271  result.emplace_back(target_type_info.is_fp() ? "agg_id_double" : "agg_id");
1272  if (is_varlen) {
1273  result.emplace_back("agg_id");
1274  }
1275  if (target_type_info.is_geometry()) {
1276  result.emplace_back("agg_id");
1277  for (auto i = 2; i < 2 * target_type_info.get_physical_coord_cols(); ++i) {
1278  result.emplace_back("agg_id");
1279  }
1280  }
1281  continue;
1282  }
1283  const auto agg_type = agg_expr->get_aggtype();
1284  const auto& agg_type_info =
1285  agg_type != kCOUNT ? agg_expr->get_arg()->get_type_info() : target_type_info;
1286  switch (agg_type) {
1287  case kAVG: {
1288  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1289  !agg_type_info.is_fp()) {
1290  throw std::runtime_error("AVG is only valid on integer and floating point");
1291  }
1292  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1293  ? "agg_sum"
1294  : "agg_sum_double");
1295  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1296  ? "agg_count"
1297  : "agg_count_double");
1298  break;
1299  }
1300  case kMIN: {
1301  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1302  agg_type_info.is_geometry()) {
1303  throw std::runtime_error(
1304  "MIN on strings, arrays or geospatial types not supported yet");
1305  }
1306  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1307  ? "agg_min"
1308  : "agg_min_double");
1309  break;
1310  }
1311  case kMAX: {
1312  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1313  agg_type_info.is_geometry()) {
1314  throw std::runtime_error(
1315  "MAX on strings, arrays or geospatial types not supported yet");
1316  }
1317  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1318  ? "agg_max"
1319  : "agg_max_double");
1320  break;
1321  }
1322  case kSUM: {
1323  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1324  !agg_type_info.is_fp()) {
1325  throw std::runtime_error("SUM is only valid on integer and floating point");
1326  }
1327  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1328  ? "agg_sum"
1329  : "agg_sum_double");
1330  break;
1331  }
1332  case kCOUNT:
1333  result.emplace_back(agg_expr->get_is_distinct() ? "agg_count_distinct"
1334  : "agg_count");
1335  break;
1336  case kSINGLE_VALUE: {
1337  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1338  break;
1339  }
1340  case kSAMPLE: {
1341  // Note that varlen SAMPLE arguments are handled separately above
1342  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1343  break;
1344  }
1345  case kAPPROX_COUNT_DISTINCT:
1346  result.emplace_back("agg_approximate_count_distinct");
1347  break;
1348  default:
1349  CHECK(false);
1350  }
1351  }
1352  return result;
1353 }
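
Worked example (derived from the switch above): for a target list equivalent to COUNT(*), AVG(x) with an integer column x, the returned names are

  agg_count, agg_sum, agg_count

since AVG expands into a sum slot followed by a count slot. For floating point arguments the "_double" variants are selected instead.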

◆ get_shared_memory_size()

size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size ( const bool  shared_mem_used,
const QueryMemoryDescriptor *  query_mem_desc_ptr 
)

Definition at line 1886 of file NativeCodegen.cpp.

References QueryMemoryDescriptor::getEntryCount(), and QueryMemoryDescriptor::getRowSize().

Referenced by Executor::compileWorkUnit().

1887  {
1888  return shared_mem_used
1889  ? (query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount())
1890  : 0;
1891 }
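
Worked example: for a row size of 8 bytes and an entry count of 1024, a shared-memory-enabled kernel reserves 8 * 1024 = 8192 bytes of shared memory; when shared_mem_used is false the function returns 0.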

◆ is_gpu_shared_mem_supported()

bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported ( const QueryMemoryDescriptor *  query_mem_desc_ptr,
const RelAlgExecutionUnit &  ra_exe_unit,
const CudaMgr_Namespace::CudaMgr *  cuda_mgr,
const ExecutorDeviceType  device_type,
const unsigned  gpu_blocksize,
const unsigned  num_blocks_per_mp 
)

To simplify the implementation for practical purposes, we initially provide shared memory support only for cases where the output buffer has at most as many entries as there are threads within each GPU block (the gpu_blocksize check below). Relaxing this assumption later would require a loop in the generated code so that each thread covers multiple entries. TODO: relax this if necessary

Definition at line 1893 of file NativeCodegen.cpp.

References CHECK, QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty(), CPU, QueryMemoryDescriptor::didOutputColumnar(), g_enable_smem_group_by, g_enable_smem_grouped_non_count_agg, g_enable_smem_non_grouped_agg, g_gpu_smem_threshold, QueryMemoryDescriptor::getEntryCount(), CudaMgr_Namespace::CudaMgr::getMinSharedMemoryPerBlockForAllDevices(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLaterForAll(), kAVG, kCOUNT, kMAX, kMIN, kSUM, NonGroupedAggregate, RelAlgExecutionUnit::target_exprs, target_exprs_to_infos(), and QueryMemoryDescriptor::useStreamingTopN().

Referenced by Executor::compileWorkUnit().

1898  {
1899  if (device_type == ExecutorDeviceType::CPU) {
1900  return false;
1901  }
1902  if (query_mem_desc_ptr->didOutputColumnar()) {
1903  return false;
1904  }
1905  CHECK(query_mem_desc_ptr);
1906  CHECK(cuda_mgr);
1907  /*
1908  * We only use shared memory strategy if GPU hardware provides native shared
1909  * memory atomics support. From CUDA Toolkit documentation:
1910  * https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
1911  * Maxwell, Pascal [and Volta] provides native shared memory atomic operations
1912  * for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
1913  * (CAS)."
1914  *
1915  **/
1916  if (!cuda_mgr->isArchMaxwellOrLaterForAll()) {
1917  return false;
1918  }
1919 
1920  if (query_mem_desc_ptr->getQueryDescriptionType() ==
1921  QueryDescriptionType::NonGroupedAggregate &&
1922  g_enable_smem_non_grouped_agg &&
1923  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty()) {
1924  // TODO: relax this, if necessary
1925  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
1926  return false;
1927  }
1928  // skip shared memory usage when dealing with 1) variable length targets, 2)
1929  // not a COUNT aggregate
1930  const auto target_infos =
1931  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
1932  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
1933  if (std::find_if(target_infos.begin(),
1934  target_infos.end(),
1935  [&supported_aggs](const TargetInfo& ti) {
1936  if (ti.sql_type.is_varlen() ||
1937  !supported_aggs.count(ti.agg_kind)) {
1938  return true;
1939  } else {
1940  return false;
1941  }
1942  }) == target_infos.end()) {
1943  return true;
1944  }
1945  }
1946  if (query_mem_desc_ptr->getQueryDescriptionType() ==
1947  QueryDescriptionType::GroupByPerfectHash &&
1948  g_enable_smem_group_by) {
1949  /**
1950  * To simplify the implementation for practical purposes, we initially provide
1951  * shared memory support for cases where there are at most as many entries in
1952  * the output buffer as there are threads within each GPU device. In order to
1953  * relax this assumption later, we need to add a for loop in generated codes
1954  * such that each thread loops over multiple entries.
1955  * TODO: relax this if necessary
1956  */
1957  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
1958  return false;
1959  }
1960 
1961  // Fundamentally, we should use shared memory whenever the output buffer
1962  // is small enough so that we can fit it in the shared memory and yet expect
1963  // good occupancy.
1964  // For now, we allow keyless, row-wise layout, and only for perfect hash
1965  // group by operations.
1966  if (query_mem_desc_ptr->hasKeylessHash() &&
1967  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty() &&
1968  !query_mem_desc_ptr->useStreamingTopN()) {
1969  const size_t shared_memory_threshold_bytes = std::min(
1970  g_gpu_smem_threshold == 0 ? SIZE_MAX : g_gpu_smem_threshold,
1971  cuda_mgr->getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp);
1972  const auto output_buffer_size =
1973  query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount();
1974  if (output_buffer_size > shared_memory_threshold_bytes) {
1975  return false;
1976  }
1977 
1978  // skip shared memory usage when dealing with 1) variable length targets, 2)
1979  // non-basic aggregates (COUNT, SUM, MIN, MAX, AVG)
1980  // TODO: relax this if necessary
1981  const auto target_infos =
1982  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
1983  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
1984  if (g_enable_smem_grouped_non_count_agg) {
1985  supported_aggs = {kCOUNT, kMIN, kMAX, kSUM, kAVG};
1986  }
1987  if (std::find_if(target_infos.begin(),
1988  target_infos.end(),
1989  [&supported_aggs](const TargetInfo& ti) {
1990  if (ti.sql_type.is_varlen() ||
1991  !supported_aggs.count(ti.agg_kind)) {
1992  return true;
1993  } else {
1994  return false;
1995  }
1996  }) == target_infos.end()) {
1997  return true;
1998  }
1999  }
2000  }
2001  return false;
2002 }
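
Worked example for the size check in the GroupByPerfectHash branch (numbers are illustrative): with getMinSharedMemoryPerBlockForAllDevices() = 49152 bytes, num_blocks_per_mp = 2, and g_gpu_smem_threshold = 0 (interpreted as "no limit", i.e. SIZE_MAX), the threshold is min(SIZE_MAX, 49152 / 2) = 24576 bytes. An output buffer of getRowSize() = 16 bytes times getEntryCount() = 1024, i.e. 16384 bytes, fits under the threshold, so the layout checks and the aggregate whitelist then decide the final answer.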

◆ optimize_ir()

void anonymous_namespace{NativeCodegen.cpp}::optimize_ir ( llvm::Function *  query_func,
llvm::Module *  module,
llvm::legacy::PassManager &  pass_manager,
const std::unordered_set< llvm::Function *> &  live_funcs,
const CompilationOptions &  co 
)

Definition at line 147 of file NativeCodegen.cpp.

References eliminate_dead_self_recursive_funcs(), LoopStrengthReduction, and CompilationOptions::opt_level.

Referenced by Executor::compileWorkUnit(), CodeGenerator::generateNativeCPUCode(), and CodeGenerator::generateNativeGPUCode().

151  {
152  pass_manager.add(llvm::createAlwaysInlinerLegacyPass());
153  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
154 #if LLVM_VERSION_MAJOR >= 7
155  pass_manager.add(llvm::createInstSimplifyLegacyPass());
156 #else
157  pass_manager.add(llvm::createInstructionSimplifierPass());
158 #endif
159  pass_manager.add(llvm::createInstructionCombiningPass());
160  pass_manager.add(llvm::createGlobalOptimizerPass());
161 
162  pass_manager.add(llvm::createLICMPass());
163  if (co.opt_level == ExecutorOptLevel::LoopStrengthReduction) {
164  pass_manager.add(llvm::createLoopStrengthReducePass());
165  }
166  pass_manager.run(*module);
167 
168  eliminate_dead_self_recursive_funcs(*module, live_funcs);
169 }
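
A minimal usage sketch (a sketch only; the real call sites are listed above):

  // Run the fixed pipeline over `module`, then erase functions whose only
  // remaining uses are self-recursive calls and that are not in `live_funcs`.
  // `module`, `query_func`, and the CompilationOptions `co` are assumed to
  // come from the caller, e.g. Executor::compileWorkUnit.
  llvm::legacy::PassManager pass_manager;
  const std::unordered_set<llvm::Function*> live_funcs{query_func};
  optimize_ir(query_func, module, pass_manager, live_funcs, co);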

◆ serialize_llvm_metadata_footnotes()

std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes ( llvm::Function *  query_func,
CgenState *  cgen_state 
)

Definition at line 2005 of file NativeCodegen.cpp.

References CgenState::filter_func_, CgenState::module_, CgenState::row_func_, and split().

Referenced by Executor::compileWorkUnit().

2006  {
2007  std::string llvm_ir;
2008  std::unordered_set<llvm::MDNode*> md;
2009 
2010  // Loop over all instructions in the query function.
2011  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
2012  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2013  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2014  instr_it->getAllMetadata(imd);
2015  for (auto [kind, node] : imd) {
2016  md.insert(node);
2017  }
2018  }
2019  }
2020 
2021  // Loop over all instructions in the row function.
2022  for (auto bb_it = cgen_state->row_func_->begin(); bb_it != cgen_state->row_func_->end();
2023  ++bb_it) {
2024  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2025  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2026  instr_it->getAllMetadata(imd);
2027  for (auto [kind, node] : imd) {
2028  md.insert(node);
2029  }
2030  }
2031  }
2032 
2033  // Loop over all instructions in the filter function.
2034  if (cgen_state->filter_func_) {
2035  for (auto bb_it = cgen_state->filter_func_->begin();
2036  bb_it != cgen_state->filter_func_->end();
2037  ++bb_it) {
2038  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2039  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2040  instr_it->getAllMetadata(imd);
2041  for (auto [kind, node] : imd) {
2042  md.insert(node);
2043  }
2044  }
2045  }
2046  }
2047 
2048  // Sort the metadata by canonical number and convert to text.
2049  if (!md.empty()) {
2050  std::map<size_t, std::string> sorted_strings;
2051  for (auto p : md) {
2052  std::string str;
2053  llvm::raw_string_ostream os(str);
2054  p->print(os, cgen_state->module_, true);
2055  os.flush();
2056  auto fields = split(str, {}, 1);
2057  if (fields.empty() || fields[0].empty()) {
2058  continue;
2059  }
2060  sorted_strings.emplace(std::stoul(fields[0].substr(1)), str);
2061  }
2062  llvm_ir += "\n";
2063  for (auto [id, text] : sorted_strings) {
2064  llvm_ir += text;
2065  llvm_ir += "\n";
2066  }
2067  }
2068 
2069  return llvm_ir;
2070 }
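
The returned string holds the numbered metadata nodes referenced by the query, row, and filter functions, sorted by number, so they can be appended once at the end of the serialized IR. A sketch of the output (node contents are illustrative only):

  !0 = !{!"branch_weights", i32 1, i32 100}
  !1 = distinct !{!1, !2}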

◆ set_row_func_argnames()

void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames ( llvm::Function *  row_func,
const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals 
)

Definition at line 1119 of file NativeCodegen.cpp.

References agg_col_count, and to_string().

Referenced by create_row_function().

1122  {
1123  auto arg_it = row_func->arg_begin();
1124 
1125  if (agg_col_count) {
1126  for (size_t i = 0; i < agg_col_count; ++i) {
1127  arg_it->setName("out");
1128  ++arg_it;
1129  }
1130  } else {
1131  arg_it->setName("group_by_buff");
1132  ++arg_it;
1133  arg_it->setName("crt_matched");
1134  ++arg_it;
1135  arg_it->setName("total_matched");
1136  ++arg_it;
1137  arg_it->setName("old_total_matched");
1138  ++arg_it;
1139  arg_it->setName("max_matched");
1140  ++arg_it;
1141  }
1142 
1143  arg_it->setName("agg_init_val");
1144  ++arg_it;
1145 
1146  arg_it->setName("pos");
1147  ++arg_it;
1148 
1149  arg_it->setName("frag_row_off");
1150  ++arg_it;
1151 
1152  arg_it->setName("num_rows_per_scan");
1153  ++arg_it;
1154 
1155  if (hoist_literals) {
1156  arg_it->setName("literals");
1157  ++arg_it;
1158  }
1159 
1160  for (size_t i = 0; i < in_col_count; ++i) {
1161  arg_it->setName("col_buf" + std::to_string(i));
1162  ++arg_it;
1163  }
1164 
1165  arg_it->setName("join_hash_tables");
1166 }

Variable Documentation

◆ cuda_rt_decls

const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

Definition at line 449 of file NativeCodegen.cpp.