OmniSciDB  0264ff685a
anonymous_namespace{NativeCodegen.cpp} Namespace Reference

Functions

void throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
 
template<typename T = void>
void show_defined (llvm::Module &module)
 
template<typename T = void>
void show_defined (llvm::Module *module)
 
template<typename T = void>
void show_defined (std::unique_ptr< llvm::Module > &module)
 
template<typename T = void>
void scan_function_calls (llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
void scan_function_calls (llvm::Module &module, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
std::tuple< std::unordered_set< std::string >, std::unordered_set< std::string > > scan_function_calls (llvm::Module &module, const std::unordered_set< std::string > &ignored={})
 
void eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function *> &live_funcs)
 
void optimize_ir (llvm::Function *query_func, llvm::Module *module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function *> &live_funcs, const CompilationOptions &co)
 
std::string assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *module)
 
std::string cpp_to_llvm_name (const std::string &s)
 
std::string gen_array_any_all_sigs ()
 
std::string gen_translate_null_key_sigs ()
 
void bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *module)
 
void set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *module, llvm::LLVMContext &context)
 
void bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *module)
 
std::vector< std::string > get_agg_fnames (const std::vector< Analyzer::Expr *> &target_exprs, const bool is_group_by)
 
template<typename InstType >
llvm::Value * find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned gpu_blocksize, const unsigned num_blocks_per_mp)
 
std::string serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 

Variables

const std::string cuda_rt_decls
 

Function Documentation

◆ assemblyForCPU()

std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU ( ExecutionEngineWrapper execution_engine,
llvm::Module *  module 
)

Definition at line 396 of file NativeCodegen.cpp.

References CHECK.

Referenced by CodeGenerator::generateNativeCPUCode().

397  {
398  llvm::legacy::PassManager pass_manager;
399  auto cpu_target_machine = execution_engine->getTargetMachine();
400  CHECK(cpu_target_machine);
401  llvm::SmallString<256> code_str;
402  llvm::raw_svector_ostream os(code_str);
403 #if LLVM_VERSION_MAJOR >= 10
404  cpu_target_machine->addPassesToEmitFile(
405  pass_manager, os, nullptr, llvm::CGFT_AssemblyFile);
406 #else
407  cpu_target_machine->addPassesToEmitFile(
408  pass_manager, os, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
409 #endif
410  pass_manager.run(*module);
411  return "Assembly for the CPU:\n" + std::string(code_str.str()) + "\nEnd of assembly";
412 }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ bind_pos_placeholders()

void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders ( const std::string &  pos_fn_name,
const bool  use_resume_param,
llvm::Function *  query_func,
llvm::Module *  module 
)

Definition at line 1383 of file NativeCodegen.cpp.

References get_arg_by_name().

Referenced by Executor::compileWorkUnit().

1386  {
1387  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1388  ++it) {
1389  if (!llvm::isa<llvm::CallInst>(*it)) {
1390  continue;
1391  }
1392  auto& pos_call = llvm::cast<llvm::CallInst>(*it);
1393  if (std::string(pos_call.getCalledFunction()->getName()) == pos_fn_name) {
1394  if (use_resume_param) {
1395  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1396  llvm::ReplaceInstWithInst(
1397  &pos_call,
1398  llvm::CallInst::Create(module->getFunction(pos_fn_name + "_impl"),
1399  error_code_arg));
1400  } else {
1401  llvm::ReplaceInstWithInst(
1402  &pos_call,
1403  llvm::CallInst::Create(module->getFunction(pos_fn_name + "_impl")));
1404  }
1405  break;
1406  }
1407  }
1408 }
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:162
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ bind_query()

void anonymous_namespace{NativeCodegen.cpp}::bind_query ( llvm::Function *  query_func,
const std::string &  query_fname,
llvm::Function *  multifrag_query_func,
llvm::Module *  module 
)

Definition at line 1523 of file NativeCodegen.cpp.

References run_benchmark_import::args.

Referenced by Executor::compileWorkUnit().

1526  {
1527  std::vector<llvm::CallInst*> query_stubs;
1528  for (auto it = llvm::inst_begin(multifrag_query_func),
1529  e = llvm::inst_end(multifrag_query_func);
1530  it != e;
1531  ++it) {
1532  if (!llvm::isa<llvm::CallInst>(*it)) {
1533  continue;
1534  }
1535  auto& query_call = llvm::cast<llvm::CallInst>(*it);
1536  if (std::string(query_call.getCalledFunction()->getName()) == query_fname) {
1537  query_stubs.push_back(&query_call);
1538  }
1539  }
1540  for (auto& S : query_stubs) {
1541  std::vector<llvm::Value*> args;
1542  for (size_t i = 0; i < S->getNumArgOperands(); ++i) {
1543  args.push_back(S->getArgOperand(i));
1544  }
1545  llvm::ReplaceInstWithInst(S, llvm::CallInst::Create(query_func, args, ""));
1546  }
1547 }
+ Here is the caller graph for this function:

◆ cpp_to_llvm_name()

std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name ( const std::string &  s)

Definition at line 549 of file NativeCodegen.cpp.

References CHECK.

Referenced by gen_array_any_all_sigs(), and gen_translate_null_key_sigs().

549  {
550  if (s == "int8_t") {
551  return "i8";
552  }
553  if (s == "int16_t") {
554  return "i16";
555  }
556  if (s == "int32_t") {
557  return "i32";
558  }
559  if (s == "int64_t") {
560  return "i64";
561  }
562  CHECK(s == "float" || s == "double");
563  return s;
564 }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ create_row_function()

llvm::Function* anonymous_namespace{NativeCodegen.cpp}::create_row_function ( const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals,
llvm::Module *  module,
llvm::LLVMContext &  context 
)

Definition at line 1459 of file NativeCodegen.cpp.

References get_int_type(), and set_row_func_argnames().

Referenced by Executor::compileWorkUnit().

1463  {
1464  std::vector<llvm::Type*> row_process_arg_types;
1465 
1466  if (agg_col_count) {
1467  // output (aggregate) arguments
1468  for (size_t i = 0; i < agg_col_count; ++i) {
1469  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1470  }
1471  } else {
1472  // group by buffer
1473  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1474  // current match count
1475  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1476  // total match count passed from the caller
1477  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1478  // old total match count returned to the caller
1479  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1480  // max matched (total number of slots in the output buffer)
1481  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1482  }
1483 
1484  // aggregate init values
1485  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1486 
1487  // position argument
1488  row_process_arg_types.push_back(llvm::Type::getInt64Ty(context));
1489 
1490  // fragment row offset argument
1491  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1492 
1493  // number of rows for each scan
1494  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1495 
1496  // literals buffer argument
1497  if (hoist_literals) {
1498  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
1499  }
1500 
1501  // column buffer arguments
1502  for (size_t i = 0; i < in_col_count; ++i) {
1503  row_process_arg_types.emplace_back(llvm::Type::getInt8PtrTy(context));
1504  }
1505 
1506  // join hash table argument
1507  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1508 
1509  // generate the function
1510  auto ft =
1511  llvm::FunctionType::get(get_int_type(32, context), row_process_arg_types, false);
1512 
1513  auto row_func =
1514  llvm::Function::Create(ft, llvm::Function::ExternalLinkage, "row_func", module);
1515 
1516  // set the row function argument names; for debugging purposes only
1517  set_row_func_argnames(row_func, in_col_count, agg_col_count, hoist_literals);
1518 
1519  return row_func;
1520 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
void set_row_func_argnames(llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ eliminate_dead_self_recursive_funcs()

void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs ( llvm::Module &  M,
const std::unordered_set< llvm::Function *> &  live_funcs 
)

Definition at line 248 of file NativeCodegen.cpp.

References logger::DEBUG1, logger::INFO, and LOG.

Referenced by optimize_ir().

250  {
251  std::vector<llvm::Function*> dead_funcs;
252  for (auto& F : M) {
253  bool bAlive = false;
254  if (live_funcs.count(&F)) {
255  continue;
256  }
257  for (auto U : F.users()) {
258  auto* C = llvm::dyn_cast<const llvm::CallInst>(U);
259  if (!C || C->getParent()->getParent() != &F) {
260  bAlive = true;
261  break;
262  }
263  }
264  if (!bAlive) {
265  dead_funcs.push_back(&F);
266  }
267  }
268  for (auto pFn : dead_funcs) {
269  pFn->eraseFromParent();
270  }
271 }
+ Here is the caller graph for this function:

◆ find_variable_in_basic_block()

template<typename InstType >
llvm::Value* anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block ( llvm::Function *  func,
std::string  bb_name,
std::string  variable_name 
)

Definition at line 1775 of file NativeCodegen.cpp.

References run_benchmark_import::result.

1777  {
1778  llvm::Value* result = nullptr;
1779  if (func == nullptr || variable_name.empty()) {
1780  return result;
1781  }
1782  bool is_found = false;
1783  for (auto bb_it = func->begin(); bb_it != func->end() && !is_found; ++bb_it) {
1784  if (!bb_name.empty() && bb_it->getName() != bb_name) {
1785  continue;
1786  }
1787  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); inst_it++) {
1788  if (llvm::isa<InstType>(*inst_it)) {
1789  if (inst_it->getName() == variable_name) {
1790  result = &*inst_it;
1791  is_found = true;
1792  break;
1793  }
1794  }
1795  }
1796  }
1797  return result;
1798 }

◆ gen_array_any_all_sigs()

std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ( )

Definition at line 566 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

566  {
567  std::string result;
568  for (const std::string any_or_all : {"any", "all"}) {
569  for (const std::string elem_type :
570  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
571  for (const std::string needle_type :
572  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
573  for (const std::string op_name : {"eq", "ne", "lt", "le", "gt", "ge"}) {
574  result += ("declare i1 @array_" + any_or_all + "_" + op_name + "_" + elem_type +
575  "_" + needle_type + "(i8*, i64, " + cpp_to_llvm_name(needle_type) +
576  ", " + cpp_to_llvm_name(elem_type) + ");\n");
577  }
578  }
579  }
580  }
581  return result;
582 }
std::string cpp_to_llvm_name(const std::string &s)
+ Here is the call graph for this function:

◆ gen_translate_null_key_sigs()

std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ( )

Definition at line 584 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

584  {
585  std::string result;
586  for (const std::string key_type : {"int8_t", "int16_t", "int32_t", "int64_t"}) {
587  const auto key_llvm_type = cpp_to_llvm_name(key_type);
588  result += "declare i64 @translate_null_key_" + key_type + "(" + key_llvm_type + ", " +
589  key_llvm_type + ", i64);\n";
590  }
591  return result;
592 }
std::string cpp_to_llvm_name(const std::string &s)
+ Here is the call graph for this function:

◆ get_agg_fnames()

std::vector<std::string> anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames ( const std::vector< Analyzer::Expr *> &  target_exprs,
const bool  is_group_by 
)

Definition at line 1549 of file NativeCodegen.cpp.

References CHECK, g_rt_module, Analyzer::AggExpr::get_aggtype(), getGlobalLLVMContext(), kAPPROX_COUNT_DISTINCT, kAPPROX_MEDIAN, kAVG, kCOUNT, kENCODING_NONE, kMAX, kMIN, kSAMPLE, kSINGLE_VALUE, kSUM, read_template_module(), and run_benchmark_import::result.

Referenced by Executor::compileWorkUnit().

1550  {
1551  std::vector<std::string> result;
1552  for (size_t target_idx = 0, agg_col_idx = 0; target_idx < target_exprs.size();
1553  ++target_idx, ++agg_col_idx) {
1554  const auto target_expr = target_exprs[target_idx];
1555  CHECK(target_expr);
1556  const auto target_type_info = target_expr->get_type_info();
1557  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
1558  const bool is_varlen =
1559  (target_type_info.is_string() &&
1560  target_type_info.get_compression() == kENCODING_NONE) ||
1561  target_type_info.is_array(); // TODO: should it use is_varlen_array() ?
1562  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
1563  result.emplace_back(target_type_info.is_fp() ? "agg_id_double" : "agg_id");
1564  if (is_varlen) {
1565  result.emplace_back("agg_id");
1566  }
1567  if (target_type_info.is_geometry()) {
1568  result.emplace_back("agg_id");
1569  for (auto i = 2; i < 2 * target_type_info.get_physical_coord_cols(); ++i) {
1570  result.emplace_back("agg_id");
1571  }
1572  }
1573  continue;
1574  }
1575  const auto agg_type = agg_expr->get_aggtype();
1576  const auto& agg_type_info =
1577  agg_type != kCOUNT ? agg_expr->get_arg()->get_type_info() : target_type_info;
1578  switch (agg_type) {
1579  case kAVG: {
1580  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1581  !agg_type_info.is_fp()) {
1582  throw std::runtime_error("AVG is only valid on integer and floating point");
1583  }
1584  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1585  ? "agg_sum"
1586  : "agg_sum_double");
1587  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1588  ? "agg_count"
1589  : "agg_count_double");
1590  break;
1591  }
1592  case kMIN: {
1593  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1594  agg_type_info.is_geometry()) {
1595  throw std::runtime_error(
1596  "MIN on strings, arrays or geospatial types not supported yet");
1597  }
1598  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1599  ? "agg_min"
1600  : "agg_min_double");
1601  break;
1602  }
1603  case kMAX: {
1604  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1605  agg_type_info.is_geometry()) {
1606  throw std::runtime_error(
1607  "MAX on strings, arrays or geospatial types not supported yet");
1608  }
1609  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1610  ? "agg_max"
1611  : "agg_max_double");
1612  break;
1613  }
1614  case kSUM: {
1615  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1616  !agg_type_info.is_fp()) {
1617  throw std::runtime_error("SUM is only valid on integer and floating point");
1618  }
1619  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1620  ? "agg_sum"
1621  : "agg_sum_double");
1622  break;
1623  }
1624  case kCOUNT:
1625  result.emplace_back(agg_expr->get_is_distinct() ? "agg_count_distinct"
1626  : "agg_count");
1627  break;
1628  case kSINGLE_VALUE: {
1629  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1630  break;
1631  }
1632  case kSAMPLE: {
1633  // Note that varlen SAMPLE arguments are handled separately above
1634  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1635  break;
1636  }
1637  case kAPPROX_COUNT_DISTINCT:
1638  result.emplace_back("agg_approximate_count_distinct");
1639  break;
1640  case kAPPROX_MEDIAN:
1641  result.emplace_back("agg_approx_median");
1642  break;
1643  default:
1644  CHECK(false);
1645  }
1646  }
1647  return result;
1648 }
Definition: sqldefs.h:73
Definition: sqldefs.h:75
SQLAgg get_aggtype() const
Definition: Analyzer.h:1095
Definition: sqldefs.h:76
#define CHECK(condition)
Definition: Logger.h:197
Definition: sqldefs.h:74
Definition: sqldefs.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_shared_memory_size()

size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size ( const bool  shared_mem_used,
const QueryMemoryDescriptor query_mem_desc_ptr 
)

Definition at line 2257 of file NativeCodegen.cpp.

References QueryMemoryDescriptor::getEntryCount(), and QueryMemoryDescriptor::getRowSize().

Referenced by Executor::compileWorkUnit().

2258  {
2259  return shared_mem_used
2260  ? (query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount())
2261  : 0;
2262 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ is_gpu_shared_mem_supported()

bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported ( const QueryMemoryDescriptor query_mem_desc_ptr,
const RelAlgExecutionUnit ra_exe_unit,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const ExecutorDeviceType  device_type,
const unsigned  gpu_blocksize,
const unsigned  num_blocks_per_mp 
)

To simplify the implementation for practical purposes, we initially provide shared memory support for cases where there are at most as many entries in the output buffer as there are threads within each GPU device. In order to relax this assumption later, we need to add a for loop in generated codes such that each thread loops over multiple entries. TODO: relax this if necessary

Definition at line 2264 of file NativeCodegen.cpp.

References CHECK, QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty(), CPU, QueryMemoryDescriptor::didOutputColumnar(), g_enable_smem_group_by, g_enable_smem_grouped_non_count_agg, g_enable_smem_non_grouped_agg, g_gpu_smem_threshold, QueryMemoryDescriptor::getEntryCount(), CudaMgr_Namespace::CudaMgr::getMinSharedMemoryPerBlockForAllDevices(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLaterForAll(), kAVG, kCOUNT, kMAX, kMIN, kSUM, NonGroupedAggregate, RelAlgExecutionUnit::target_exprs, target_exprs_to_infos(), and QueryMemoryDescriptor::useStreamingTopN().

Referenced by Executor::compileWorkUnit().

2269  {
2270  if (device_type == ExecutorDeviceType::CPU) {
2271  return false;
2272  }
2273  if (query_mem_desc_ptr->didOutputColumnar()) {
2274  return false;
2275  }
2276  CHECK(query_mem_desc_ptr);
2277  CHECK(cuda_mgr);
2278  /*
2279  * We only use shared memory strategy if GPU hardware provides native shared
2280  * memory atomics support. From CUDA Toolkit documentation:
2281  * https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
2282  * Maxwell, Pascal [and Volta] provides native shared memory atomic operations
2283  * for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
2284  * (CAS)."
2285  *
2286  **/
2287  if (!cuda_mgr->isArchMaxwellOrLaterForAll()) {
2288  return false;
2289  }
2290 
2291  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2292          QueryDescriptionType::NonGroupedAggregate &&
2293      g_enable_smem_non_grouped_agg &&
2294      query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty()) {
2295  // TODO: relax this, if necessary
2296  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
2297  return false;
2298  }
2299  // skip shared memory usage when dealing with 1) variable length targets, 2)
2300  // not a COUNT aggregate
2301  const auto target_infos =
2302  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2303  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
2304  if (std::find_if(target_infos.begin(),
2305  target_infos.end(),
2306  [&supported_aggs](const TargetInfo& ti) {
2307  if (ti.sql_type.is_varlen() ||
2308  !supported_aggs.count(ti.agg_kind)) {
2309  return true;
2310  } else {
2311  return false;
2312  }
2313  }) == target_infos.end()) {
2314  return true;
2315  }
2316  }
2317  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2318          QueryDescriptionType::GroupByPerfectHash &&
2319      g_enable_smem_group_by) {
2320    /**
2321     * To simplify the implementation for practical purposes, we initially provide
2322     * shared memory support for cases where there are at most as many entries in the
2323     * output buffer as there are threads within each GPU device. In order to relax
2324     * this assumption later, we need to add a for loop in generated codes such that
2325     * each thread loops over multiple entries.
2326     * TODO: relax this if necessary
2327     */
2328    if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
2329  return false;
2330  }
2331 
2332  // Fundamentally, we should use shared memory whenever the output buffer
2333  // is small enough so that we can fit it in the shared memory and yet expect
2334  // good occupancy.
2335  // For now, we allow keyless, row-wise layout, and only for perfect hash
2336  // group by operations.
2337  if (query_mem_desc_ptr->hasKeylessHash() &&
2338  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty() &&
2339  !query_mem_desc_ptr->useStreamingTopN()) {
2340  const size_t shared_memory_threshold_bytes = std::min(
2341  g_gpu_smem_threshold == 0 ? SIZE_MAX : g_gpu_smem_threshold,
2342  cuda_mgr->getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp);
2343  const auto output_buffer_size =
2344  query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount();
2345  if (output_buffer_size > shared_memory_threshold_bytes) {
2346  return false;
2347  }
2348 
2349  // skip shared memory usage when dealing with 1) variable length targets, 2)
2350  // non-basic aggregates (COUNT, SUM, MIN, MAX, AVG)
2351  // TODO: relax this if necessary
2352  const auto target_infos =
2353  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2354  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
2355  if (g_enable_smem_grouped_non_count_agg) {
2356  supported_aggs = {kCOUNT, kMIN, kMAX, kSUM, kAVG};
2357  }
2358  if (std::find_if(target_infos.begin(),
2359  target_infos.end(),
2360  [&supported_aggs](const TargetInfo& ti) {
2361  if (ti.sql_type.is_varlen() ||
2362  !supported_aggs.count(ti.agg_kind)) {
2363  return true;
2364  } else {
2365  return false;
2366  }
2367  }) == target_infos.end()) {
2368  return true;
2369  }
2370  }
2371  }
2372  return false;
2373 }
std::vector< Analyzer::Expr * > target_exprs
bool g_enable_smem_group_by
bool isArchMaxwellOrLaterForAll() const
Definition: CudaMgr.cpp:288
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:122
Definition: sqldefs.h:73
bool countDistinctDescriptorsLogicallyEmpty() const
Definition: sqldefs.h:75
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:119
Definition: sqldefs.h:76
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr *> &targets, const QueryMemoryDescriptor &query_mem_desc)
size_t getMinSharedMemoryPerBlockForAllDevices() const
Definition: CudaMgr.h:114
#define CHECK(condition)
Definition: Logger.h:197
QueryDescriptionType getQueryDescriptionType() const
Definition: sqldefs.h:74
Definition: sqldefs.h:72
size_t g_gpu_smem_threshold
Definition: Execute.cpp:114
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ optimize_ir()

void anonymous_namespace{NativeCodegen.cpp}::optimize_ir ( llvm::Function *  query_func,
llvm::Module *  module,
llvm::legacy::PassManager &  pass_manager,
const std::unordered_set< llvm::Function *> &  live_funcs,
const CompilationOptions co 
)

Definition at line 309 of file NativeCodegen.cpp.

References eliminate_dead_self_recursive_funcs(), LoopStrengthReduction, and CompilationOptions::opt_level.

Referenced by Executor::compileWorkUnit(), CodeGenerator::generateNativeCPUCode(), and CodeGenerator::generateNativeGPUCode().

313  {
314  pass_manager.add(llvm::createAlwaysInlinerLegacyPass());
315  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
316  pass_manager.add(llvm::createInstSimplifyLegacyPass());
317  pass_manager.add(llvm::createInstructionCombiningPass());
318  pass_manager.add(llvm::createGlobalOptimizerPass());
319 
320  pass_manager.add(llvm::createLICMPass());
321  if (co.opt_level == ExecutorOptLevel::LoopStrengthReduction) {
322  pass_manager.add(llvm::createLoopStrengthReducePass());
323  }
324  pass_manager.run(*module);
325 
326  eliminate_dead_self_recursive_funcs(*module, live_funcs);
327 }
ExecutorOptLevel opt_level
void eliminate_dead_self_recursive_funcs(llvm::Module &M, const std::unordered_set< llvm::Function *> &live_funcs)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ scan_function_calls() [1/3]

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Function &  F,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 197 of file NativeCodegen.cpp.

References scan_function_calls().

200  {
201  for (llvm::inst_iterator I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
202  if (auto* CI = llvm::dyn_cast<llvm::CallInst>(&*I)) {
203  auto* F2 = CI->getCalledFunction();
204  if (F2 != nullptr) {
205  auto F2name = F2->getName().str();
206  if (F2->isDeclaration()) {
207  if (F2name.rfind("__", 0) !=
208  0 // assume symbols with double underscore are defined
209  && F2name.rfind("llvm.", 0) !=
210  0 // TODO: this may give false positive for NVVM intrinsics
211  && ignored.find(F2name) == ignored.end() // not in ignored list
212  ) {
213  undefined.emplace(F2name);
214  }
215  } else {
216  if (defined.find(F2name) == defined.end()) {
217  defined.emplace(F2name);
218  scan_function_calls(*F2, defined, undefined, ignored);
219  }
220  }
221  }
222  }
223  }
224 }
std::tuple< std::unordered_set< std::string >, std::unordered_set< std::string > > scan_function_calls(llvm::Module &module, const std::unordered_set< std::string > &ignored={})
+ Here is the call graph for this function:

◆ scan_function_calls() [2/3]

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  module,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 227 of file NativeCodegen.cpp.

References scan_function_calls().

230  {
231  for (auto& F : module) {
232  if (!F.isDeclaration()) {
233  scan_function_calls(F, defined, undefined, ignored);
234  }
235  }
236 }
std::tuple< std::unordered_set< std::string >, std::unordered_set< std::string > > scan_function_calls(llvm::Module &module, const std::unordered_set< std::string > &ignored={})
+ Here is the call graph for this function:

◆ scan_function_calls() [3/3]

template<typename T = void>
std::tuple<std::unordered_set<std::string>, std::unordered_set<std::string> > anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  module,
const std::unordered_set< std::string > &  ignored = {} 
)

Definition at line 240 of file NativeCodegen.cpp.

Referenced by scan_function_calls().

241  {}) {
242  std::unordered_set<std::string> defined, undefined;
243  scan_function_calls(module, defined, undefined, ignored);
244  return std::make_tuple(defined, undefined);
245 }
std::tuple< std::unordered_set< std::string >, std::unordered_set< std::string > > scan_function_calls(llvm::Module &module, const std::unordered_set< std::string > &ignored={})
+ Here is the caller graph for this function:

◆ serialize_llvm_metadata_footnotes()

std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes ( llvm::Function *  query_func,
CgenState cgen_state 
)

Definition at line 2376 of file NativeCodegen.cpp.

References CgenState::filter_func_, CgenState::module_, CgenState::row_func_, and split().

Referenced by Executor::compileWorkUnit().

2377  {
2378  std::string llvm_ir;
2379  std::unordered_set<llvm::MDNode*> md;
2380 
2381  // Loop over all instructions in the query function.
2382  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
2383  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2384  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2385  instr_it->getAllMetadata(imd);
2386  for (auto [kind, node] : imd) {
2387  md.insert(node);
2388  }
2389  }
2390  }
2391 
2392  // Loop over all instructions in the row function.
2393  for (auto bb_it = cgen_state->row_func_->begin(); bb_it != cgen_state->row_func_->end();
2394  ++bb_it) {
2395  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2396  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2397  instr_it->getAllMetadata(imd);
2398  for (auto [kind, node] : imd) {
2399  md.insert(node);
2400  }
2401  }
2402  }
2403 
2404  // Loop over all instructions in the filter function.
2405  if (cgen_state->filter_func_) {
2406  for (auto bb_it = cgen_state->filter_func_->begin();
2407  bb_it != cgen_state->filter_func_->end();
2408  ++bb_it) {
2409  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2410  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2411  instr_it->getAllMetadata(imd);
2412  for (auto [kind, node] : imd) {
2413  md.insert(node);
2414  }
2415  }
2416  }
2417  }
2418 
2419  // Sort the metadata by canonical number and convert to text.
2420  if (!md.empty()) {
2421  std::map<size_t, std::string> sorted_strings;
2422  for (auto p : md) {
2423  std::string str;
2424  llvm::raw_string_ostream os(str);
2425  p->print(os, cgen_state->module_, true);
2426  os.flush();
2427  auto fields = split(str, {}, 1);
2428  if (fields.empty() || fields[0].empty()) {
2429  continue;
2430  }
2431  sorted_strings.emplace(std::stoul(fields[0].substr(1)), str);
2432  }
2433  llvm_ir += "\n";
2434  for (auto [id, text] : sorted_strings) {
2435  llvm_ir += text;
2436  llvm_ir += "\n";
2437  }
2438  }
2439 
2440  return llvm_ir;
2441 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
llvm::Function * row_func_
Definition: CgenState.h:323
llvm::Module * module_
Definition: CgenState.h:322
llvm::Function * filter_func_
Definition: CgenState.h:324
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ set_row_func_argnames()

void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames ( llvm::Function *  row_func,
const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals 
)

Definition at line 1410 of file NativeCodegen.cpp.

References to_string().

Referenced by create_row_function().

1413  {
1414  auto arg_it = row_func->arg_begin();
1415 
1416  if (agg_col_count) {
1417  for (size_t i = 0; i < agg_col_count; ++i) {
1418  arg_it->setName("out");
1419  ++arg_it;
1420  }
1421  } else {
1422  arg_it->setName("group_by_buff");
1423  ++arg_it;
1424  arg_it->setName("crt_matched");
1425  ++arg_it;
1426  arg_it->setName("total_matched");
1427  ++arg_it;
1428  arg_it->setName("old_total_matched");
1429  ++arg_it;
1430  arg_it->setName("max_matched");
1431  ++arg_it;
1432  }
1433 
1434  arg_it->setName("agg_init_val");
1435  ++arg_it;
1436 
1437  arg_it->setName("pos");
1438  ++arg_it;
1439 
1440  arg_it->setName("frag_row_off");
1441  ++arg_it;
1442 
1443  arg_it->setName("num_rows_per_scan");
1444  ++arg_it;
1445 
1446  if (hoist_literals) {
1447  arg_it->setName("literals");
1448  ++arg_it;
1449  }
1450 
1451  for (size_t i = 0; i < in_col_count; ++i) {
1452  arg_it->setName("col_buf" + std::to_string(i));
1453  ++arg_it;
1454  }
1455 
1456  arg_it->setName("join_hash_tables");
1457 }
std::string to_string(char const *&&v)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ show_defined() [1/3]

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module &  module)

Definition at line 158 of file NativeCodegen.cpp.

158  {
159  std::cout << "defines: ";
160  for (auto& f : module.getFunctionList()) {
161  if (!f.isDeclaration()) {
162  std::cout << f.getName().str() << ", ";
163  }
164  }
165  std::cout << std::endl;
166 }

◆ show_defined() [2/3]

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module *  module)

Definition at line 169 of file NativeCodegen.cpp.

References show_defined().

169  {
170  if (module == nullptr) {
171  std::cout << "is null" << std::endl;
172  } else {
173  show_defined(*module);
174  }
175 }
void show_defined(std::unique_ptr< llvm::Module > &module)
+ Here is the call graph for this function:

◆ show_defined() [3/3]

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( std::unique_ptr< llvm::Module > &  module)

Definition at line 178 of file NativeCodegen.cpp.

Referenced by show_defined().

178  {
179  show_defined(module.get());
180 }
void show_defined(std::unique_ptr< llvm::Module > &module)
+ Here is the caller graph for this function:

◆ throw_parseIR_error()

void anonymous_namespace{NativeCodegen.cpp}::throw_parseIR_error ( const llvm::SMDiagnostic &  parse_error,
std::string  src = "",
const bool  is_gpu = false 
)

Definition at line 126 of file NativeCodegen.cpp.

Referenced by CodeGenerator::generatePTX(), read_rt_udf_cpu_module(), read_rt_udf_gpu_module(), read_udf_cpu_module(), and read_udf_gpu_module().

128  {
129  std::string excname = (is_gpu ? "NVVM IR ParseError: " : "LLVM IR ParseError: ");
130  llvm::raw_string_ostream ss(excname);
131  parse_error.print(src.c_str(), ss, false, false);
132  throw ParseIRError(ss.str());
133 }
+ Here is the caller graph for this function:

Variable Documentation

◆ cuda_rt_decls

const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

Definition at line 594 of file NativeCodegen.cpp.