OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{NativeCodegen.cpp} Namespace Reference

Functions

void throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
 
template<typename T = void>
void show_defined (llvm::Module &module)
 
template<typename T = void>
void show_defined (llvm::Module *module)
 
template<typename T = void>
void show_defined (std::unique_ptr< llvm::Module > &module)
 
template<typename T = void>
void scan_function_calls (llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
void scan_function_calls (llvm::Module &module, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
std::tuple< std::unordered_set
< std::string >
, std::unordered_set
< std::string > > 
scan_function_calls (llvm::Module &module, const std::unordered_set< std::string > &ignored={})
 
void eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
 
void optimize_ir (llvm::Function *query_func, llvm::Module *module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
 
std::string assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *module)
 
std::string cpp_to_llvm_name (const std::string &s)
 
std::string gen_array_any_all_sigs ()
 
std::string gen_translate_null_key_sigs ()
 
bool is_udf_module_present (bool cpu_only=false)
 
void bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *module)
 
void set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *module, llvm::LLVMContext &context)
 
void bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *module)
 
std::vector< std::string > get_agg_fnames (const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
 
void read_udf_gpu_module (const std::string &udf_ir_filename)
 
void read_udf_cpu_module (const std::string &udf_ir_filename)
 
template<typename InstType >
llvm::Value * find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned gpu_blocksize, const unsigned num_blocks_per_mp)
 
std::string serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 

Variables

const std::string cuda_rt_decls
 

Function Documentation

std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU ( ExecutionEngineWrapper &  execution_engine,
llvm::Module *  module 
)

Definition at line 423 of file NativeCodegen.cpp.

References CHECK.

Referenced by CodeGenerator::generateNativeCPUCode().

424  {
425  llvm::legacy::PassManager pass_manager;
426  auto cpu_target_machine = execution_engine->getTargetMachine();
427  CHECK(cpu_target_machine);
428  llvm::SmallString<256> code_str;
429  llvm::raw_svector_ostream os(code_str);
430 #if LLVM_VERSION_MAJOR >= 10
431  cpu_target_machine->addPassesToEmitFile(
432  pass_manager, os, nullptr, llvm::CGFT_AssemblyFile);
433 #else
434  cpu_target_machine->addPassesToEmitFile(
435  pass_manager, os, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
436 #endif
437  pass_manager.run(*module);
438  return "Assembly for the CPU:\n" + std::string(code_str.str()) + "\nEnd of assembly";
439 }
#define CHECK(condition)
Definition: Logger.h:209

+ Here is the caller graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders ( const std::string &  pos_fn_name,
const bool  use_resume_param,
llvm::Function *  query_func,
llvm::Module *  module 
)

Definition at line 1485 of file NativeCodegen.cpp.

1488  {
1489  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1490  ++it) {
1491  if (!llvm::isa<llvm::CallInst>(*it)) {
1492  continue;
1493  }
1494  auto& pos_call = llvm::cast<llvm::CallInst>(*it);
1495  if (std::string(pos_call.getCalledFunction()->getName()) == pos_fn_name) {
1496  if (use_resume_param) {
1497  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1498  llvm::ReplaceInstWithInst(
1499  &pos_call,
1500  llvm::CallInst::Create(module->getFunction(pos_fn_name + "_impl"),
1501  error_code_arg));
1502  } else {
1503  llvm::ReplaceInstWithInst(
1504  &pos_call,
1505  llvm::CallInst::Create(module->getFunction(pos_fn_name + "_impl")));
1506  }
1507  break;
1508  }
1509  }
1510 }
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:164
void anonymous_namespace{NativeCodegen.cpp}::bind_query ( llvm::Function *  query_func,
const std::string &  query_fname,
llvm::Function *  multifrag_query_func,
llvm::Module *  module 
)

Definition at line 1629 of file NativeCodegen.cpp.

1632  {
1633  std::vector<llvm::CallInst*> query_stubs;
1634  for (auto it = llvm::inst_begin(multifrag_query_func),
1635  e = llvm::inst_end(multifrag_query_func);
1636  it != e;
1637  ++it) {
1638  if (!llvm::isa<llvm::CallInst>(*it)) {
1639  continue;
1640  }
1641  auto& query_call = llvm::cast<llvm::CallInst>(*it);
1642  if (std::string(query_call.getCalledFunction()->getName()) == query_fname) {
1643  query_stubs.push_back(&query_call);
1644  }
1645  }
1646  for (auto& S : query_stubs) {
1647  std::vector<llvm::Value*> args;
1648  for (size_t i = 0; i < S->getNumArgOperands(); ++i) {
1649  args.push_back(S->getArgOperand(i));
1650  }
1651  llvm::ReplaceInstWithInst(S, llvm::CallInst::Create(query_func, args, ""));
1652  }
1653 }
std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name ( const std::string &  s)

Definition at line 583 of file NativeCodegen.cpp.

References CHECK.

Referenced by gen_array_any_all_sigs(), and gen_translate_null_key_sigs().

583  {
584  if (s == "int8_t") {
585  return "i8";
586  }
587  if (s == "int16_t") {
588  return "i16";
589  }
590  if (s == "int32_t") {
591  return "i32";
592  }
593  if (s == "int64_t") {
594  return "i64";
595  }
596  CHECK(s == "float" || s == "double");
597  return s;
598 }
#define CHECK(condition)
Definition: Logger.h:209

+ Here is the caller graph for this function:

llvm::Function* anonymous_namespace{NativeCodegen.cpp}::create_row_function ( const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals,
llvm::Module *  module,
llvm::LLVMContext &  context 
)

Definition at line 1563 of file NativeCodegen.cpp.

References LOG, udf_gpu_module, and logger::WARNING.

1567  {
1568  std::vector<llvm::Type*> row_process_arg_types;
1569 
1570  if (agg_col_count) {
1571  // output (aggregate) arguments
1572  for (size_t i = 0; i < agg_col_count; ++i) {
1573  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1574  }
1575  } else {
1576  // group by buffer
1577  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1578  // varlen output buffer
1579  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1580  // current match count
1581  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1582  // total match count passed from the caller
1583  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1584  // old total match count returned to the caller
1585  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1586  // max matched (total number of slots in the output buffer)
1587  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1588  }
1589 
1590  // aggregate init values
1591  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1592 
1593  // position argument
1594  row_process_arg_types.push_back(llvm::Type::getInt64Ty(context));
1595 
1596  // fragment row offset argument
1597  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1598 
1599  // number of rows for each scan
1600  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1601 
1602  // literals buffer argument
1603  if (hoist_literals) {
1604  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
1605  }
1606 
1607  // column buffer arguments
1608  for (size_t i = 0; i < in_col_count; ++i) {
1609  row_process_arg_types.emplace_back(llvm::Type::getInt8PtrTy(context));
1610  }
1611 
1612  // join hash table argument
1613  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1614 
1615  // generate the function
1616  auto ft =
1617  llvm::FunctionType::get(get_int_type(32, context), row_process_arg_types, false);
1618 
1619  auto row_func =
1620  llvm::Function::Create(ft, llvm::Function::ExternalLinkage, "row_func", module);
1621 
1622  // set the row function argument names; for debugging purposes only
1623  set_row_func_argnames(row_func, in_col_count, agg_col_count, hoist_literals);
1624 
1625  return row_func;
1626 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
void set_row_func_argnames(llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs ( llvm::Module &  M,
const std::unordered_set< llvm::Function * > &  live_funcs 
)

Definition at line 253 of file NativeCodegen.cpp.

Referenced by optimize_ir().

255  {
256  std::vector<llvm::Function*> dead_funcs;
257  for (auto& F : M) {
258  bool bAlive = false;
259  if (live_funcs.count(&F)) {
260  continue;
261  }
262  for (auto U : F.users()) {
263  auto* C = llvm::dyn_cast<const llvm::CallInst>(U);
264  if (!C || C->getParent()->getParent() != &F) {
265  bAlive = true;
266  break;
267  }
268  }
269  if (!bAlive) {
270  dead_funcs.push_back(&F);
271  }
272  }
273  for (auto pFn : dead_funcs) {
274  pFn->eraseFromParent();
275  }
276 }

+ Here is the caller graph for this function:

template<typename InstType >
llvm::Value* anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block ( llvm::Function *  func,
std::string  bb_name,
std::string  variable_name 
)

Definition at line 1890 of file NativeCodegen.cpp.

1892  {
1893  llvm::Value* result = nullptr;
1894  if (func == nullptr || variable_name.empty()) {
1895  return result;
1896  }
1897  bool is_found = false;
1898  for (auto bb_it = func->begin(); bb_it != func->end() && !is_found; ++bb_it) {
1899  if (!bb_name.empty() && bb_it->getName() != bb_name) {
1900  continue;
1901  }
1902  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); inst_it++) {
1903  if (llvm::isa<InstType>(*inst_it)) {
1904  if (inst_it->getName() == variable_name) {
1905  result = &*inst_it;
1906  is_found = true;
1907  break;
1908  }
1909  }
1910  }
1911  }
1912  return result;
1913 }
std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ( )

Definition at line 600 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

600  {
601  std::string result;
602  for (const std::string any_or_all : {"any", "all"}) {
603  for (const std::string elem_type :
604  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
605  for (const std::string needle_type :
606  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
607  for (const std::string op_name : {"eq", "ne", "lt", "le", "gt", "ge"}) {
608  result += ("declare i1 @array_" + any_or_all + "_" + op_name + "_" + elem_type +
609  "_" + needle_type + "(i8*, i64, " + cpp_to_llvm_name(needle_type) +
610  ", " + cpp_to_llvm_name(elem_type) + ");\n");
611  }
612  }
613  }
614  }
615  return result;
616 }
std::string cpp_to_llvm_name(const std::string &s)

+ Here is the call graph for this function:

std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ( )

Definition at line 618 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

618  {
619  std::string result;
620  for (const std::string key_type : {"int8_t", "int16_t", "int32_t", "int64_t"}) {
621  const auto key_llvm_type = cpp_to_llvm_name(key_type);
622  result += "declare i64 @translate_null_key_" + key_type + "(" + key_llvm_type + ", " +
623  key_llvm_type + ", i64);\n";
624  }
625  return result;
626 }
std::string cpp_to_llvm_name(const std::string &s)

+ Here is the call graph for this function:

std::vector<std::string> anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames ( const std::vector< Analyzer::Expr * > &  target_exprs,
const bool  is_group_by 
)

Definition at line 1655 of file NativeCodegen.cpp.

1656  {
1657  std::vector<std::string> result;
1658  for (size_t target_idx = 0, agg_col_idx = 0; target_idx < target_exprs.size();
1659  ++target_idx, ++agg_col_idx) {
1660  const auto target_expr = target_exprs[target_idx];
1661  CHECK(target_expr);
1662  const auto target_type_info = target_expr->get_type_info();
1663  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
1664  const bool is_varlen =
1665  (target_type_info.is_string() &&
1666  target_type_info.get_compression() == kENCODING_NONE) ||
1667  target_type_info.is_array(); // TODO: should it use is_varlen_array() ?
1668  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
1669  result.emplace_back(target_type_info.is_fp() ? "agg_id_double" : "agg_id");
1670  if (is_varlen) {
1671  result.emplace_back("agg_id");
1672  }
1673  if (target_type_info.is_geometry()) {
1674  result.emplace_back("agg_id");
1675  for (auto i = 2; i < 2 * target_type_info.get_physical_coord_cols(); ++i) {
1676  result.emplace_back("agg_id");
1677  }
1678  }
1679  continue;
1680  }
1681  const auto agg_type = agg_expr->get_aggtype();
1682  const auto& agg_type_info =
1683  agg_type != kCOUNT ? agg_expr->get_arg()->get_type_info() : target_type_info;
1684  switch (agg_type) {
1685  case kAVG: {
1686  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1687  !agg_type_info.is_fp()) {
1688  throw std::runtime_error("AVG is only valid on integer and floating point");
1689  }
1690  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1691  ? "agg_sum"
1692  : "agg_sum_double");
1693  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1694  ? "agg_count"
1695  : "agg_count_double");
1696  break;
1697  }
1698  case kMIN: {
1699  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1700  agg_type_info.is_geometry()) {
1701  throw std::runtime_error(
1702  "MIN on strings, arrays or geospatial types not supported yet");
1703  }
1704  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1705  ? "agg_min"
1706  : "agg_min_double");
1707  break;
1708  }
1709  case kMAX: {
1710  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1711  agg_type_info.is_geometry()) {
1712  throw std::runtime_error(
1713  "MAX on strings, arrays or geospatial types not supported yet");
1714  }
1715  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1716  ? "agg_max"
1717  : "agg_max_double");
1718  break;
1719  }
1720  case kSUM: {
1721  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1722  !agg_type_info.is_fp()) {
1723  throw std::runtime_error("SUM is only valid on integer and floating point");
1724  }
1725  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1726  ? "agg_sum"
1727  : "agg_sum_double");
1728  break;
1729  }
1730  case kCOUNT:
1731  result.emplace_back(agg_expr->get_is_distinct() ? "agg_count_distinct"
1732  : "agg_count");
1733  break;
1734  case kSINGLE_VALUE: {
1735  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1736  break;
1737  }
1738  case kSAMPLE: {
1739  // Note that varlen SAMPLE arguments are handled separately above
1740  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1741  break;
1742  }
1743  case kAPPROX_COUNT_DISTINCT:
1744  result.emplace_back("agg_approximate_count_distinct");
1745  break;
1746  case kAPPROX_QUANTILE:
1747  result.emplace_back("agg_approx_quantile");
1748  break;
1749  default:
1750  CHECK(false);
1751  }
1752  }
1753  return result;
1754 }
Definition: sqldefs.h:73
Definition: sqldefs.h:75
Definition: sqldefs.h:76
SQLAgg get_aggtype() const
Definition: Analyzer.h:1249
#define CHECK(condition)
Definition: Logger.h:209
Definition: sqldefs.h:74
Definition: sqldefs.h:72
size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size ( const bool  shared_mem_used,
const QueryMemoryDescriptor *  query_mem_desc_ptr 
)

Definition at line 2372 of file NativeCodegen.cpp.

2373  {
2374  return shared_mem_used
2375  ? (query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount())
2376  : 0;
2377 }
bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported ( const QueryMemoryDescriptor *  query_mem_desc_ptr,
const RelAlgExecutionUnit &  ra_exe_unit,
const CudaMgr_Namespace::CudaMgr *  cuda_mgr,
const ExecutorDeviceType  device_type,
const unsigned  gpu_blocksize,
const unsigned  num_blocks_per_mp 
)

To simplify the implementation for practical purposes, we initially provide shared memory support for cases where there are at most as many entries in the output buffer as there are threads within each GPU device. In order to relax this assumption later, we need to add a for loop in the generated code so that each thread loops over multiple entries. TODO: relax this if necessary

Definition at line 2379 of file NativeCodegen.cpp.

2384  {
2385  if (device_type == ExecutorDeviceType::CPU) {
2386  return false;
2387  }
2388  if (query_mem_desc_ptr->didOutputColumnar()) {
2389  return false;
2390  }
2391  CHECK(query_mem_desc_ptr);
2392  CHECK(cuda_mgr);
2393  /*
2394  * We only use shared memory strategy if GPU hardware provides native shared
2395  * memory atomics support. From CUDA Toolkit documentation:
2396  * https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
2397  * Maxwell, Pascal [and Volta] provides native shared memory atomic operations
2398  * for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
2399  * (CAS)."
2400  *
2401  **/
2402  if (!cuda_mgr->isArchMaxwellOrLaterForAll()) {
2403  return false;
2404  }
2405 
2406  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2407  QueryDescriptionType::NonGroupedAggregate &&
2408  g_enable_smem_non_grouped_agg &&
2409  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty()) {
2410  // TODO: relax this, if necessary
2411  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
2412  return false;
2413  }
2414  // skip shared memory usage when dealing with 1) variable length targets, 2)
2415  // not a COUNT aggregate
2416  const auto target_infos =
2417  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2418  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
2419  if (std::find_if(target_infos.begin(),
2420  target_infos.end(),
2421  [&supported_aggs](const TargetInfo& ti) {
2422  if (ti.sql_type.is_varlen() ||
2423  !supported_aggs.count(ti.agg_kind)) {
2424  return true;
2425  } else {
2426  return false;
2427  }
2428  }) == target_infos.end()) {
2429  return true;
2430  }
2431  }
2432  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2433  QueryDescriptionType::GroupByPerfectHash &&
2434  g_enable_smem_group_by) {
2435  /**
2436  * To simplify the implementation for practical purposes, we
2437  * initially provide shared memory support for cases where there are at most as
2438  * many entries in the output buffer as there are threads within each GPU device.
2439  * In order to relax this assumption later, we need to add a for loop in
2440  * generated codes such that each thread loops over multiple entries.
2441  * TODO: relax this if necessary
2442  */
2443  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
2444  return false;
2445  }
2446 
2447  // Fundamentally, we should use shared memory whenever the output buffer
2448  // is small enough so that we can fit it in the shared memory and yet expect
2449  // good occupancy.
2450  // For now, we allow keyless, row-wise layout, and only for perfect hash
2451  // group by operations.
2452  if (query_mem_desc_ptr->hasKeylessHash() &&
2453  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty() &&
2454  !query_mem_desc_ptr->useStreamingTopN()) {
2455  const size_t shared_memory_threshold_bytes = std::min(
2456  g_gpu_smem_threshold == 0 ? SIZE_MAX : g_gpu_smem_threshold,
2457  cuda_mgr->getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp);
2458  const auto output_buffer_size =
2459  query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount();
2460  if (output_buffer_size > shared_memory_threshold_bytes) {
2461  return false;
2462  }
2463 
2464  // skip shared memory usage when dealing with 1) variable length targets, 2)
2465  // non-basic aggregates (COUNT, SUM, MIN, MAX, AVG)
2466  // TODO: relax this if necessary
2467  const auto target_infos =
2468  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2469  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
2470  if (g_enable_smem_grouped_non_count_agg) {
2471  supported_aggs = {kCOUNT, kMIN, kMAX, kSUM, kAVG};
2472  }
2473  if (std::find_if(target_infos.begin(),
2474  target_infos.end(),
2475  [&supported_aggs](const TargetInfo& ti) {
2476  if (ti.sql_type.is_varlen() ||
2477  !supported_aggs.count(ti.agg_kind)) {
2478  return true;
2479  } else {
2480  return false;
2481  }
2482  }) == target_infos.end()) {
2483  return true;
2484  }
2485  }
2486  }
2487  return false;
2488 }
std::vector< Analyzer::Expr * > target_exprs
bool g_enable_smem_group_by
bool countDistinctDescriptorsLogicallyEmpty() const
#define SIZE_MAX
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:131
Definition: sqldefs.h:73
Definition: sqldefs.h:75
size_t getMinSharedMemoryPerBlockForAllDevices() const
Definition: CudaMgr.h:114
QueryDescriptionType getQueryDescriptionType() const
bool isArchMaxwellOrLaterForAll() const
Definition: CudaMgr.cpp:287
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:128
Definition: sqldefs.h:76
#define CHECK(condition)
Definition: Logger.h:209
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
Definition: sqldefs.h:74
Definition: sqldefs.h:72
size_t g_gpu_smem_threshold
Definition: Execute.cpp:123
bool anonymous_namespace{NativeCodegen.cpp}::is_udf_module_present ( bool  cpu_only = false)

Definition at line 1026 of file NativeCodegen.cpp.

1026  {
1027  return (cpu_only || udf_gpu_module != nullptr) && (udf_cpu_module != nullptr);
1028 }
std::unique_ptr< llvm::Module > udf_gpu_module
std::unique_ptr< llvm::Module > udf_cpu_module
void anonymous_namespace{NativeCodegen.cpp}::optimize_ir ( llvm::Function *  query_func,
llvm::Module *  module,
llvm::legacy::PassManager &  pass_manager,
const std::unordered_set< llvm::Function * > &  live_funcs,
const CompilationOptions co 
)

Definition at line 314 of file NativeCodegen.cpp.

References eliminate_dead_self_recursive_funcs(), LoopStrengthReduction, and CompilationOptions::opt_level.

Referenced by CodeGenerator::generateNativeCPUCode().

318  {
319  // the always inliner legacy pass must always run first
320  pass_manager.add(llvm::createAlwaysInlinerLegacyPass());
321 
322  pass_manager.add(new AnnotateInternalFunctionsPass());
323 
324  pass_manager.add(llvm::createSROAPass());
325  // mem ssa drops unused load and store instructions, e.g. passing variables directly
326  // where possible
327  pass_manager.add(
328  llvm::createEarlyCSEPass(/*enable_mem_ssa=*/true)); // Catch trivial redundancies
329 
330  pass_manager.add(llvm::createJumpThreadingPass()); // Thread jumps.
331  pass_manager.add(llvm::createCFGSimplificationPass());
332 
333  // remove load/stores in PHIs if instructions can be accessed directly post thread jumps
334  pass_manager.add(llvm::createNewGVNPass());
335 
336  pass_manager.add(llvm::createDeadStoreEliminationPass());
337  pass_manager.add(llvm::createLICMPass());
338 
339  pass_manager.add(llvm::createInstructionCombiningPass());
340 
341  // module passes
342  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
343  pass_manager.add(llvm::createGlobalOptimizerPass());
344 
346  pass_manager.add(llvm::createLoopStrengthReducePass());
347  }
348 
349  pass_manager.add(llvm::createCFGSimplificationPass()); // cleanup after everything
350 
351  pass_manager.run(*module);
352 
353  eliminate_dead_self_recursive_funcs(*module, live_funcs);
354 }
void eliminate_dead_self_recursive_funcs(llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
ExecutorOptLevel opt_level

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::read_udf_cpu_module ( const std::string &  udf_ir_filename)

Definition at line 1794 of file NativeCodegen.cpp.

1794  {
1795  llvm::SMDiagnostic parse_error;
1796 
1797  llvm::StringRef file_name_arg(udf_ir_filename);
1798 
1799  udf_cpu_module = llvm::parseIRFile(file_name_arg, parse_error, getGlobalLLVMContext());
1800  if (!udf_cpu_module) {
1801  throw_parseIR_error(parse_error, udf_ir_filename);
1802  }
1803 }
void throw_parseIR_error(const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
std::unique_ptr< llvm::Module > udf_cpu_module
llvm::LLVMContext & getGlobalLLVMContext()
void anonymous_namespace{NativeCodegen.cpp}::read_udf_gpu_module ( const std::string &  udf_ir_filename)

Definition at line 1775 of file NativeCodegen.cpp.

1775  {
1776  llvm::SMDiagnostic parse_error;
1777 
1778  llvm::StringRef file_name_arg(udf_ir_filename);
1779  udf_gpu_module = llvm::parseIRFile(file_name_arg, parse_error, getGlobalLLVMContext());
1780 
1781  if (!udf_gpu_module) {
1782  throw_parseIR_error(parse_error, udf_ir_filename, /* is_gpu= */ true);
1783  }
1784 
1785  llvm::Triple gpu_triple(udf_gpu_module->getTargetTriple());
1786  if (!gpu_triple.isNVPTX()) {
1787  LOG(WARNING)
1788  << "Expected triple nvptx64-nvidia-cuda for NVVM IR of loadtime UDFs but got "
1789  << gpu_triple.str() << ". Disabling the NVVM IR module.";
1790  udf_gpu_module = nullptr;
1791  }
1792 }
std::unique_ptr< llvm::Module > udf_gpu_module
#define LOG(tag)
Definition: Logger.h:203
void throw_parseIR_error(const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
llvm::LLVMContext & getGlobalLLVMContext()
template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Function &  F,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 202 of file NativeCodegen.cpp.

Referenced by scan_function_calls().

205  {
206  for (llvm::inst_iterator I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
207  if (auto* CI = llvm::dyn_cast<llvm::CallInst>(&*I)) {
208  auto* F2 = CI->getCalledFunction();
209  if (F2 != nullptr) {
210  auto F2name = F2->getName().str();
211  if (F2->isDeclaration()) {
212  if (F2name.rfind("__", 0) !=
213  0 // assume symbols with double underscore are defined
214  && F2name.rfind("llvm.", 0) !=
215  0 // TODO: this may give false positive for NVVM intrinsics
216  && ignored.find(F2name) == ignored.end() // not in ignored list
217  ) {
218  undefined.emplace(F2name);
219  }
220  } else {
221  if (defined.find(F2name) == defined.end()) {
222  defined.emplace(F2name);
223  scan_function_calls<T>(*F2, defined, undefined, ignored);
224  }
225  }
226  }
227  }
228  }
229 }

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  module,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 232 of file NativeCodegen.cpp.

References scan_function_calls().

235  {
236  for (auto& F : module) {
237  if (!F.isDeclaration()) {
238  scan_function_calls(F, defined, undefined, ignored);
239  }
240  }
241 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)

+ Here is the call graph for this function:

template<typename T = void>
std::tuple<std::unordered_set<std::string>, std::unordered_set<std::string> > anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  module,
const std::unordered_set< std::string > &  ignored = {} 
)

Definition at line 245 of file NativeCodegen.cpp.

246  {}) {
247  std::unordered_set<std::string> defined, undefined;
248  scan_function_calls(module, defined, undefined, ignored);
249  return std::make_tuple(defined, undefined);
250 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes ( llvm::Function *  query_func,
CgenState *  cgen_state 
)

Definition at line 2491 of file NativeCodegen.cpp.

2492  {
2493  std::string llvm_ir;
2494  std::unordered_set<llvm::MDNode*> md;
2495 
2496  // Loop over all instructions in the query function.
2497  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
2498  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2499  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2500  instr_it->getAllMetadata(imd);
2501  for (auto [kind, node] : imd) {
2502  md.insert(node);
2503  }
2504  }
2505  }
2506 
2507  // Loop over all instructions in the row function.
2508  for (auto bb_it = cgen_state->row_func_->begin(); bb_it != cgen_state->row_func_->end();
2509  ++bb_it) {
2510  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2511  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2512  instr_it->getAllMetadata(imd);
2513  for (auto [kind, node] : imd) {
2514  md.insert(node);
2515  }
2516  }
2517  }
2518 
2519  // Loop over all instructions in the filter function.
2520  if (cgen_state->filter_func_) {
2521  for (auto bb_it = cgen_state->filter_func_->begin();
2522  bb_it != cgen_state->filter_func_->end();
2523  ++bb_it) {
2524  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2525  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2526  instr_it->getAllMetadata(imd);
2527  for (auto [kind, node] : imd) {
2528  md.insert(node);
2529  }
2530  }
2531  }
2532  }
2533 
2534  // Sort the metadata by canonical number and convert to text.
2535  if (!md.empty()) {
2536  std::map<size_t, std::string> sorted_strings;
2537  for (auto p : md) {
2538  std::string str;
2539  llvm::raw_string_ostream os(str);
2540  p->print(os, cgen_state->module_, true);
2541  os.flush();
2542  auto fields = split(str, {}, 1);
2543  if (fields.empty() || fields[0].empty()) {
2544  continue;
2545  }
2546  sorted_strings.emplace(std::stoul(fields[0].substr(1)), str);
2547  }
2548  llvm_ir += "\n";
2549  for (auto [id, text] : sorted_strings) {
2550  llvm_ir += text;
2551  llvm_ir += "\n";
2552  }
2553  }
2554 
2555  return llvm_ir;
2556 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
llvm::Function * row_func_
Definition: CgenState.h:330
llvm::Module * module_
Definition: CgenState.h:329
llvm::Function * filter_func_
Definition: CgenState.h:331
void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames ( llvm::Function *  row_func,
const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals 
)

Definition at line 1512 of file NativeCodegen.cpp.

1515  {
1516  auto arg_it = row_func->arg_begin();
1517 
1518  if (agg_col_count) {
1519  for (size_t i = 0; i < agg_col_count; ++i) {
1520  arg_it->setName("out");
1521  ++arg_it;
1522  }
1523  } else {
1524  arg_it->setName("group_by_buff");
1525  ++arg_it;
1526  arg_it->setName("varlen_output_buff");
1527  ++arg_it;
1528  arg_it->setName("crt_matched");
1529  ++arg_it;
1530  arg_it->setName("total_matched");
1531  ++arg_it;
1532  arg_it->setName("old_total_matched");
1533  ++arg_it;
1534  arg_it->setName("max_matched");
1535  ++arg_it;
1536  }
1537 
1538  arg_it->setName("agg_init_val");
1539  ++arg_it;
1540 
1541  arg_it->setName("pos");
1542  ++arg_it;
1543 
1544  arg_it->setName("frag_row_off");
1545  ++arg_it;
1546 
1547  arg_it->setName("num_rows_per_scan");
1548  ++arg_it;
1549 
1550  if (hoist_literals) {
1551  arg_it->setName("literals");
1552  ++arg_it;
1553  }
1554 
1555  for (size_t i = 0; i < in_col_count; ++i) {
1556  arg_it->setName("col_buf" + std::to_string(i));
1557  ++arg_it;
1558  }
1559 
1560  arg_it->setName("join_hash_tables");
1561 }
std::string to_string(char const *&&v)
template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module &  module)

Definition at line 163 of file NativeCodegen.cpp.

References f.

Referenced by show_defined().

163  {
164  std::cout << "defines: ";
165  for (auto& f : module.getFunctionList()) {
166  if (!f.isDeclaration()) {
167  std::cout << f.getName().str() << ", ";
168  }
169  }
170  std::cout << std::endl;
171 }
char * f

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module *  module)

Definition at line 174 of file NativeCodegen.cpp.

References show_defined().

174  {
175  if (module == nullptr) {
176  std::cout << "is null" << std::endl;
177  } else {
178  show_defined(*module);
179  }
180 }
void show_defined(llvm::Module &module)

+ Here is the call graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( std::unique_ptr< llvm::Module > &  module)

Definition at line 183 of file NativeCodegen.cpp.

References show_defined().

183  {
184  show_defined(module.get());
185 }
void show_defined(llvm::Module &module)

+ Here is the call graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::throw_parseIR_error ( const llvm::SMDiagnostic &  parse_error,
std::string  src = "",
const bool  is_gpu = false 
)

Definition at line 131 of file NativeCodegen.cpp.

133  {
134  std::string excname = (is_gpu ? "NVVM IR ParseError: " : "LLVM IR ParseError: ");
135  llvm::raw_string_ostream ss(excname);
136  parse_error.print(src.c_str(), ss, false, false);
137  throw ParseIRError(ss.str());
138 }

Variable Documentation

const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

Definition at line 628 of file NativeCodegen.cpp.