OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{NativeCodegen.cpp} Namespace Reference

Functions

void throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
 
template<typename T = void>
void show_defined (llvm::Module &llvm_module)
 
template<typename T = void>
void show_defined (llvm::Module *llvm_module)
 
template<typename T = void>
void show_defined (std::unique_ptr< llvm::Module > &llvm_module)
 
template<typename T = void>
void scan_function_calls (llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
void scan_function_calls (llvm::Module &llvm_module, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
std::tuple< std::unordered_set
< std::string >
, std::unordered_set
< std::string > > 
scan_function_calls (llvm::Module &llvm_module, const std::unordered_set< std::string > &ignored={})
 
void eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
 
void optimize_ir (llvm::Function *query_func, llvm::Module *llvm_module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function * > &live_funcs, const bool is_gpu_smem_used, const CompilationOptions &co)
 
std::string assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *llvm_module)
 
ExecutionEngineWrapper create_execution_engine (llvm::Module *llvm_module, llvm::EngineBuilder &eb, const CompilationOptions &co)
 
std::string cpp_to_llvm_name (const std::string &s)
 
std::string gen_array_any_all_sigs ()
 
std::string gen_translate_null_key_sigs ()
 
void bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *llvm_module)
 
void set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *llvm_module, llvm::LLVMContext &context)
 
void bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *llvm_module)
 
std::vector< std::string > get_agg_fnames (const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
 
template<typename InstType >
llvm::Value * find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned gpu_blocksize, const unsigned num_blocks_per_mp)
 
std::string serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 

Variables

const std::string cuda_rt_decls
 

Function Documentation

std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU ( ExecutionEngineWrapper &  execution_engine,
llvm::Module *  llvm_module 
)

Definition at line 398 of file NativeCodegen.cpp.

References CHECK.

Referenced by create_execution_engine().

399  {
400  llvm::legacy::PassManager pass_manager;
401  auto cpu_target_machine = execution_engine->getTargetMachine();
402  CHECK(cpu_target_machine);
403  llvm::SmallString<256> code_str;
404  llvm::raw_svector_ostream os(code_str);
405 #if LLVM_VERSION_MAJOR >= 10
406  cpu_target_machine->addPassesToEmitFile(
407  pass_manager, os, nullptr, llvm::CGFT_AssemblyFile);
408 #else
409  cpu_target_machine->addPassesToEmitFile(
410  pass_manager, os, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
411 #endif
412  pass_manager.run(*llvm_module);
413  return "Assembly for the CPU:\n" + std::string(code_str.str()) + "\nEnd of assembly";
414 }
#define CHECK(condition)
Definition: Logger.h:223

+ Here is the caller graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders ( const std::string &  pos_fn_name,
const bool  use_resume_param,
llvm::Function *  query_func,
llvm::Module *  llvm_module 
)

Definition at line 1573 of file NativeCodegen.cpp.

1576  {
1577  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1578  ++it) {
1579  if (!llvm::isa<llvm::CallInst>(*it)) {
1580  continue;
1581  }
1582  auto& pos_call = llvm::cast<llvm::CallInst>(*it);
1583  if (std::string(pos_call.getCalledFunction()->getName()) == pos_fn_name) {
1584  if (use_resume_param) {
1585  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1586  llvm::ReplaceInstWithInst(
1587  &pos_call,
1588  llvm::CallInst::Create(llvm_module->getFunction(pos_fn_name + "_impl"),
1589  error_code_arg));
1590  } else {
1591  llvm::ReplaceInstWithInst(
1592  &pos_call,
1593  llvm::CallInst::Create(llvm_module->getFunction(pos_fn_name + "_impl")));
1594  }
1595  break;
1596  }
1597  }
1598 }
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
void anonymous_namespace{NativeCodegen.cpp}::bind_query ( llvm::Function *  query_func,
const std::string &  query_fname,
llvm::Function *  multifrag_query_func,
llvm::Module *  llvm_module 
)

Definition at line 1717 of file NativeCodegen.cpp.

1720  {
1721  std::vector<llvm::CallInst*> query_stubs;
1722  for (auto it = llvm::inst_begin(multifrag_query_func),
1723  e = llvm::inst_end(multifrag_query_func);
1724  it != e;
1725  ++it) {
1726  if (!llvm::isa<llvm::CallInst>(*it)) {
1727  continue;
1728  }
1729  auto& query_call = llvm::cast<llvm::CallInst>(*it);
1730  if (std::string(query_call.getCalledFunction()->getName()) == query_fname) {
1731  query_stubs.push_back(&query_call);
1732  }
1733  }
1734  for (auto& S : query_stubs) {
1735  std::vector<llvm::Value*> args;
1736  for (size_t i = 0; i < S->getNumArgOperands(); ++i) {
1737  args.push_back(S->getArgOperand(i));
1738  }
1739  llvm::ReplaceInstWithInst(S, llvm::CallInst::Create(query_func, args, ""));
1740  }
1741 }
std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name ( const std::string &  s)

Definition at line 577 of file NativeCodegen.cpp.

References CHECK.

Referenced by gen_array_any_all_sigs(), and gen_translate_null_key_sigs().

577  {
578  if (s == "int8_t") {
579  return "i8";
580  }
581  if (s == "int16_t") {
582  return "i16";
583  }
584  if (s == "int32_t") {
585  return "i32";
586  }
587  if (s == "int64_t") {
588  return "i64";
589  }
590  CHECK(s == "float" || s == "double");
591  return s;
592 }
#define CHECK(condition)
Definition: Logger.h:223

+ Here is the caller graph for this function:

ExecutionEngineWrapper anonymous_namespace{NativeCodegen.cpp}::create_execution_engine ( llvm::Module *  llvm_module,
llvm::EngineBuilder &  eb,
const CompilationOptions co 
)

Definition at line 416 of file NativeCodegen.cpp.

References logger::ASM, assemblyForCPU(), CHECK, DEBUG_TIMER, g_ee_create_mutex, and LOG.

Referenced by CodeGenerator::generateNativeCPUCode().

418  {
419  auto timer = DEBUG_TIMER(__func__);
420  // Avoids data race in
421  // llvm::sys::DynamicLibrary::getPermanentLibrary and
422  // GDBJITRegistrationListener::notifyObjectLoaded while creating a
423  // new ExecutionEngine instance. Unfortunately we have to use global
424  // mutex here.
425  std::lock_guard<llvm::sys::Mutex> lock(g_ee_create_mutex);
426  ExecutionEngineWrapper execution_engine(eb.create(), co);
427  CHECK(execution_engine.get());
428  // Force the module data layout to match the layout for the selected target
429  llvm_module->setDataLayout(execution_engine->getDataLayout());
430 
431  LOG(ASM) << assemblyForCPU(execution_engine, llvm_module);
432 
433  execution_engine->finalizeObject();
434  return execution_engine;
435 }
#define LOG(tag)
Definition: Logger.h:217
std::string assemblyForCPU(ExecutionEngineWrapper &execution_engine, llvm::Module *llvm_module)
#define CHECK(condition)
Definition: Logger.h:223
#define DEBUG_TIMER(name)
Definition: Logger.h:370
static llvm::sys::Mutex g_ee_create_mutex

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Function* anonymous_namespace{NativeCodegen.cpp}::create_row_function ( const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals,
llvm::Module *  llvm_module,
llvm::LLVMContext &  context 
)

Definition at line 1651 of file NativeCodegen.cpp.

1655  {
1656  std::vector<llvm::Type*> row_process_arg_types;
1657 
1658  if (agg_col_count) {
1659  // output (aggregate) arguments
1660  for (size_t i = 0; i < agg_col_count; ++i) {
1661  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1662  }
1663  } else {
1664  // group by buffer
1665  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1666  // varlen output buffer
1667  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1668  // current match count
1669  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1670  // total match count passed from the caller
1671  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1672  // old total match count returned to the caller
1673  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1674  // max matched (total number of slots in the output buffer)
1675  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1676  }
1677 
1678  // aggregate init values
1679  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1680 
1681  // position argument
1682  row_process_arg_types.push_back(llvm::Type::getInt64Ty(context));
1683 
1684  // fragment row offset argument
1685  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1686 
1687  // number of rows for each scan
1688  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1689 
1690  // literals buffer argument
1691  if (hoist_literals) {
1692  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
1693  }
1694 
1695  // column buffer arguments
1696  for (size_t i = 0; i < in_col_count; ++i) {
1697  row_process_arg_types.emplace_back(llvm::Type::getInt8PtrTy(context));
1698  }
1699 
1700  // join hash table argument
1701  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1702 
1703  // generate the function
1704  auto ft =
1705  llvm::FunctionType::get(get_int_type(32, context), row_process_arg_types, false);
1706 
1707  auto row_func = llvm::Function::Create(
1708  ft, llvm::Function::ExternalLinkage, "row_func", llvm_module);
1709 
1710  // set the row function argument names; for debugging purposes only
1711  set_row_func_argnames(row_func, in_col_count, agg_col_count, hoist_literals);
1712 
1713  return row_func;
1714 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
void set_row_func_argnames(llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs ( llvm::Module &  M,
const std::unordered_set< llvm::Function * > &  live_funcs 
)

Definition at line 242 of file NativeCodegen.cpp.

Referenced by optimize_ir().

244  {
245  std::vector<llvm::Function*> dead_funcs;
246  for (auto& F : M) {
247  bool bAlive = false;
248  if (live_funcs.count(&F)) {
249  continue;
250  }
251  for (auto U : F.users()) {
252  auto* C = llvm::dyn_cast<const llvm::CallInst>(U);
253  if (!C || C->getParent()->getParent() != &F) {
254  bAlive = true;
255  break;
256  }
257  }
258  if (!bAlive) {
259  dead_funcs.push_back(&F);
260  }
261  }
262  for (auto pFn : dead_funcs) {
263  pFn->eraseFromParent();
264  }
265 }

+ Here is the caller graph for this function:

template<typename InstType >
llvm::Value* anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block ( llvm::Function *  func,
std::string  bb_name,
std::string  variable_name 
)

Definition at line 1893 of file NativeCodegen.cpp.

1895  {
1896  llvm::Value* result = nullptr;
1897  if (func == nullptr || variable_name.empty()) {
1898  return result;
1899  }
1900  bool is_found = false;
1901  for (auto bb_it = func->begin(); bb_it != func->end() && !is_found; ++bb_it) {
1902  if (!bb_name.empty() && bb_it->getName() != bb_name) {
1903  continue;
1904  }
1905  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); inst_it++) {
1906  if (llvm::isa<InstType>(*inst_it)) {
1907  if (inst_it->getName() == variable_name) {
1908  result = &*inst_it;
1909  is_found = true;
1910  break;
1911  }
1912  }
1913  }
1914  }
1915  return result;
1916 }
std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ( )

Definition at line 594 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

594  {
595  std::string result;
596  for (const std::string any_or_all : {"any", "all"}) {
597  for (const std::string elem_type :
598  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
599  for (const std::string needle_type :
600  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
601  for (const std::string op_name : {"eq", "ne", "lt", "le", "gt", "ge"}) {
602  result += ("declare i1 @array_" + any_or_all + "_" + op_name + "_" + elem_type +
603  "_" + needle_type + "(i8*, i64, " + cpp_to_llvm_name(needle_type) +
604  ", " + cpp_to_llvm_name(elem_type) + ");\n");
605  }
606  }
607  }
608  }
609  return result;
610 }
std::string cpp_to_llvm_name(const std::string &s)

+ Here is the call graph for this function:

std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ( )

Definition at line 612 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

612  {
613  std::string result;
614  for (const std::string key_type : {"int8_t", "int16_t", "int32_t", "int64_t"}) {
615  const auto key_llvm_type = cpp_to_llvm_name(key_type);
616  result += "declare i64 @translate_null_key_" + key_type + "(" + key_llvm_type + ", " +
617  key_llvm_type + ", i64);\n";
618  }
619  return result;
620 }
std::string cpp_to_llvm_name(const std::string &s)

+ Here is the call graph for this function:

std::vector<std::string> anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames ( const std::vector< Analyzer::Expr * > &  target_exprs,
const bool  is_group_by 
)

Definition at line 1743 of file NativeCodegen.cpp.

1744  {
1745  std::vector<std::string> result;
1746  for (size_t target_idx = 0, agg_col_idx = 0; target_idx < target_exprs.size();
1747  ++target_idx, ++agg_col_idx) {
1748  const auto target_expr = target_exprs[target_idx];
1749  CHECK(target_expr);
1750  const auto target_type_info = target_expr->get_type_info();
1751  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
1752  const bool is_varlen =
1753  (target_type_info.is_string() &&
1754  target_type_info.get_compression() == kENCODING_NONE) ||
1755  target_type_info.is_array(); // TODO: should it use is_varlen_array() ?
1756  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
1757  result.emplace_back(target_type_info.is_fp() ? "agg_id_double" : "agg_id");
1758  if (is_varlen) {
1759  result.emplace_back("agg_id");
1760  }
1761  if (target_type_info.is_geometry()) {
1762  result.emplace_back("agg_id");
1763  for (auto i = 2; i < 2 * target_type_info.get_physical_coord_cols(); ++i) {
1764  result.emplace_back("agg_id");
1765  }
1766  }
1767  continue;
1768  }
1769  const auto agg_type = agg_expr->get_aggtype();
1770  const auto& agg_type_info =
1771  agg_type != kCOUNT ? agg_expr->get_arg()->get_type_info() : target_type_info;
1772  switch (agg_type) {
1773  case kAVG: {
1774  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1775  !agg_type_info.is_fp()) {
1776  throw std::runtime_error("AVG is only valid on integer and floating point");
1777  }
1778  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1779  ? "agg_sum"
1780  : "agg_sum_double");
1781  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1782  ? "agg_count"
1783  : "agg_count_double");
1784  break;
1785  }
1786  case kMIN: {
1787  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1788  agg_type_info.is_geometry()) {
1789  throw std::runtime_error(
1790  "MIN on strings, arrays or geospatial types not supported yet");
1791  }
1792  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1793  ? "agg_min"
1794  : "agg_min_double");
1795  break;
1796  }
1797  case kMAX: {
1798  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1799  agg_type_info.is_geometry()) {
1800  throw std::runtime_error(
1801  "MAX on strings, arrays or geospatial types not supported yet");
1802  }
1803  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1804  ? "agg_max"
1805  : "agg_max_double");
1806  break;
1807  }
1808  case kSUM: {
1809  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1810  !agg_type_info.is_fp()) {
1811  throw std::runtime_error("SUM is only valid on integer and floating point");
1812  }
1813  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1814  ? "agg_sum"
1815  : "agg_sum_double");
1816  break;
1817  }
1818  case kCOUNT:
1819  result.emplace_back(agg_expr->get_is_distinct() ? "agg_count_distinct"
1820  : "agg_count");
1821  break;
1822  case kSINGLE_VALUE: {
1823  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1824  break;
1825  }
1826  case kSAMPLE: {
1827  // Note that varlen SAMPLE arguments are handled separately above
1828  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1829  break;
1830  }
1831  case kAPPROX_COUNT_DISTINCT:
1832  result.emplace_back("agg_approximate_count_distinct");
1833  break;
1834  case kAPPROX_QUANTILE:
1835  result.emplace_back("agg_approx_quantile");
1836  break;
1837  default:
1838  CHECK(false);
1839  }
1840  }
1841  return result;
1842 }
Definition: sqldefs.h:75
Definition: sqldefs.h:77
Definition: sqldefs.h:78
SQLAgg get_aggtype() const
Definition: Analyzer.h:1201
#define CHECK(condition)
Definition: Logger.h:223
Definition: sqldefs.h:76
Definition: sqldefs.h:74
size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size ( const bool  shared_mem_used,
const QueryMemoryDescriptor query_mem_desc_ptr 
)

Definition at line 2466 of file NativeCodegen.cpp.

2467  {
2468  return shared_mem_used
2469  ? (query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount())
2470  : 0;
2471 }
bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported ( const QueryMemoryDescriptor query_mem_desc_ptr,
const RelAlgExecutionUnit ra_exe_unit,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const ExecutorDeviceType  device_type,
const unsigned  gpu_blocksize,
const unsigned  num_blocks_per_mp 
)

To simplify the implementation for practical purposes, we initially provide shared memory support for cases where there are at most as many entries in the output buffer as there are threads within each GPU device. In order to relax this assumption later, we need to add a for loop in generated codes such that each thread loops over multiple entries. TODO: relax this if necessary

Definition at line 2473 of file NativeCodegen.cpp.

2478  {
2479  if (device_type == ExecutorDeviceType::CPU) {
2480  return false;
2481  }
2482  if (query_mem_desc_ptr->didOutputColumnar()) {
2483  return false;
2484  }
2485  CHECK(query_mem_desc_ptr);
2486  CHECK(cuda_mgr);
2487  /*
2488  * We only use shared memory strategy if GPU hardware provides native shared
2489  * memory atomics support. From CUDA Toolkit documentation:
2490  * https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
2491  * Maxwell, Pascal [and Volta] provides native shared memory atomic operations
2492  * for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
2493  * (CAS)."
2494  *
2495  **/
2496  if (!cuda_mgr->isArchMaxwellOrLaterForAll()) {
2497  return false;
2498  }
2499 
2500  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2501  QueryDescriptionType::NonGroupedAggregate &&
2502  g_enable_smem_non_grouped_agg &&
2503  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty()) {
2504  // TODO: relax this, if necessary
2505  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
2506  return false;
2507  }
2508  // skip shared memory usage when dealing with 1) variable length targets, 2)
2509  // not a COUNT aggregate
2510  const auto target_infos =
2511  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2512  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
2513  if (std::find_if(target_infos.begin(),
2514  target_infos.end(),
2515  [&supported_aggs](const TargetInfo& ti) {
2516  if (ti.sql_type.is_varlen() ||
2517  !supported_aggs.count(ti.agg_kind)) {
2518  return true;
2519  } else {
2520  return false;
2521  }
2522  }) == target_infos.end()) {
2523  return true;
2524  }
2525  }
2526  if (query_mem_desc_ptr->getQueryDescriptionType() ==
      [NOTE: listing lines 2527–2536 are missing from this extraction — the elided
       condition compares against QueryDescriptionType::GroupByPerfectHash and
       checks g_enable_smem_group_by (see the referenced globals below); verify
       against NativeCodegen.cpp line 2526 ff.]
2537  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
2538  return false;
2539  }
2540 
2541  // Fundamentally, we should use shared memory whenever the output buffer
2542  // is small enough so that we can fit it in the shared memory and yet expect
2543  // good occupancy.
2544  // For now, we allow keyless, row-wise layout, and only for perfect hash
2545  // group by operations.
2546  if (query_mem_desc_ptr->hasKeylessHash() &&
2547  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty() &&
2548  !query_mem_desc_ptr->useStreamingTopN()) {
2549  const size_t shared_memory_threshold_bytes = std::min(
2550  g_gpu_smem_threshold == 0 ? SIZE_MAX : g_gpu_smem_threshold,
2551  cuda_mgr->getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp);
2552  const auto output_buffer_size =
2553  query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount();
2554  if (output_buffer_size > shared_memory_threshold_bytes) {
2555  return false;
2556  }
2557 
2558  // skip shared memory usage when dealing with 1) variable length targets, 2)
2559  // non-basic aggregates (COUNT, SUM, MIN, MAX, AVG)
2560  // TODO: relax this if necessary
2561  const auto target_infos =
2562  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2563  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
2564  if (g_enable_smem_grouped_non_count_agg) {
2565  supported_aggs = {kCOUNT, kMIN, kMAX, kSUM, kAVG};
2566  }
2567  if (std::find_if(target_infos.begin(),
2568  target_infos.end(),
2569  [&supported_aggs](const TargetInfo& ti) {
2570  if (ti.sql_type.is_varlen() ||
2571  !supported_aggs.count(ti.agg_kind)) {
2572  return true;
2573  } else {
2574  return false;
2575  }
2576  }) == target_infos.end()) {
2577  return true;
2578  }
2579  }
2580  }
2581  return false;
2582 }
std::vector< Analyzer::Expr * > target_exprs
bool g_enable_smem_group_by
bool countDistinctDescriptorsLogicallyEmpty() const
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:138
Definition: sqldefs.h:75
Definition: sqldefs.h:77
size_t getMinSharedMemoryPerBlockForAllDevices() const
Definition: CudaMgr.h:121
QueryDescriptionType getQueryDescriptionType() const
bool isArchMaxwellOrLaterForAll() const
Definition: CudaMgr.cpp:331
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:135
Definition: sqldefs.h:78
#define CHECK(condition)
Definition: Logger.h:223
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
Definition: sqldefs.h:76
Definition: sqldefs.h:74
size_t g_gpu_smem_threshold
Definition: Execute.cpp:130
void anonymous_namespace{NativeCodegen.cpp}::optimize_ir ( llvm::Function *  query_func,
llvm::Module *  llvm_module,
llvm::legacy::PassManager &  pass_manager,
const std::unordered_set< llvm::Function * > &  live_funcs,
const bool  is_gpu_smem_used,
const CompilationOptions co 
)

Definition at line 304 of file NativeCodegen.cpp.

References DEBUG_TIMER, and eliminate_dead_self_recursive_funcs().

Referenced by CodeGenerator::generateNativeCPUCode(), and get_device_parameters().

309  {
310  auto timer = DEBUG_TIMER(__func__);
311  // the always inliner legacy pass must always run first
312  pass_manager.add(llvm::createVerifierPass());
313  pass_manager.add(llvm::createAlwaysInlinerLegacyPass());
314 
315  pass_manager.add(new AnnotateInternalFunctionsPass());
316 
317  pass_manager.add(llvm::createSROAPass());
318  // mem ssa drops unused load and store instructions, e.g. passing variables directly
319  // where possible
320  pass_manager.add(
321  llvm::createEarlyCSEPass(/*enable_mem_ssa=*/true)); // Catch trivial redundancies
322 
323  if (!is_gpu_smem_used) {
324  // thread jumps can change the execution order around SMEM sections guarded by
325  // `__syncthreads()`, which results in race conditions. For now, disable jump
326  // threading for shared memory queries. In the future, consider handling shared memory
327  // aggregations with a separate kernel launch
328  pass_manager.add(llvm::createJumpThreadingPass()); // Thread jumps.
329  }
330  pass_manager.add(llvm::createCFGSimplificationPass());
331 
332  // remove load/stores in PHIs if instructions can be accessed directly post thread jumps
333  pass_manager.add(llvm::createNewGVNPass());
334 
335  pass_manager.add(llvm::createDeadStoreEliminationPass());
336  pass_manager.add(llvm::createLICMPass());
337 
338  pass_manager.add(llvm::createInstructionCombiningPass());
339 
340  // module passes
341  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
342  pass_manager.add(llvm::createGlobalOptimizerPass());
343 
344  pass_manager.add(llvm::createCFGSimplificationPass()); // cleanup after everything
345 
346  pass_manager.run(*llvm_module);
347 
348  eliminate_dead_self_recursive_funcs(*llvm_module, live_funcs);
349 }
void eliminate_dead_self_recursive_funcs(llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
#define DEBUG_TIMER(name)
Definition: Logger.h:370

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Function &  F,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 191 of file NativeCodegen.cpp.

Referenced by scan_function_calls().

194  {
195  for (llvm::inst_iterator I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
196  if (auto* CI = llvm::dyn_cast<llvm::CallInst>(&*I)) {
197  auto* F2 = CI->getCalledFunction();
198  if (F2 != nullptr) {
199  auto F2name = F2->getName().str();
200  if (F2->isDeclaration()) {
201  if (F2name.rfind("__", 0) !=
202  0 // assume symbols with double underscore are defined
203  && F2name.rfind("llvm.", 0) !=
204  0 // TODO: this may give false positive for NVVM intrinsics
205  && ignored.find(F2name) == ignored.end() // not in ignored list
206  ) {
207  undefined.emplace(F2name);
208  }
209  } else {
210  if (defined.find(F2name) == defined.end()) {
211  defined.emplace(F2name);
212  scan_function_calls<T>(*F2, defined, undefined, ignored);
213  }
214  }
215  }
216  }
217  }
218 }

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  llvm_module,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 221 of file NativeCodegen.cpp.

References scan_function_calls().

224  {
225  for (auto& F : llvm_module) {
226  if (!F.isDeclaration()) {
227  scan_function_calls(F, defined, undefined, ignored);
228  }
229  }
230 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)

+ Here is the call graph for this function:

template<typename T = void>
std::tuple<std::unordered_set<std::string>, std::unordered_set<std::string> > anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  llvm_module,
const std::unordered_set< std::string > &  ignored = {} 
)

Definition at line 234 of file NativeCodegen.cpp.

235  {}) {
236  std::unordered_set<std::string> defined, undefined;
237  scan_function_calls(llvm_module, defined, undefined, ignored);
238  return std::make_tuple(defined, undefined);
239 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes ( llvm::Function *  query_func,
CgenState cgen_state 
)

Definition at line 2585 of file NativeCodegen.cpp.

References query_mem_desc.

2586  {
2587  std::string llvm_ir;
2588  std::unordered_set<llvm::MDNode*> md;
2589 
2590  // Loop over all instructions in the query function.
2591  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
2592  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2593  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2594  instr_it->getAllMetadata(imd);
2595  for (auto [kind, node] : imd) {
2596  md.insert(node);
2597  }
2598  }
2599  }
2600 
2601  // Loop over all instructions in the row function.
2602  for (auto bb_it = cgen_state->row_func_->begin(); bb_it != cgen_state->row_func_->end();
2603  ++bb_it) {
2604  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2605  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2606  instr_it->getAllMetadata(imd);
2607  for (auto [kind, node] : imd) {
2608  md.insert(node);
2609  }
2610  }
2611  }
2612 
2613  // Loop over all instructions in the filter function.
2614  if (cgen_state->filter_func_) {
2615  for (auto bb_it = cgen_state->filter_func_->begin();
2616  bb_it != cgen_state->filter_func_->end();
2617  ++bb_it) {
2618  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2619  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2620  instr_it->getAllMetadata(imd);
2621  for (auto [kind, node] : imd) {
2622  md.insert(node);
2623  }
2624  }
2625  }
2626  }
2627 
2628  // Sort the metadata by canonical number and convert to text.
2629  if (!md.empty()) {
2630  std::map<size_t, std::string> sorted_strings;
2631  for (auto p : md) {
2632  std::string str;
2633  llvm::raw_string_ostream os(str);
2634  p->print(os, cgen_state->module_, true);
2635  os.flush();
2636  auto fields = split(str, {}, 1);
2637  if (fields.empty() || fields[0].empty()) {
2638  continue;
2639  }
2640  sorted_strings.emplace(std::stoul(fields[0].substr(1)), str);
2641  }
2642  llvm_ir += "\n";
2643  for (auto [id, text] : sorted_strings) {
2644  llvm_ir += text;
2645  llvm_ir += "\n";
2646  }
2647  }
2648 
2649  return llvm_ir;
2650 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
llvm::Function * row_func_
Definition: CgenState.h:351
llvm::Module * module_
Definition: CgenState.h:350
llvm::Function * filter_func_
Definition: CgenState.h:352
void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames ( llvm::Function *  row_func,
const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals 
)

Definition at line 1600 of file NativeCodegen.cpp.

1603  {
1604  auto arg_it = row_func->arg_begin();
1605 
1606  if (agg_col_count) {
1607  for (size_t i = 0; i < agg_col_count; ++i) {
1608  arg_it->setName("out");
1609  ++arg_it;
1610  }
1611  } else {
1612  arg_it->setName("group_by_buff");
1613  ++arg_it;
1614  arg_it->setName("varlen_output_buff");
1615  ++arg_it;
1616  arg_it->setName("crt_matched");
1617  ++arg_it;
1618  arg_it->setName("total_matched");
1619  ++arg_it;
1620  arg_it->setName("old_total_matched");
1621  ++arg_it;
1622  arg_it->setName("max_matched");
1623  ++arg_it;
1624  }
1625 
1626  arg_it->setName("agg_init_val");
1627  ++arg_it;
1628 
1629  arg_it->setName("pos");
1630  ++arg_it;
1631 
1632  arg_it->setName("frag_row_off");
1633  ++arg_it;
1634 
1635  arg_it->setName("num_rows_per_scan");
1636  ++arg_it;
1637 
1638  if (hoist_literals) {
1639  arg_it->setName("literals");
1640  ++arg_it;
1641  }
1642 
1643  for (size_t i = 0; i < in_col_count; ++i) {
1644  arg_it->setName("col_buf" + std::to_string(i));
1645  ++arg_it;
1646  }
1647 
1648  arg_it->setName("join_hash_tables");
1649 }
std::string to_string(char const *&&v)
template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module &  llvm_module)

Definition at line 152 of file NativeCodegen.cpp.

References anonymous_namespace{Utm.h}::f.

Referenced by show_defined().

152  {
153  std::cout << "defines: ";
154  for (auto& f : llvm_module.getFunctionList()) {
155  if (!f.isDeclaration()) {
156  std::cout << f.getName().str() << ", ";
157  }
158  }
159  std::cout << std::endl;
160 }
constexpr double f
Definition: Utm.h:31

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module *  llvm_module)

Definition at line 163 of file NativeCodegen.cpp.

References show_defined().

163  {
164  if (llvm_module == nullptr) {
165  std::cout << "is null" << std::endl;
166  } else {
167  show_defined(*llvm_module);
168  }
169 }
void show_defined(llvm::Module &llvm_module)

+ Here is the call graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( std::unique_ptr< llvm::Module > &  llvm_module)

Definition at line 172 of file NativeCodegen.cpp.

References show_defined().

172  {
173  show_defined(llvm_module.get());
174 }
void show_defined(llvm::Module &llvm_module)

+ Here is the call graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::throw_parseIR_error ( const llvm::SMDiagnostic &  parse_error,
std::string  src = "",
const bool  is_gpu = false 
)

Definition at line 120 of file NativeCodegen.cpp.

122  {
123  std::string excname = (is_gpu ? "NVVM IR ParseError: " : "LLVM IR ParseError: ");
124  llvm::raw_string_ostream ss(excname);
125  parse_error.print(src.c_str(), ss, false, false);
126  throw ParseIRError(ss.str());
127 }

Variable Documentation

const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

Definition at line 622 of file NativeCodegen.cpp.