OmniSciDB  f17484ade4
anonymous_namespace{NativeCodegen.cpp} Namespace Reference

Classes

class  CaseExprDetector
 

Functions

void throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
 
template<typename T = void>
void show_defined (llvm::Module &llvm_module)
 
template<typename T = void>
void show_defined (llvm::Module *llvm_module)
 
template<typename T = void>
void show_defined (std::unique_ptr< llvm::Module > &llvm_module)
 
template<typename T = void>
void scan_function_calls (llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
void scan_function_calls (llvm::Module &llvm_module, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
std::tuple< std::unordered_set< std::string >, std::unordered_set< std::string > > scan_function_calls (llvm::Module &llvm_module, const std::unordered_set< std::string > &ignored={})
 
void eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
 
void optimize_ir (llvm::Function *query_func, llvm::Module *llvm_module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function * > &live_funcs, const bool is_gpu_smem_used, const CompilationOptions &co)
 
std::string assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *llvm_module)
 
ExecutionEngineWrapper create_execution_engine (llvm::Module *llvm_module, llvm::EngineBuilder &eb, const CompilationOptions &co)
 
std::string cpp_to_llvm_name (const std::string &s)
 
std::string gen_array_any_all_sigs ()
 
std::string gen_translate_null_key_sigs ()
 
void bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *llvm_module)
 
void set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *llvm_module, llvm::LLVMContext &context)
 
void bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *llvm_module)
 
std::vector< std::string > get_agg_fnames (const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
 
template<typename InstType >
llvm::Value * find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool has_count_expr (RelAlgExecutionUnit const &ra_exe_unit)
 
bool has_case_expr_within_groupby_expr (RelAlgExecutionUnit const &ra_exe_unit)
 
bool is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned cuda_blocksize, const unsigned num_blocks_per_mp)
 
std::string serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 

Variables

const std::string cuda_rt_decls
 

Function Documentation

std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU ( ExecutionEngineWrapper &  execution_engine,
llvm::Module *  llvm_module 
)

Definition at line 400 of file NativeCodegen.cpp.

References CHECK.

Referenced by create_execution_engine().

{
  llvm::legacy::PassManager pass_manager;
  auto cpu_target_machine = execution_engine->getTargetMachine();
  CHECK(cpu_target_machine);
  llvm::SmallString<256> code_str;
  llvm::raw_svector_ostream os(code_str);
#if LLVM_VERSION_MAJOR >= 10
  cpu_target_machine->addPassesToEmitFile(
      pass_manager, os, nullptr, llvm::CGFT_AssemblyFile);
#else
  cpu_target_machine->addPassesToEmitFile(
      pass_manager, os, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
#endif
  pass_manager.run(*llvm_module);
  return "Assembly for the CPU:\n" + std::string(code_str.str()) + "\nEnd of assembly";
}


void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders ( const std::string &  pos_fn_name,
const bool  use_resume_param,
llvm::Function *  query_func,
llvm::Module *  llvm_module 
)

Definition at line 1682 of file NativeCodegen.cpp.

{
  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
       ++it) {
    if (!llvm::isa<llvm::CallInst>(*it)) {
      continue;
    }
    auto& pos_call = llvm::cast<llvm::CallInst>(*it);
    auto const func_name = CodegenUtil::getCalledFunctionName(pos_call);
    if (func_name && *func_name == pos_fn_name) {
      if (use_resume_param) {
        auto* const row_index_resume = get_arg_by_name(query_func, "row_index_resume");
        llvm::ReplaceInstWithInst(
            &pos_call,
            llvm::CallInst::Create(llvm_module->getFunction(pos_fn_name + "_impl"),
                                   row_index_resume));
      } else {
        llvm::ReplaceInstWithInst(
            &pos_call,
            llvm::CallInst::Create(llvm_module->getFunction(pos_fn_name + "_impl")));
      }
      break;
    }
  }
}
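A minimal usage sketch (the placeholder names below are illustrative; the actual call sites live in the query compilation pipeline):

  // Rewire the "pos_start" placeholder to pos_start_impl, forwarding the resume
  // argument, then rewire "group_buff_idx" with no extra arguments.
  bind_pos_placeholders("pos_start", /*use_resume_param=*/true, query_func, llvm_module);
  bind_pos_placeholders("group_buff_idx", /*use_resume_param=*/false, query_func, llvm_module);
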
void anonymous_namespace{NativeCodegen.cpp}::bind_query ( llvm::Function *  query_func,
const std::string &  query_fname,
llvm::Function *  multifrag_query_func,
llvm::Module *  llvm_module 
)

Definition at line 1832 of file NativeCodegen.cpp.

1835  {
1836  std::vector<llvm::CallInst*> query_stubs;
1837  for (auto it = llvm::inst_begin(multifrag_query_func),
1838  e = llvm::inst_end(multifrag_query_func);
1839  it != e;
1840  ++it) {
1841  if (!llvm::isa<llvm::CallInst>(*it)) {
1842  continue;
1843  }
1844  auto& query_call = llvm::cast<llvm::CallInst>(*it);
1845  auto const call_func_name = CodegenUtil::getCalledFunctionName(query_call);
1846  if (call_func_name && *call_func_name == query_fname) {
1847  query_stubs.push_back(&query_call);
1848  }
1849  }
1850  for (auto& S : query_stubs) {
1851  std::vector<llvm::Value*> args;
1852  for (size_t i = 0; i < S->getNumOperands() - 1; ++i) {
1853  args.push_back(S->getArgOperand(i));
1854  }
1855  llvm::ReplaceInstWithInst(S, llvm::CallInst::Create(query_func, args, ""));
1856  }
1857 }
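A usage sketch, assuming the multi-fragment driver calls a stub whose name is passed as query_fname (the stub name here is hypothetical):

  // Replace every call to the stub with a direct call to the generated kernel.
  bind_query(query_func, "query_stub", multifrag_query_func, llvm_module);
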
std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name ( const std::string &  s)

Definition at line 592 of file NativeCodegen.cpp.

References CHECK.

Referenced by gen_array_any_all_sigs(), and gen_translate_null_key_sigs().

{
  if (s == "int8_t") {
    return "i8";
  }
  if (s == "int16_t") {
    return "i16";
  }
  if (s == "int32_t") {
    return "i32";
  }
  if (s == "int64_t") {
    return "i64";
  }
  CHECK(s == "float" || s == "double");
  return s;
}
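For example:

  cpp_to_llvm_name("int32_t");  // returns "i32"
  cpp_to_llvm_name("double");   // returns "double" (floating-point names pass through)
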

ExecutionEngineWrapper anonymous_namespace{NativeCodegen.cpp}::create_execution_engine ( llvm::Module *  llvm_module,
llvm::EngineBuilder &  eb,
const CompilationOptions co 
)

Definition at line 418 of file NativeCodegen.cpp.

References logger::ASM, assemblyForCPU(), CHECK, DEBUG_TIMER, and LOG.

Referenced by CodeGenerator::generateNativeCPUCode().

{
  auto timer = DEBUG_TIMER(__func__);
  ExecutionEngineWrapper execution_engine(eb.create(), co);
  CHECK(execution_engine.get());
  // Force the module data layout to match the layout for the selected target
  llvm_module->setDataLayout(execution_engine->getDataLayout());

  LOG(ASM) << assemblyForCPU(execution_engine, llvm_module);

  execution_engine->finalizeObject();
  return execution_engine;
}
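A setup sketch, assuming module ownership has been handed to the EngineBuilder beforehand (owner, llvm_module, and co are assumed to exist):

  llvm::EngineBuilder eb(std::move(owner));  // owner: std::unique_ptr<llvm::Module>
  eb.setEngineKind(llvm::EngineKind::JIT);
  auto execution_engine = create_execution_engine(llvm_module, eb, co);
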

llvm::Function* anonymous_namespace{NativeCodegen.cpp}::create_row_function ( const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals,
llvm::Module *  llvm_module,
llvm::LLVMContext &  context 
)

Definition at line 1763 of file NativeCodegen.cpp.

{
  std::vector<llvm::Type*> row_process_arg_types;

  if (agg_col_count) {
    // output (aggregate) arguments
    for (size_t i = 0; i < agg_col_count; ++i) {
      row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
    }
  } else {
    // group by buffer
    row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
    // varlen output buffer
    row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
    // current match count
    row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
    // total match count passed from the caller
    row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
    // old total match count returned to the caller
    row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
    // max matched (total number of slots in the output buffer)
    row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
  }

  // aggregate init values
  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));

  // position argument
  row_process_arg_types.push_back(llvm::Type::getInt64Ty(context));

  // fragment row offset argument
  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));

  // number of rows for each scan
  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));

  // literals buffer argument
  if (hoist_literals) {
    row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
  }

  // column buffer arguments
  for (size_t i = 0; i < in_col_count; ++i) {
    row_process_arg_types.emplace_back(llvm::Type::getInt8PtrTy(context));
  }

  // join hash table argument
  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));

  // row function manager
  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));

  // generate the function
  auto ft =
      llvm::FunctionType::get(get_int_type(32, context), row_process_arg_types, false);

  auto row_func = llvm::Function::Create(
      ft, llvm::Function::ExternalLinkage, "row_func", llvm_module);

  // set the row function argument names; for debugging purposes only
  set_row_func_argnames(row_func, in_col_count, agg_col_count, hoist_literals);

  return row_func;
}
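As an illustration (the counts are assumed for the example), in_col_count=2, agg_col_count=1, and hoist_literals=true produce a function equivalent to:

  // i32 @row_func(i64* %out, i64* %agg_init_val, i64 %pos, i64* %frag_row_off,
  //               i64* %num_rows_per_scan, i8* %literals, i8* %col_buf0,
  //               i8* %col_buf1, i64* %join_hash_tables, i8* %row_func_mgr)
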
void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs ( llvm::Module &  M,
const std::unordered_set< llvm::Function * > &  live_funcs 
)

Definition at line 244 of file NativeCodegen.cpp.

Referenced by optimize_ir().

{
  std::vector<llvm::Function*> dead_funcs;
  for (auto& F : M) {
    bool bAlive = false;
    if (live_funcs.count(&F)) {
      continue;
    }
    for (auto U : F.users()) {
      auto* C = llvm::dyn_cast<const llvm::CallInst>(U);
      if (!C || C->getParent()->getParent() != &F) {
        bAlive = true;
        break;
      }
    }
    if (!bAlive) {
      dead_funcs.push_back(&F);
    }
  }
  for (auto pFn : dead_funcs) {
    pFn->eraseFromParent();
  }
}


template<typename InstType >
llvm::Value* anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block ( llvm::Function *  func,
std::string  bb_name,
std::string  variable_name 
)

Definition at line 2029 of file NativeCodegen.cpp.

{
  llvm::Value* result = nullptr;
  if (func == nullptr || variable_name.empty()) {
    return result;
  }
  bool is_found = false;
  for (auto bb_it = func->begin(); bb_it != func->end() && !is_found; ++bb_it) {
    if (!bb_name.empty() && bb_it->getName() != bb_name) {
      continue;
    }
    for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); inst_it++) {
      if (llvm::isa<InstType>(*inst_it)) {
        if (inst_it->getName() == variable_name) {
          result = &*inst_it;
          is_found = true;
          break;
        }
      }
    }
  }
  return result;
}
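A usage sketch (the block and variable names are hypothetical):

  // Find the load named "total_matched" in the basic block ".exit" of func.
  auto* val =
      find_variable_in_basic_block<llvm::LoadInst>(func, ".exit", "total_matched");
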
std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ( )

Definition at line 609 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

{
  std::string result;
  for (const std::string any_or_all : {"any", "all"}) {
    for (const std::string elem_type :
         {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
      for (const std::string needle_type :
           {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
        for (const std::string op_name : {"eq", "ne", "lt", "le", "gt", "ge"}) {
          result += ("declare i1 @array_" + any_or_all + "_" + op_name + "_" + elem_type +
                     "_" + needle_type + "(i8*, i64, " + cpp_to_llvm_name(needle_type) +
                     ", " + cpp_to_llvm_name(elem_type) + ");\n");
        }
      }
    }
  }
  return result;
}
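Each (comparator, element type, needle type) combination yields one declaration; for example, ANY with the eq comparator over an int32_t element type probed with an int64_t needle produces:

  // declare i1 @array_any_eq_int32_t_int64_t(i8*, i64, i64, i32);
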

std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ( )

Definition at line 627 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

{
  std::string result;
  for (const std::string key_type : {"int8_t", "int16_t", "int32_t", "int64_t"}) {
    const auto key_llvm_type = cpp_to_llvm_name(key_type);
    result += "declare i64 @translate_null_key_" + key_type + "(" + key_llvm_type + ", " +
              key_llvm_type + ", i64);\n";
  }
  return result;
}
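For int32_t keys, for instance, the generated declaration is:

  // declare i64 @translate_null_key_int32_t(i32, i32, i64);
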

std::vector<std::string> anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames ( const std::vector< Analyzer::Expr * > &  target_exprs,
const bool  is_group_by 
)

Definition at line 1859 of file NativeCodegen.cpp.

{
  std::vector<std::string> result;
  for (size_t target_idx = 0, agg_col_idx = 0; target_idx < target_exprs.size();
       ++target_idx, ++agg_col_idx) {
    const auto target_expr = target_exprs[target_idx];
    CHECK(target_expr);
    const auto target_type_info = target_expr->get_type_info();
    const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
    const bool is_varlen =
        (target_type_info.is_string() &&
         target_type_info.get_compression() == kENCODING_NONE) ||
        target_type_info.is_array();  // TODO: should it use is_varlen_array() ?
    if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
      result.emplace_back(target_type_info.is_fp() ? "agg_id_double" : "agg_id");
      if (is_varlen) {
        result.emplace_back("agg_id");
      }
      if (target_type_info.is_geometry()) {
        result.emplace_back("agg_id");
        for (auto i = 2; i < 2 * target_type_info.get_physical_coord_cols(); ++i) {
          result.emplace_back("agg_id");
        }
      }
      continue;
    }
    const auto agg_type = agg_expr->get_aggtype();
    SQLTypeInfo agg_type_info;
    switch (agg_type) {
      case kCOUNT:
      case kCOUNT_IF:
        agg_type_info = target_type_info;
        break;
      default:
        agg_type_info = agg_expr->get_arg()->get_type_info();
        break;
    }
    switch (agg_type) {
      case kAVG: {
        if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
            !agg_type_info.is_fp()) {
          throw std::runtime_error("AVG is only valid on integer and floating point");
        }
        result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
                                ? "agg_sum"
                                : "agg_sum_double");
        result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
                                ? "agg_count"
                                : "agg_count_double");
        break;
      }
      case kMIN: {
        if (agg_type_info.is_string() || agg_type_info.is_array() ||
            agg_type_info.is_geometry()) {
          throw std::runtime_error(
              "MIN on strings, arrays or geospatial types not supported yet");
        }
        result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
                                ? "agg_min"
                                : "agg_min_double");
        break;
      }
      case kMAX: {
        if (agg_type_info.is_string() || agg_type_info.is_array() ||
            agg_type_info.is_geometry()) {
          throw std::runtime_error(
              "MAX on strings, arrays or geospatial types not supported yet");
        }
        result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
                                ? "agg_max"
                                : "agg_max_double");
        break;
      }
      case kSUM:
      case kSUM_IF: {
        if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
            !agg_type_info.is_fp()) {
          throw std::runtime_error(
              "SUM and SUM_IF are only valid on integer and floating point");
        }
        std::string func_name = (agg_type_info.is_integer() || agg_type_info.is_time())
                                    ? "agg_sum"
                                    : "agg_sum_double";
        if (agg_type == kSUM_IF) {
          func_name += "_if";
        }
        result.emplace_back(func_name);
        break;
      }
      case kCOUNT:
        result.emplace_back(agg_expr->get_is_distinct() ? "agg_count_distinct"
                                                        : "agg_count");
        break;
      case kCOUNT_IF:
        result.emplace_back("agg_count_if");
        break;
      case kSINGLE_VALUE: {
        result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
        break;
      }
      case kSAMPLE: {
        // Note that varlen SAMPLE arguments are handled separately above
        result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
        break;
      }
      case kAPPROX_COUNT_DISTINCT:
        result.emplace_back("agg_approximate_count_distinct");
        break;
      case kAPPROX_QUANTILE:
        result.emplace_back("agg_approx_quantile");
        break;
      case kMODE:
        result.emplace_back("agg_mode_func");
        break;
      default:
        UNREACHABLE() << "Unsupported agg_type: " << agg_type;
    }
  }
  return result;
}
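For illustration, a target list of COUNT(*) followed by AVG(x) on an integer column x yields three entries, since AVG lowers to a sum/count pair:

  // Hypothetical targets: COUNT(*), AVG(x) with x of integer type
  // get_agg_fnames(targets, /*is_group_by=*/true)
  //   -> {"agg_count", "agg_sum", "agg_count"}
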
size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size ( const bool  shared_mem_used,
const QueryMemoryDescriptor query_mem_desc_ptr 
)

Definition at line 2603 of file NativeCodegen.cpp.

{
  return shared_mem_used
             ? (query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount())
             : 0;
}
bool anonymous_namespace{NativeCodegen.cpp}::has_case_expr_within_groupby_expr ( RelAlgExecutionUnit const &  ra_exe_unit)

Definition at line 2640 of file NativeCodegen.cpp.

{
  if (ra_exe_unit.groupby_exprs.empty() || !ra_exe_unit.groupby_exprs.front()) {
    return false;
  }
  CaseExprDetector detector;
  for (auto expr : ra_exe_unit.groupby_exprs) {
    if (detector.detectCaseExpr(expr.get())) {
      return true;
    }
  }
  return false;
}
bool anonymous_namespace{NativeCodegen.cpp}::has_count_expr ( RelAlgExecutionUnit const &  ra_exe_unit)

Definition at line 2610 of file NativeCodegen.cpp.

{
  for (auto const expr : ra_exe_unit.target_exprs) {
    if (auto const agg_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
      if (shared::is_any<SQLAgg::kCOUNT, SQLAgg::kCOUNT_IF>(agg_expr->get_aggtype())) {
        return true;
      }
    }
  }
  return false;
}
bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported ( const QueryMemoryDescriptor query_mem_desc_ptr,
const RelAlgExecutionUnit ra_exe_unit,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const ExecutorDeviceType  device_type,
const unsigned  cuda_blocksize,
const unsigned  num_blocks_per_mp 
)

To simplify the implementation for practical purposes, we initially provide shared memory support for cases where there are at most as many entries in the output buffer as there are threads within each GPU device. In order to relax this assumption later, we need to add a for loop in the generated code such that each thread loops over multiple entries. TODO: relax this if necessary

Definition at line 2653 of file NativeCodegen.cpp.

{
  if (device_type == ExecutorDeviceType::CPU) {
    return false;
  }
  if (query_mem_desc_ptr->didOutputColumnar()) {
    return false;
  }
  CHECK(query_mem_desc_ptr);
  CHECK(cuda_mgr);
  /*
   * We only use shared memory strategy if GPU hardware provides native shared
   * memory atomics support. From CUDA Toolkit documentation:
   * https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
   * Maxwell, Pascal [and Volta] provides native shared memory atomic operations
   * for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
   * (CAS)."
   */
  if (!cuda_mgr->isArchMaxwellOrLaterForAll()) {
    return false;
  }
  if (cuda_mgr->isArchPascal() && !ra_exe_unit.join_quals.empty() &&
      has_count_expr(ra_exe_unit) && has_case_expr_within_groupby_expr(ra_exe_unit)) {
    return false;
  }

  if (query_mem_desc_ptr->getQueryDescriptionType() ==
          QueryDescriptionType::NonGroupedAggregate &&
      g_enable_smem_non_grouped_agg &&
      query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty()) {
    // TODO: relax this, if necessary
    if (cuda_blocksize < query_mem_desc_ptr->getEntryCount()) {
      return false;
    }
    // skip shared memory usage when dealing with 1) variable length targets, 2)
    // not a COUNT aggregate
    const auto target_infos =
        target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
    std::unordered_set<SQLAgg> supported_aggs{kCOUNT, kCOUNT_IF};
    if (std::find_if(target_infos.begin(),
                     target_infos.end(),
                     [&supported_aggs](const TargetInfo& ti) {
                       if (ti.sql_type.is_varlen() ||
                           !supported_aggs.count(ti.agg_kind)) {
                         return true;
                       } else {
                         return false;
                       }
                     }) == target_infos.end()) {
      return true;
    }
  }
  if (query_mem_desc_ptr->getQueryDescriptionType() ==
          QueryDescriptionType::GroupByPerfectHash &&
      g_enable_smem_group_by) {
    if (cuda_blocksize < query_mem_desc_ptr->getEntryCount()) {
      return false;
    }

    // Fundamentally, we should use shared memory whenever the output buffer
    // is small enough so that we can fit it in the shared memory and yet expect
    // good occupancy.
    // For now, we allow keyless, row-wise layout, and only for perfect hash
    // group by operations.
    if (query_mem_desc_ptr->hasKeylessHash() &&
        query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty() &&
        !query_mem_desc_ptr->useStreamingTopN()) {
      const size_t shared_memory_threshold_bytes = std::min(
          g_gpu_smem_threshold == 0 ? SIZE_MAX : g_gpu_smem_threshold,
          cuda_mgr->getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp);
      const auto output_buffer_size =
          query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount();
      if (output_buffer_size > shared_memory_threshold_bytes) {
        return false;
      }

      // skip shared memory usage when dealing with 1) variable length targets, 2)
      // non-basic aggregates (COUNT, SUM, MIN, MAX, AVG)
      // TODO: relax this if necessary
      const auto target_infos =
          target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
      std::unordered_set<SQLAgg> supported_aggs{kCOUNT, kCOUNT_IF};
      if (g_enable_smem_grouped_non_count_agg) {
        supported_aggs = {kCOUNT, kCOUNT_IF, kMIN, kMAX, kSUM, kSUM_IF, kAVG};
      }
      if (std::find_if(target_infos.begin(),
                       target_infos.end(),
                       [&supported_aggs](const TargetInfo& ti) {
                         if (ti.sql_type.is_varlen() ||
                             !supported_aggs.count(ti.agg_kind)) {
                           return true;
                         } else {
                           return false;
                         }
                       }) == target_infos.end()) {
        return true;
      }
    }
  }
  return false;
}
void anonymous_namespace{NativeCodegen.cpp}::optimize_ir ( llvm::Function *  query_func,
llvm::Module *  llvm_module,
llvm::legacy::PassManager &  pass_manager,
const std::unordered_set< llvm::Function * > &  live_funcs,
const bool  is_gpu_smem_used,
const CompilationOptions co 
)

Definition at line 306 of file NativeCodegen.cpp.

References DEBUG_TIMER, and eliminate_dead_self_recursive_funcs().

Referenced by CodeGenerator::generateNativeCPUCode().

{
  auto timer = DEBUG_TIMER(__func__);
  // the always inliner legacy pass must always run first
  pass_manager.add(llvm::createVerifierPass());
  pass_manager.add(llvm::createAlwaysInlinerLegacyPass());

  pass_manager.add(new AnnotateInternalFunctionsPass());

  pass_manager.add(llvm::createSROAPass());
  // mem ssa drops unused load and store instructions, e.g. passing variables directly
  // where possible
  pass_manager.add(
      llvm::createEarlyCSEPass(/*enable_mem_ssa=*/true));  // Catch trivial redundancies

  if (!is_gpu_smem_used) {
    // thread jumps can change the execution order around SMEM sections guarded by
    // `__syncthreads()`, which results in race conditions. For now, disable jump
    // threading for shared memory queries. In the future, consider handling shared
    // memory aggregations with a separate kernel launch
    pass_manager.add(llvm::createJumpThreadingPass());  // Thread jumps.
  }
  pass_manager.add(llvm::createCFGSimplificationPass());

  // remove load/stores in PHIs if instructions can be accessed directly post thread jumps
  pass_manager.add(llvm::createNewGVNPass());

  pass_manager.add(llvm::createDeadStoreEliminationPass());
  pass_manager.add(llvm::createLICMPass());

  pass_manager.add(llvm::createInstructionCombiningPass());

  // module passes
  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
  pass_manager.add(llvm::createGlobalOptimizerPass());

  pass_manager.add(llvm::createCFGSimplificationPass());  // cleanup after everything

  pass_manager.run(*llvm_module);

  eliminate_dead_self_recursive_funcs(*llvm_module, live_funcs);
}
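A usage sketch (live_funcs and co are assumed to be prepared by the caller):

  llvm::legacy::PassManager pass_manager;
  optimize_ir(query_func, llvm_module, pass_manager, live_funcs,
              /*is_gpu_smem_used=*/false, co);
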

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Function &  F,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 193 of file NativeCodegen.cpp.

Referenced by scan_function_calls().

{
  for (llvm::inst_iterator I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
    if (auto* CI = llvm::dyn_cast<llvm::CallInst>(&*I)) {
      auto* F2 = CI->getCalledFunction();
      if (F2 != nullptr) {
        auto F2name = F2->getName().str();
        if (F2->isDeclaration()) {
          if (F2name.rfind("__", 0) !=
                  0  // assume symbols with double underscore are defined
              && F2name.rfind("llvm.", 0) !=
                     0  // TODO: this may give false positive for NVVM intrinsics
              && ignored.find(F2name) == ignored.end()  // not in ignored list
          ) {
            undefined.emplace(F2name);
          }
        } else {
          if (defined.find(F2name) == defined.end()) {
            defined.emplace(F2name);
            scan_function_calls<T>(*F2, defined, undefined, ignored);
          }
        }
      }
    }
  }
}


template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  llvm_module,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 223 of file NativeCodegen.cpp.

References scan_function_calls().

{
  for (auto& F : llvm_module) {
    if (!F.isDeclaration()) {
      scan_function_calls(F, defined, undefined, ignored);
    }
  }
}

template<typename T = void>
std::tuple<std::unordered_set<std::string>, std::unordered_set<std::string> > anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  llvm_module,
const std::unordered_set< std::string > &  ignored = {} 
)

Definition at line 236 of file NativeCodegen.cpp.

{
  std::unordered_set<std::string> defined, undefined;
  scan_function_calls(llvm_module, defined, undefined, ignored);
  return std::make_tuple(defined, undefined);
}
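A usage sketch (the ignored set here is illustrative):

  auto [defined, undefined] = scan_function_calls(*llvm_module, /*ignored=*/{"free"});
  for (const auto& fname : undefined) {
    LOG(WARNING) << "undefined function: " << fname;
  }
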
std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes ( llvm::Function *  query_func,
CgenState cgen_state 
)

Definition at line 2769 of file NativeCodegen.cpp.

{
  std::string llvm_ir;
  std::unordered_set<llvm::MDNode*> md;

  // Loop over all instructions in the query function.
  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
    for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
      llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
      instr_it->getAllMetadata(imd);
      for (auto [kind, node] : imd) {
        md.insert(node);
      }
    }
  }

  // Loop over all instructions in the row function.
  for (auto bb_it = cgen_state->row_func_->begin(); bb_it != cgen_state->row_func_->end();
       ++bb_it) {
    for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
      llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
      instr_it->getAllMetadata(imd);
      for (auto [kind, node] : imd) {
        md.insert(node);
      }
    }
  }

  // Loop over all instructions in the filter function.
  if (cgen_state->filter_func_) {
    for (auto bb_it = cgen_state->filter_func_->begin();
         bb_it != cgen_state->filter_func_->end();
         ++bb_it) {
      for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
        llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
        instr_it->getAllMetadata(imd);
        for (auto [kind, node] : imd) {
          md.insert(node);
        }
      }
    }
  }

  // Sort the metadata by canonical number and convert to text.
  if (!md.empty()) {
    std::map<size_t, std::string> sorted_strings;
    for (auto p : md) {
      std::string str;
      llvm::raw_string_ostream os(str);
      p->print(os, cgen_state->module_, true);
      os.flush();
      auto fields = split(str, {}, 1);
      if (fields.empty() || fields[0].empty()) {
        continue;
      }
      sorted_strings.emplace(std::stoul(fields[0].substr(1)), str);
    }
    llvm_ir += "\n";
    for (auto [id, text] : sorted_strings) {
      llvm_ir += text;
      llvm_ir += "\n";
    }
  }

  return llvm_ir;
}
void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames ( llvm::Function *  row_func,
const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals 
)

Definition at line 1710 of file NativeCodegen.cpp.

References to_string().

{
  auto arg_it = row_func->arg_begin();

  if (agg_col_count) {
    for (size_t i = 0; i < agg_col_count; ++i) {
      arg_it->setName("out");
      ++arg_it;
    }
  } else {
    arg_it->setName("group_by_buff");
    ++arg_it;
    arg_it->setName("varlen_output_buff");
    ++arg_it;
    arg_it->setName("crt_matched");
    ++arg_it;
    arg_it->setName("total_matched");
    ++arg_it;
    arg_it->setName("old_total_matched");
    ++arg_it;
    arg_it->setName("max_matched");
    ++arg_it;
  }

  arg_it->setName("agg_init_val");
  ++arg_it;

  arg_it->setName("pos");
  ++arg_it;

  arg_it->setName("frag_row_off");
  ++arg_it;

  arg_it->setName("num_rows_per_scan");
  ++arg_it;

  if (hoist_literals) {
    arg_it->setName("literals");
    ++arg_it;
  }

  for (size_t i = 0; i < in_col_count; ++i) {
    arg_it->setName("col_buf" + std::to_string(i));
    ++arg_it;
  }

  arg_it->setName("join_hash_tables");
  ++arg_it;
  arg_it->setName("row_func_mgr");
}

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module &  llvm_module)

Definition at line 154 of file NativeCodegen.cpp.


Referenced by show_defined().

{
  std::cout << "defines: ";
  for (auto& f : llvm_module.getFunctionList()) {
    if (!f.isDeclaration()) {
      std::cout << f.getName().str() << ", ";
    }
  }
  std::cout << std::endl;
}

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module *  llvm_module)

Definition at line 165 of file NativeCodegen.cpp.

References show_defined().

{
  if (llvm_module == nullptr) {
    std::cout << "is null" << std::endl;
  } else {
    show_defined(*llvm_module);
  }
}

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( std::unique_ptr< llvm::Module > &  llvm_module)

Definition at line 174 of file NativeCodegen.cpp.

References show_defined().

{
  show_defined(llvm_module.get());
}

void anonymous_namespace{NativeCodegen.cpp}::throw_parseIR_error ( const llvm::SMDiagnostic &  parse_error,
std::string  src = "",
const bool  is_gpu = false 
)

Definition at line 122 of file NativeCodegen.cpp.

Referenced by Executor::optimizeAndCodegenGPU().

{
  std::string excname = (is_gpu ? "NVVM IR ParseError: " : "LLVM IR ParseError: ");
  llvm::raw_string_ostream ss(excname);
  parse_error.print(src.c_str(), ss, false, false);
  throw ParseIRError(ss.str());
}

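A minimal sketch, assuming the IR is parsed with llvm::parseIR (buffer and context are assumed to exist):

  llvm::SMDiagnostic parse_error;
  auto llvm_module = llvm::parseIR(buffer->getMemBufferRef(), parse_error, context);
  if (!llvm_module) {
    throw_parseIR_error(parse_error, "<generated IR>", /*is_gpu=*/false);
  }
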

Variable Documentation

const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

Definition at line 637 of file NativeCodegen.cpp.