OmniSciDB  a5dc49c757
anonymous_namespace{NativeCodegen.cpp} Namespace Reference

Classes

class  CaseExprDetector
 

Functions

void throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
 
template<typename T = void>
void show_defined (llvm::Module &llvm_module)
 
template<typename T = void>
void show_defined (llvm::Module *llvm_module)
 
template<typename T = void>
void show_defined (std::unique_ptr< llvm::Module > &llvm_module)
 
template<typename T = void>
void scan_function_calls (llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
void scan_function_calls (llvm::Module &llvm_module, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
std::tuple< std::unordered_set< std::string >, std::unordered_set< std::string > > 
scan_function_calls (llvm::Module &llvm_module, const std::unordered_set< std::string > &ignored={})
 
void eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
 
void optimize_ir (llvm::Function *query_func, llvm::Module *llvm_module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function * > &live_funcs, const bool is_gpu_smem_used, const CompilationOptions &co)
 
std::string assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *llvm_module)
 
ExecutionEngineWrapper create_execution_engine (llvm::Module *llvm_module, llvm::EngineBuilder &eb, const CompilationOptions &co)
 
std::string cpp_to_llvm_name (const std::string &s)
 
std::string gen_array_any_all_sigs ()
 
std::string gen_translate_null_key_sigs ()
 
void bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *llvm_module)
 
void set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *llvm_module, llvm::LLVMContext &context)
 
void bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *llvm_module)
 
std::vector< std::string > get_agg_fnames (const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
 
template<typename InstType >
llvm::Value * find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool has_count_expr (RelAlgExecutionUnit const &ra_exe_unit)
 
bool has_case_expr_within_groupby_expr (RelAlgExecutionUnit const &ra_exe_unit)
 
bool is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned cuda_blocksize, const unsigned num_blocks_per_mp)
 
std::string serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 

Variables

const std::string cuda_rt_decls
 

Function Documentation

std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU ( ExecutionEngineWrapper &  execution_engine,
llvm::Module *  llvm_module 
)

Definition at line 402 of file NativeCodegen.cpp.

References CHECK.

Referenced by create_execution_engine().

403  {
404  llvm::legacy::PassManager pass_manager;
405  auto cpu_target_machine = execution_engine->getTargetMachine();
406  CHECK(cpu_target_machine);
407  llvm::SmallString<256> code_str;
408  llvm::raw_svector_ostream os(code_str);
409 #if LLVM_VERSION_MAJOR >= 10
410  cpu_target_machine->addPassesToEmitFile(
411  pass_manager, os, nullptr, llvm::CGFT_AssemblyFile);
412 #else
413  cpu_target_machine->addPassesToEmitFile(
414  pass_manager, os, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
415 #endif
416  pass_manager.run(*llvm_module);
417  return "Assembly for the CPU:\n" + std::string(code_str.str()) + "\nEnd of assembly";
418 }
#define CHECK(condition)
Definition: Logger.h:291


void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders ( const std::string &  pos_fn_name,
const bool  use_resume_param,
llvm::Function *  query_func,
llvm::Module *  llvm_module 
)

Definition at line 1656 of file NativeCodegen.cpp.

1659  {
1660  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1661  ++it) {
1662  if (!llvm::isa<llvm::CallInst>(*it)) {
1663  continue;
1664  }
1665  auto& pos_call = llvm::cast<llvm::CallInst>(*it);
1666  auto const func_name = CodegenUtil::getCalledFunctionName(pos_call);
1667  if (func_name && *func_name == pos_fn_name) {
1668  if (use_resume_param) {
1669  auto* const row_index_resume = get_arg_by_name(query_func, "row_index_resume");
1670  llvm::ReplaceInstWithInst(
1671  &pos_call,
1672  llvm::CallInst::Create(llvm_module->getFunction(pos_fn_name + "_impl"),
1673  row_index_resume));
1674  } else {
1675  llvm::ReplaceInstWithInst(
1676  &pos_call,
1677  llvm::CallInst::Create(llvm_module->getFunction(pos_fn_name + "_impl")));
1678  }
1679  break;
1680  }
1681  }
1682 }
std::optional< std::string_view > getCalledFunctionName(llvm::CallInst &call_inst)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:168
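
For illustration, the rewrite performed here at the IR level looks roughly like the following (a sketch: the placeholder name is whatever pos_fn_name the caller passes, assumed to be "pos_start" below, and the i64 return type and i32 resume-argument type are illustrative):

  %pos = call i64 @pos_start()                             ; before
  %pos = call i64 @pos_start_impl(i32 %row_index_resume)   ; after, use_resume_param == true
  %pos = call i64 @pos_start_impl()                        ; after, use_resume_param == false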
void anonymous_namespace{NativeCodegen.cpp}::bind_query ( llvm::Function *  query_func,
const std::string &  query_fname,
llvm::Function *  multifrag_query_func,
llvm::Module *  llvm_module 
)

Definition at line 1806 of file NativeCodegen.cpp.

1809  {
1810  std::vector<llvm::CallInst*> query_stubs;
1811  for (auto it = llvm::inst_begin(multifrag_query_func),
1812  e = llvm::inst_end(multifrag_query_func);
1813  it != e;
1814  ++it) {
1815  if (!llvm::isa<llvm::CallInst>(*it)) {
1816  continue;
1817  }
1818  auto& query_call = llvm::cast<llvm::CallInst>(*it);
1819  auto const call_func_name = CodegenUtil::getCalledFunctionName(query_call);
1820  if (call_func_name && *call_func_name == query_fname) {
1821  query_stubs.push_back(&query_call);
1822  }
1823  }
1824  for (auto& S : query_stubs) {
1825  std::vector<llvm::Value*> args;
1826  for (size_t i = 0; i < S->getNumOperands() - 1; ++i) {
1827  args.push_back(S->getArgOperand(i));
1828  }
1829  llvm::ReplaceInstWithInst(S, llvm::CallInst::Create(query_func, args, ""));
1830  }
1831 }
std::optional< std::string_view > getCalledFunctionName(llvm::CallInst &call_inst)
std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name ( const std::string &  s)

Definition at line 595 of file NativeCodegen.cpp.

References CHECK.

Referenced by gen_array_any_all_sigs(), and gen_translate_null_key_sigs().

595  {
596  if (s == "int8_t") {
597  return "i8";
598  }
599  if (s == "int16_t") {
600  return "i16";
601  }
602  if (s == "int32_t") {
603  return "i32";
604  }
605  if (s == "int64_t") {
606  return "i64";
607  }
608  CHECK(s == "float" || s == "double");
609  return s;
610 }
#define CHECK(condition)
Definition: Logger.h:291


ExecutionEngineWrapper anonymous_namespace{NativeCodegen.cpp}::create_execution_engine ( llvm::Module *  llvm_module,
llvm::EngineBuilder &  eb,
const CompilationOptions &  co 
)

Definition at line 420 of file NativeCodegen.cpp.

References logger::ASM, assemblyForCPU(), CHECK, DEBUG_TIMER, and LOG.

Referenced by CodeGenerator::generateNativeCPUCode().

422  {
423  auto timer = DEBUG_TIMER(__func__);
424  ExecutionEngineWrapper execution_engine(eb.create(), co);
425  CHECK(execution_engine.get());
426  // Force the module data layout to match the layout for the selected target
427  llvm_module->setDataLayout(execution_engine->getDataLayout());
428 
429  LOG(ASM) << assemblyForCPU(execution_engine, llvm_module);
430 
431  execution_engine->finalizeObject();
432  return execution_engine;
433 }
#define LOG(tag)
Definition: Logger.h:285
std::string assemblyForCPU(ExecutionEngineWrapper &execution_engine, llvm::Module *llvm_module)
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412


llvm::Function* anonymous_namespace{NativeCodegen.cpp}::create_row_function ( const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals,
llvm::Module *  llvm_module,
llvm::LLVMContext &  context 
)

Definition at line 1737 of file NativeCodegen.cpp.

1741  {
1742  std::vector<llvm::Type*> row_process_arg_types;
1743 
1744  if (agg_col_count) {
1745  // output (aggregate) arguments
1746  for (size_t i = 0; i < agg_col_count; ++i) {
1747  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1748  }
1749  } else {
1750  // group by buffer
1751  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1752  // varlen output buffer
1753  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1754  // current match count
1755  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1756  // total match count passed from the caller
1757  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1758  // old total match count returned to the caller
1759  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1760  // max matched (total number of slots in the output buffer)
1761  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1762  }
1763 
1764  // aggregate init values
1765  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1766 
1767  // position argument
1768  row_process_arg_types.push_back(llvm::Type::getInt64Ty(context));
1769 
1770  // fragment row offset argument
1771  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1772 
1773  // number of rows for each scan
1774  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1775 
1776  // literals buffer argument
1777  if (hoist_literals) {
1778  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
1779  }
1780 
1781  // column buffer arguments
1782  for (size_t i = 0; i < in_col_count; ++i) {
1783  row_process_arg_types.emplace_back(llvm::Type::getInt8PtrTy(context));
1784  }
1785 
1786  // join hash table argument
1787  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1788 
1789  // row function manager
1790  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
1791 
1792  // generate the function
1793  auto ft =
1794  llvm::FunctionType::get(get_int_type(32, context), row_process_arg_types, false);
1795 
1796  auto row_func = llvm::Function::Create(
1797  ft, llvm::Function::ExternalLinkage, "row_func", llvm_module);
1798 
1799  // set the row function argument names; for debugging purposes only
1800  set_row_func_argnames(row_func, in_col_count, agg_col_count, hoist_literals);
1801 
1802  return row_func;
1803 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
void set_row_func_argnames(llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
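
For reference, a minimal standalone sketch of the signature this builds, using the raw LLVM API rather than the OmniSciDB helpers (the parameter choices agg_col_count = 1, in_col_count = 2, hoist_literals = false are arbitrary, and the i32 return type mirrors get_int_type(32, context) above):

#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/raw_ostream.h>
#include <vector>

int main() {
  llvm::LLVMContext ctx;
  llvm::Module mod("row_func_example", ctx);
  auto* i64p = llvm::Type::getInt64PtrTy(ctx);
  auto* i8p = llvm::Type::getInt8PtrTy(ctx);
  std::vector<llvm::Type*> arg_types = {
      i64p,                         // out               (agg_col_count == 1)
      i64p,                         // agg_init_val
      llvm::Type::getInt64Ty(ctx),  // pos
      i64p,                         // frag_row_off
      i64p,                         // num_rows_per_scan
      i8p,                          // col_buf0          (in_col_count == 2)
      i8p,                          // col_buf1
      i64p,                         // join_hash_tables
      i8p,                          // row_func_mgr
  };
  auto* ft = llvm::FunctionType::get(
      llvm::Type::getInt32Ty(ctx), arg_types, /*isVarArg=*/false);
  auto* row_func = llvm::Function::Create(
      ft, llvm::Function::ExternalLinkage, "row_func", &mod);
  // Prints: declare i32 @row_func(i64*, i64*, i64, i64*, i64*, i8*, i8*, i64*, i8*)
  row_func->print(llvm::outs());
  return 0;
}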
void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs ( llvm::Module &  M,
const std::unordered_set< llvm::Function * > &  live_funcs 
)

Definition at line 246 of file NativeCodegen.cpp.

Referenced by optimize_ir().

248  {
249  std::vector<llvm::Function*> dead_funcs;
250  for (auto& F : M) {
251  bool bAlive = false;
252  if (live_funcs.count(&F)) {
253  continue;
254  }
255  for (auto U : F.users()) {
256  auto* C = llvm::dyn_cast<const llvm::CallInst>(U);
257  if (!C || C->getParent()->getParent() != &F) {
258  bAlive = true;
259  break;
260  }
261  }
262  if (!bAlive) {
263  dead_funcs.push_back(&F);
264  }
265  }
266  for (auto pFn : dead_funcs) {
267  pFn->eraseFromParent();
268  }
269 }

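Restated as a standalone predicate, the liveness test above amounts to the following (a sketch with the same semantics as the bAlive loop; Instruction::getFunction() is shorthand for getParent()->getParent()):

#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>

// A function is erasable when it is not in live_funcs and every one of its
// users is a call instruction located inside the function itself, i.e. the
// only remaining references are self-recursive calls.
bool is_only_self_recursive(llvm::Function& F) {
  for (llvm::User* U : F.users()) {
    auto* C = llvm::dyn_cast<llvm::CallInst>(U);
    if (!C || C->getFunction() != &F) {
      return false;  // referenced from elsewhere: the function stays alive
    }
  }
  return true;
}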

template<typename InstType >
llvm::Value* anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block ( llvm::Function *  func,
std::string  bb_name,
std::string  variable_name 
)

Definition at line 2003 of file NativeCodegen.cpp.

2005  {
2006  llvm::Value* result = nullptr;
2007  if (func == nullptr || variable_name.empty()) {
2008  return result;
2009  }
2010  bool is_found = false;
2011  for (auto bb_it = func->begin(); bb_it != func->end() && !is_found; ++bb_it) {
2012  if (!bb_name.empty() && bb_it->getName() != bb_name) {
2013  continue;
2014  }
2015  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); inst_it++) {
2016  if (llvm::isa<InstType>(*inst_it)) {
2017  if (inst_it->getName() == variable_name) {
2018  result = &*inst_it;
2019  is_found = true;
2020  break;
2021  }
2022  }
2023  }
2024  }
2025  return result;
2026 }
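
A hedged usage sketch (the variable name "max_matched" is illustrative, not a name guaranteed to exist in generated code; an empty bb_name matches every basic block):

#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>

llvm::Value* find_max_matched(llvm::Function* query_func) {
  // Returns nullptr when func is null, variable_name is empty, or no load
  // instruction with that name exists in any (matching) basic block.
  return find_variable_in_basic_block<llvm::LoadInst>(
      query_func, /*bb_name=*/"", /*variable_name=*/"max_matched");
}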
std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ( )

Definition at line 612 of file NativeCodegen.cpp.

References cpp_to_llvm_name().

612  {
613  std::string result;
614  for (const std::string any_or_all : {"any", "all"}) {
615  for (const std::string elem_type :
616  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
617  for (const std::string needle_type :
618  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
619  for (const std::string op_name : {"eq", "ne", "lt", "le", "gt", "ge"}) {
620  result += ("declare i1 @array_" + any_or_all + "_" + op_name + "_" + elem_type +
621  "_" + needle_type + "(i8*, i64, " + cpp_to_llvm_name(needle_type) +
622  ", " + cpp_to_llvm_name(elem_type) + ");\n");
623  }
624  }
625  }
626  }
627  return result;
628 }
std::string cpp_to_llvm_name(const std::string &s)

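A self-contained sketch of one emitted declaration (the helper is copied inline from cpp_to_llvm_name() above so the example compiles on its own; the loop indices chosen are arbitrary):

#include <iostream>
#include <string>

// Local copy of cpp_to_llvm_name() above, inlined so the example stands alone.
std::string cpp_to_llvm_name(const std::string& s) {
  if (s == "int8_t") return "i8";
  if (s == "int16_t") return "i16";
  if (s == "int32_t") return "i32";
  if (s == "int64_t") return "i64";
  return s;  // "float" and "double" keep their names in LLVM IR
}

int main() {
  // One iteration of the nested loops in gen_array_any_all_sigs():
  // any_or_all = "any", op_name = "gt", elem_type = "int32_t",
  // needle_type = "double".
  std::cout << "declare i1 @array_any_gt_int32_t_double(i8*, i64, "
            << cpp_to_llvm_name("double") << ", " << cpp_to_llvm_name("int32_t")
            << ");\n";
  // Prints: declare i1 @array_any_gt_int32_t_double(i8*, i64, double, i32);
  return 0;
}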

std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ( )

Definition at line 630 of file NativeCodegen.cpp.

References cpp_to_llvm_name().

630  {
631  std::string result;
632  for (const std::string key_type : {"int8_t", "int16_t", "int32_t", "int64_t"}) {
633  const auto key_llvm_type = cpp_to_llvm_name(key_type);
634  result += "declare i64 @translate_null_key_" + key_type + "(" + key_llvm_type + ", " +
635  key_llvm_type + ", i64);\n";
636  }
637  return result;
638 }
std::string cpp_to_llvm_name(const std::string &s)

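For reference, the complete output of this generator is four declarations, one per key type (reconstructed from the loop above):

declare i64 @translate_null_key_int8_t(i8, i8, i64);
declare i64 @translate_null_key_int16_t(i16, i16, i64);
declare i64 @translate_null_key_int32_t(i32, i32, i64);
declare i64 @translate_null_key_int64_t(i64, i64, i64);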

std::vector<std::string> anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames ( const std::vector< Analyzer::Expr * > &  target_exprs,
const bool  is_group_by 
)

Definition at line 1833 of file NativeCodegen.cpp.

1834  {
1835  std::vector<std::string> result;
1836  for (size_t target_idx = 0, agg_col_idx = 0; target_idx < target_exprs.size();
1837  ++target_idx, ++agg_col_idx) {
1838  const auto target_expr = target_exprs[target_idx];
1839  CHECK(target_expr);
1840  const auto target_type_info = target_expr->get_type_info();
1841  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
1842  const bool is_varlen =
1843  (target_type_info.is_string() &&
1844  target_type_info.get_compression() == kENCODING_NONE) ||
1845  target_type_info.is_array(); // TODO: should it use is_varlen_array() ?
1846  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
1847  result.emplace_back(target_type_info.is_fp() ? "agg_id_double" : "agg_id");
1848  if (is_varlen) {
1849  result.emplace_back("agg_id");
1850  }
1851  if (target_type_info.is_geometry()) {
1852  result.emplace_back("agg_id");
1853  for (auto i = 2; i < 2 * target_type_info.get_physical_coord_cols(); ++i) {
1854  result.emplace_back("agg_id");
1855  }
1856  }
1857  continue;
1858  }
1859  const auto agg_type = agg_expr->get_aggtype();
1860  SQLTypeInfo agg_type_info;
1861  switch (agg_type) {
1862  case kCOUNT:
1863  case kCOUNT_IF:
1864  agg_type_info = target_type_info;
1865  break;
1866  default:
1867  agg_type_info = agg_expr->get_arg()->get_type_info();
1868  break;
1869  }
1870  switch (agg_type) {
1871  case kAVG: {
1872  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1873  !agg_type_info.is_fp()) {
1874  throw std::runtime_error("AVG is only valid on integer and floating point");
1875  }
1876  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1877  ? "agg_sum"
1878  : "agg_sum_double");
1879  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1880  ? "agg_count"
1881  : "agg_count_double");
1882  break;
1883  }
1884  case kMIN: {
1885  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1886  agg_type_info.is_geometry()) {
1887  throw std::runtime_error(
1888  "MIN on strings, arrays or geospatial types not supported yet");
1889  }
1890  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1891  ? "agg_min"
1892  : "agg_min_double");
1893  break;
1894  }
1895  case kMAX: {
1896  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1897  agg_type_info.is_geometry()) {
1898  throw std::runtime_error(
1899  "MAX on strings, arrays or geospatial types not supported yet");
1900  }
1901  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1902  ? "agg_max"
1903  : "agg_max_double");
1904  break;
1905  }
1906  case kSUM:
1907  case kSUM_IF: {
1908  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1909  !agg_type_info.is_fp()) {
1910  throw std::runtime_error(
1911  "SUM and SUM_IF is only valid on integer and floating point");
1912  }
1913  std::string func_name = (agg_type_info.is_integer() || agg_type_info.is_time())
1914  ? "agg_sum"
1915  : "agg_sum_double";
1916  if (agg_type == kSUM_IF) {
1917  func_name += "_if";
1918  }
1919  result.emplace_back(func_name);
1920  break;
1921  }
1922  case kCOUNT:
1923  result.emplace_back(agg_expr->get_is_distinct() ? "agg_count_distinct"
1924  : "agg_count");
1925  break;
1926  case kCOUNT_IF:
1927  result.emplace_back("agg_count_if");
1928  break;
1929  case kSINGLE_VALUE: {
1930  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1931  break;
1932  }
1933  case kSAMPLE: {
1934  // Note that varlen SAMPLE arguments are handled separately above
1935  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1936  break;
1937  }
1938  case kAPPROX_COUNT_DISTINCT:
1939  result.emplace_back("agg_approximate_count_distinct");
1940  break;
1941  case kAPPROX_QUANTILE:
1942  result.emplace_back("agg_approx_quantile");
1943  break;
1944  case kMODE:
1945  result.emplace_back("agg_mode_func");
1946  break;
1947  default:
1948  UNREACHABLE() << "Unsupported agg_type: " << agg_type;
1949  }
1950  }
1951  return result;
1952 }
bool is_fp() const
Definition: sqltypes.h:573
#define UNREACHABLE()
Definition: Logger.h:338
bool is_time() const
Definition: sqltypes.h:579
bool is_integer() const
Definition: sqltypes.h:567
SQLAgg get_aggtype() const
Definition: Analyzer.h:1329
#define CHECK(condition)
Definition: Logger.h:291
bool is_geometry() const
Definition: sqltypes.h:597
bool is_string() const
Definition: sqltypes.h:561
bool is_decimal() const
Definition: sqltypes.h:570
bool is_array() const
Definition: sqltypes.h:585
size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size ( const bool  shared_mem_used,
const QueryMemoryDescriptor *  query_mem_desc_ptr 
)

Definition at line 2580 of file NativeCodegen.cpp.

2581  {
2582  return shared_mem_used
2583  ? (query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount())
2584  : 0;
2585 }
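
Worked example with illustrative values: for getRowSize() == 8 bytes and getEntryCount() == 2048, a shared-memory query reserves 8 * 2048 = 16384 bytes; with shared_mem_used == false the function returns 0 regardless of the descriptor.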
bool anonymous_namespace{NativeCodegen.cpp}::has_case_expr_within_groupby_expr ( RelAlgExecutionUnit const &  ra_exe_unit)

Definition at line 2617 of file NativeCodegen.cpp.

2617  {
2618  if (ra_exe_unit.groupby_exprs.empty() || !ra_exe_unit.groupby_exprs.front()) {
2619  return false;
2620  }
2621  CaseExprDetector detector;
2622  for (auto expr : ra_exe_unit.groupby_exprs) {
2623  if (detector.detectCaseExpr(expr.get())) {
2624  return true;
2625  }
2626  }
2627  return false;
2628 }
bool anonymous_namespace{NativeCodegen.cpp}::has_count_expr ( RelAlgExecutionUnit const &  ra_exe_unit)

Definition at line 2587 of file NativeCodegen.cpp.

2587  {
2588  for (auto const expr : ra_exe_unit.target_exprs) {
2589  if (auto const agg_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
2590  if (shared::is_any<SQLAgg::kCOUNT, SQLAgg::kCOUNT_IF>(agg_expr->get_aggtype())) {
2591  return true;
2592  }
2593  }
2594  }
2595  return false;
2596 }
bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported ( const QueryMemoryDescriptor *  query_mem_desc_ptr,
const RelAlgExecutionUnit ra_exe_unit,
const CudaMgr_Namespace::CudaMgr *  cuda_mgr,
const ExecutorDeviceType  device_type,
const unsigned  cuda_blocksize,
const unsigned  num_blocks_per_mp 
)

To simplify the implementation for practical purposes, we initially provide shared memory support only for cases where the output buffer has at most as many entries as there are threads within each GPU device. To relax this assumption later, a loop would have to be added to the generated code so that each thread iterates over multiple entries. TODO: relax this if necessary

Definition at line 2630 of file NativeCodegen.cpp.

2635  {
2636  if (device_type == ExecutorDeviceType::CPU) {
2637  return false;
2638  }
2639  if (query_mem_desc_ptr->didOutputColumnar()) {
2640  return false;
2641  }
2642  CHECK(query_mem_desc_ptr);
2643  CHECK(cuda_mgr);
2644  /*
2645  * We only use shared memory strategy if GPU hardware provides native shared
2646  * memory atomics support. From CUDA Toolkit documentation:
2647  * https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
2648  * Maxwell, Pascal [and Volta] provides native shared memory atomic operations
2649  * for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
2650  * (CAS)."
2651  *
2652  */
2653  if (!cuda_mgr->isArchMaxwellOrLaterForAll()) {
2654  return false;
2655  }
2656  if (cuda_mgr->isArchPascal() && !ra_exe_unit.join_quals.empty() &&
2657  has_count_expr(ra_exe_unit) && has_case_expr_within_groupby_expr(ra_exe_unit)) {
2658  return false;
2659  }
2660 
2661  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2662          QueryDescriptionType::NonGroupedAggregate &&
2663      g_enable_smem_non_grouped_agg &&
2664      query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty()) {
2665  // TODO: relax this, if necessary
2666  if (cuda_blocksize < query_mem_desc_ptr->getEntryCount()) {
2667  return false;
2668  }
2669  // skip shared memory usage when dealing with 1) variable length targets, 2)
2670  // not a COUNT aggregate
2671  const auto target_infos =
2672  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2673  std::unordered_set<SQLAgg> supported_aggs{kCOUNT, kCOUNT_IF};
2674  if (std::find_if(target_infos.begin(),
2675  target_infos.end(),
2676  [&supported_aggs](const TargetInfo& ti) {
2677  if (ti.sql_type.is_varlen() ||
2678  !supported_aggs.count(ti.agg_kind)) {
2679  return true;
2680  } else {
2681  return false;
2682  }
2683  }) == target_infos.end()) {
2684  return true;
2685  }
2686  }
2687  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2688          QueryDescriptionType::GroupByPerfectHash &&
2689      g_enable_smem_group_by) {
2690    /**
2691     * To simplify the implementation for practical purposes, we initially
2692     * provide shared memory support for cases where there are at most as many
2693     * entries in the output buffer as there are threads within each GPU
2694     * device. In order to relax this assumption later, we need to add a for
2695     * loop in generated codes such that each thread loops over multiple
2696     * entries. TODO: relax this if necessary
2697     */
2698    if (cuda_blocksize < query_mem_desc_ptr->getEntryCount()) {
2699  return false;
2700  }
2701 
2702  // Fundamentally, we should use shared memory whenever the output buffer
2703  // is small enough so that we can fit it in the shared memory and yet expect
2704  // good occupancy.
2705  // For now, we allow keyless, row-wise layout, and only for perfect hash
2706  // group by operations.
2707  if (query_mem_desc_ptr->hasKeylessHash() &&
2708  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty() &&
2709  !query_mem_desc_ptr->useStreamingTopN()) {
2710  const size_t shared_memory_threshold_bytes = std::min(
2711  g_gpu_smem_threshold == 0 ? SIZE_MAX : g_gpu_smem_threshold,
2712  cuda_mgr->getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp);
2713  const auto output_buffer_size =
2714  query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount();
2715  if (output_buffer_size > shared_memory_threshold_bytes) {
2716  return false;
2717  }
2718 
2719  // skip shared memory usage when dealing with 1) variable length targets, 2)
2720  // non-basic aggregates (COUNT, SUM, MIN, MAX, AVG)
2721  // TODO: relax this if necessary
2722  const auto target_infos =
2723  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2724  std::unordered_set<SQLAgg> supported_aggs{kCOUNT, kCOUNT_IF};
2725  if (g_enable_smem_grouped_non_count_agg) {
2726    supported_aggs = {kCOUNT, kCOUNT_IF, kMIN, kMAX, kSUM, kSUM_IF, kAVG};
2727  }
2728  if (std::find_if(target_infos.begin(),
2729  target_infos.end(),
2730  [&supported_aggs](const TargetInfo& ti) {
2731  if (ti.sql_type.is_varlen() ||
2732  !supported_aggs.count(ti.agg_kind)) {
2733  return true;
2734  } else {
2735  return false;
2736  }
2737  }) == target_infos.end()) {
2738  return true;
2739  }
2740  }
2741  }
2742  return false;
2743 }
GroupByPerfectHash
Definition: enums.h:58
std::vector< Analyzer::Expr * > target_exprs
bool g_enable_smem_group_by
bool countDistinctDescriptorsLogicallyEmpty() const
NonGroupedAggregate
Definition: enums.h:58
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:150
const JoinQualsPerNestingLevel join_quals
size_t getMinSharedMemoryPerBlockForAllDevices() const
Definition: CudaMgr.h:128
QueryDescriptionType getQueryDescriptionType() const
bool isArchMaxwellOrLaterForAll() const
Definition: CudaMgr.cpp:437
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:147
bool has_count_expr(RelAlgExecutionUnit const &ra_exe_unit)
#define CHECK(condition)
Definition: Logger.h:291
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
bool isArchPascal() const
Definition: CudaMgr.h:153
bool has_case_expr_within_groupby_expr(RelAlgExecutionUnit const &ra_exe_unit)
size_t g_gpu_smem_threshold
Definition: Execute.cpp:142
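
A worked example of the threshold arithmetic above, with illustrative values: when g_gpu_smem_threshold == 0 the first operand of std::min becomes SIZE_MAX, so the budget is getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp. With 49152 bytes (48 KiB) of shared memory per block and num_blocks_per_mp == 2, the budget is 24576 bytes: a keyless perfect-hash buffer with getRowSize() == 16 and getEntryCount() == 1024 (16384 bytes) fits, while a 4096-entry buffer of the same row size (65536 bytes) does not.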
void anonymous_namespace{NativeCodegen.cpp}::optimize_ir ( llvm::Function *  query_func,
llvm::Module *  llvm_module,
llvm::legacy::PassManager &  pass_manager,
const std::unordered_set< llvm::Function * > &  live_funcs,
const bool  is_gpu_smem_used,
const CompilationOptions &  co 
)

Definition at line 308 of file NativeCodegen.cpp.

References DEBUG_TIMER, and eliminate_dead_self_recursive_funcs().

Referenced by CodeGenerator::generateNativeCPUCode().

313  {
314  auto timer = DEBUG_TIMER(__func__);
315  // the always inliner legacy pass must always run first
316  pass_manager.add(llvm::createVerifierPass());
317  pass_manager.add(llvm::createAlwaysInlinerLegacyPass());
318 
319  pass_manager.add(new AnnotateInternalFunctionsPass());
320 
321  pass_manager.add(llvm::createSROAPass());
322  // mem ssa drops unused load and store instructions, e.g. passing variables directly
323  // where possible
324  pass_manager.add(
325  llvm::createEarlyCSEPass(/*enable_mem_ssa=*/true)); // Catch trivial redundancies
326 
327  if (!is_gpu_smem_used) {
328  // thread jumps can change the execution order around SMEM sections guarded by
329  // `__syncthreads()`, which results in race conditions. For now, disable jump
330  // threading for shared memory queries. In the future, consider handling shared
331  // memory aggregations with a separate kernel launch
332  pass_manager.add(llvm::createJumpThreadingPass()); // Thread jumps.
333  }
334  pass_manager.add(llvm::createCFGSimplificationPass());
335 
336  // remove load/stores in PHIs if instructions can be accessed directly post thread jumps
337  pass_manager.add(llvm::createNewGVNPass());
338 
339  pass_manager.add(llvm::createDeadStoreEliminationPass());
340  pass_manager.add(llvm::createLICMPass());
341 
342  pass_manager.add(llvm::createInstructionCombiningPass());
343 
344  // module passes
345  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
346  pass_manager.add(llvm::createGlobalOptimizerPass());
347 
348  pass_manager.add(llvm::createCFGSimplificationPass()); // cleanup after everything
349 
350  pass_manager.run(*llvm_module);
351 
352  eliminate_dead_self_recursive_funcs(*llvm_module, live_funcs);
353 }
void eliminate_dead_self_recursive_funcs(llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
#define DEBUG_TIMER(name)
Definition: Logger.h:412

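A call-site sketch for the CPU path (mirroring the use in CodeGenerator::generateNativeCPUCode(); setup of live_funcs and co is elided, and the project headers are assumed visible):

#include <llvm/IR/LegacyPassManager.h>
#include <unordered_set>

void optimize_for_cpu(llvm::Function* query_func,
                      llvm::Module* llvm_module,
                      const std::unordered_set<llvm::Function*>& live_funcs,
                      const CompilationOptions& co) {
  llvm::legacy::PassManager pass_manager;
  // is_gpu_smem_used is false on the CPU path, so jump threading stays enabled.
  optimize_ir(query_func, llvm_module, pass_manager, live_funcs,
              /*is_gpu_smem_used=*/false, co);
}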

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Function &  F,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 195 of file NativeCodegen.cpp.

Referenced by scan_function_calls().

198  {
199  for (llvm::inst_iterator I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
200  if (auto* CI = llvm::dyn_cast<llvm::CallInst>(&*I)) {
201  auto* F2 = CI->getCalledFunction();
202  if (F2 != nullptr) {
203  auto F2name = F2->getName().str();
204  if (F2->isDeclaration()) {
205  if (F2name.rfind("__", 0) !=
206  0 // assume symbols with double underscore are defined
207  && F2name.rfind("llvm.", 0) !=
208  0 // TODO: this may give false positive for NVVM intrinsics
209  && ignored.find(F2name) == ignored.end() // not in ignored list
210  ) {
211  undefined.emplace(F2name);
212  }
213  } else {
214  if (defined.find(F2name) == defined.end()) {
215  defined.emplace(F2name);
216  scan_function_calls<T>(*F2, defined, undefined, ignored);
217  }
218  }
219  }
220  }
221  }
222 }


template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  llvm_module,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 225 of file NativeCodegen.cpp.

References scan_function_calls().

228  {
229  for (auto& F : llvm_module) {
230  if (!F.isDeclaration()) {
231  scan_function_calls(F, defined, undefined, ignored);
232  }
233  }
234 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)


template<typename T = void>
std::tuple<std::unordered_set<std::string>, std::unordered_set<std::string> > anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  llvm_module,
const std::unordered_set< std::string > &  ignored = {} 
)

Definition at line 238 of file NativeCodegen.cpp.

239  {}) {
240  std::unordered_set<std::string> defined, undefined;
241  scan_function_calls(llvm_module, defined, undefined, ignored);
242  return std::make_tuple(defined, undefined);
243 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
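
A self-contained sketch of what the scan reports, using a small module parsed from text (assumes the LLVM development headers; the function names are illustrative):

#include <llvm/AsmParser/Parser.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/raw_ostream.h>
#include <memory>

int main() {
  llvm::LLVMContext ctx;
  llvm::SMDiagnostic err;
  // One defined function whose only call target is a declaration.
  std::unique_ptr<llvm::Module> mod = llvm::parseAssemblyString(R"(
    declare i32 @external_helper(i32)
    define i32 @caller(i32 %x) {
      %r = call i32 @external_helper(i32 %x)
      ret i32 %r
    }
  )", err, ctx);
  if (!mod) {
    err.print("scan_demo", llvm::errs());
    return 1;
  }
  // scan_function_calls(*mod) would report defined = {} and
  // undefined = {"external_helper"}: the helper is a declaration and its name
  // matches neither the "__" nor the "llvm." prefix filters above.
  for (llvm::Function& F : *mod) {
    llvm::outs() << (F.isDeclaration() ? "declared only: " : "defined: ")
                 << F.getName() << '\n';
  }
  return 0;
}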
std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes ( llvm::Function *  query_func,
CgenState *  cgen_state 
)

Definition at line 2746 of file NativeCodegen.cpp.

2747  {
2748  std::string llvm_ir;
2749  std::unordered_set<llvm::MDNode*> md;
2750 
2751  // Loop over all instructions in the query function.
2752  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
2753  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2754  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2755  instr_it->getAllMetadata(imd);
2756  for (auto [kind, node] : imd) {
2757  md.insert(node);
2758  }
2759  }
2760  }
2761 
2762  // Loop over all instructions in the row function.
2763  for (auto bb_it = cgen_state->row_func_->begin(); bb_it != cgen_state->row_func_->end();
2764  ++bb_it) {
2765  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2766  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2767  instr_it->getAllMetadata(imd);
2768  for (auto [kind, node] : imd) {
2769  md.insert(node);
2770  }
2771  }
2772  }
2773 
2774  // Loop over all instructions in the filter function.
2775  if (cgen_state->filter_func_) {
2776  for (auto bb_it = cgen_state->filter_func_->begin();
2777  bb_it != cgen_state->filter_func_->end();
2778  ++bb_it) {
2779  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2780  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2781  instr_it->getAllMetadata(imd);
2782  for (auto [kind, node] : imd) {
2783  md.insert(node);
2784  }
2785  }
2786  }
2787  }
2788 
2789  // Sort the metadata by canonical number and convert to text.
2790  if (!md.empty()) {
2791  std::map<size_t, std::string> sorted_strings;
2792  for (auto p : md) {
2793  std::string str;
2794  llvm::raw_string_ostream os(str);
2795  p->print(os, cgen_state->module_, true);
2796  os.flush();
2797  auto fields = split(str, {}, 1);
2798  if (fields.empty() || fields[0].empty()) {
2799  continue;
2800  }
2801  sorted_strings.emplace(std::stoul(fields[0].substr(1)), str);
2802  }
2803  llvm_ir += "\n";
2804  for (auto [id, text] : sorted_strings) {
2805  llvm_ir += text;
2806  llvm_ir += "\n";
2807  }
2808  }
2809 
2810  return llvm_ir;
2811 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
llvm::Function * row_func_
Definition: CgenState.h:374
llvm::Module * module_
Definition: CgenState.h:373
llvm::Function * filter_func_
Definition: CgenState.h:375
void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames ( llvm::Function *  row_func,
const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals 
)

Definition at line 1684 of file NativeCodegen.cpp.

References SQLTypeInfo::is_fp().

1687  {
1688  auto arg_it = row_func->arg_begin();
1689 
1690  if (agg_col_count) {
1691  for (size_t i = 0; i < agg_col_count; ++i) {
1692  arg_it->setName("out");
1693  ++arg_it;
1694  }
1695  } else {
1696  arg_it->setName("group_by_buff");
1697  ++arg_it;
1698  arg_it->setName("varlen_output_buff");
1699  ++arg_it;
1700  arg_it->setName("crt_matched");
1701  ++arg_it;
1702  arg_it->setName("total_matched");
1703  ++arg_it;
1704  arg_it->setName("old_total_matched");
1705  ++arg_it;
1706  arg_it->setName("max_matched");
1707  ++arg_it;
1708  }
1709 
1710  arg_it->setName("agg_init_val");
1711  ++arg_it;
1712 
1713  arg_it->setName("pos");
1714  ++arg_it;
1715 
1716  arg_it->setName("frag_row_off");
1717  ++arg_it;
1718 
1719  arg_it->setName("num_rows_per_scan");
1720  ++arg_it;
1721 
1722  if (hoist_literals) {
1723  arg_it->setName("literals");
1724  ++arg_it;
1725  }
1726 
1727  for (size_t i = 0; i < in_col_count; ++i) {
1728  arg_it->setName("col_buf" + std::to_string(i));
1729  ++arg_it;
1730  }
1731 
1732  arg_it->setName("join_hash_tables");
1733  ++arg_it;
1734  arg_it->setName("row_func_mgr");
1735 }
std::string to_string(char const *&&v)


template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module &  llvm_module)

Definition at line 156 of file NativeCodegen.cpp.


Referenced by show_defined().

156  {
157  std::cout << "defines: ";
158  for (auto& f : llvm_module.getFunctionList()) {
159  if (!f.isDeclaration()) {
160  std::cout << f.getName().str() << ", ";
161  }
162  }
163  std::cout << std::endl;
164 }


template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module *  llvm_module)

Definition at line 167 of file NativeCodegen.cpp.

References show_defined().

167  {
168  if (llvm_module == nullptr) {
169  std::cout << "is null" << std::endl;
170  } else {
171  show_defined(*llvm_module);
172  }
173 }
void show_defined(llvm::Module &llvm_module)


template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( std::unique_ptr< llvm::Module > &  llvm_module)

Definition at line 176 of file NativeCodegen.cpp.

References show_defined().

176  {
177  show_defined(llvm_module.get());
178 }
void show_defined(llvm::Module &llvm_module)


void anonymous_namespace{NativeCodegen.cpp}::throw_parseIR_error ( const llvm::SMDiagnostic &  parse_error,
std::string  src = "",
const bool  is_gpu = false 
)

Definition at line 124 of file NativeCodegen.cpp.

126  {
127  std::string excname = (is_gpu ? "NVVM IR ParseError: " : "LLVM IR ParseError: ");
128  llvm::raw_string_ostream ss(excname);
129  parse_error.print(src.c_str(), ss, false, false);
130  throw ParseIRError(ss.str());
131 }
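
A hedged usage sketch (assumes throw_parseIR_error and ParseIRError are visible in the translation unit; the buffer name "query_ir" is illustrative):

#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IRReader/IRReader.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/SourceMgr.h>
#include <memory>
#include <string>

std::unique_ptr<llvm::Module> load_ir_or_throw(const std::string& src,
                                               llvm::LLVMContext& ctx) {
  llvm::SMDiagnostic parse_error;
  auto llvm_module =
      llvm::parseIR(llvm::MemoryBufferRef(src, "query_ir"), parse_error, ctx);
  if (!llvm_module) {
    // Surfaces as "LLVM IR ParseError: ..." ("NVVM IR ParseError: ..." when
    // is_gpu is true).
    throw_parseIR_error(parse_error, src, /*is_gpu=*/false);
  }
  return llvm_module;
}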

Variable Documentation

const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

Definition at line 640 of file NativeCodegen.cpp.