OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{NativeCodegen.cpp} Namespace Reference

Functions

void throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
 
template<typename T = void>
void show_defined (llvm::Module &llvm_module)
 
template<typename T = void>
void show_defined (llvm::Module *llvm_module)
 
template<typename T = void>
void show_defined (std::unique_ptr< llvm::Module > &llvm_module)
 
template<typename T = void>
void scan_function_calls (llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
void scan_function_calls (llvm::Module &llvm_module, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
std::tuple< std::unordered_set< std::string >, std::unordered_set< std::string > > scan_function_calls (llvm::Module &llvm_module, const std::unordered_set< std::string > &ignored={})
 
void eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
 
void optimize_ir (llvm::Function *query_func, llvm::Module *llvm_module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function * > &live_funcs, const bool is_gpu_smem_used, const CompilationOptions &co)
 
std::string assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *llvm_module)
 
ExecutionEngineWrapper create_execution_engine (llvm::Module *llvm_module, llvm::EngineBuilder &eb, const CompilationOptions &co)
 
std::string cpp_to_llvm_name (const std::string &s)
 
std::string gen_array_any_all_sigs ()
 
std::string gen_translate_null_key_sigs ()
 
void bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *llvm_module)
 
void set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *llvm_module, llvm::LLVMContext &context)
 
void bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *llvm_module)
 
std::vector< std::string > get_agg_fnames (const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
 
template<typename InstType >
llvm::Value * find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned cuda_blocksize, const unsigned num_blocks_per_mp)
 
std::string serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 

Variables

const std::string cuda_rt_decls
 

Function Documentation

std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU ( ExecutionEngineWrapper &  execution_engine,
llvm::Module *  llvm_module 
)

Definition at line 402 of file NativeCodegen.cpp.

References CHECK.

Referenced by create_execution_engine().

403  {
404  llvm::legacy::PassManager pass_manager;
405  auto cpu_target_machine = execution_engine->getTargetMachine();
406  CHECK(cpu_target_machine);
407  llvm::SmallString<256> code_str;
408  llvm::raw_svector_ostream os(code_str);
409 #if LLVM_VERSION_MAJOR >= 10
410  cpu_target_machine->addPassesToEmitFile(
411  pass_manager, os, nullptr, llvm::CGFT_AssemblyFile);
412 #else
413  cpu_target_machine->addPassesToEmitFile(
414  pass_manager, os, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
415 #endif
416  pass_manager.run(*llvm_module);
417  return "Assembly for the CPU:\n" + std::string(code_str.str()) + "\nEnd of assembly";
418 }
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the caller graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders ( const std::string &  pos_fn_name,
const bool  use_resume_param,
llvm::Function *  query_func,
llvm::Module *  llvm_module 
)

Definition at line 1599 of file NativeCodegen.cpp.

1602  {
1603  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1604  ++it) {
1605  if (!llvm::isa<llvm::CallInst>(*it)) {
1606  continue;
1607  }
1608  auto& pos_call = llvm::cast<llvm::CallInst>(*it);
1609  if (std::string(pos_call.getCalledFunction()->getName()) == pos_fn_name) {
1610  if (use_resume_param) {
1611  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1612  llvm::ReplaceInstWithInst(
1613  &pos_call,
1614  llvm::CallInst::Create(llvm_module->getFunction(pos_fn_name + "_impl"),
1615  error_code_arg));
1616  } else {
1617  llvm::ReplaceInstWithInst(
1618  &pos_call,
1619  llvm::CallInst::Create(llvm_module->getFunction(pos_fn_name + "_impl")));
1620  }
1621  break;
1622  }
1623  }
1624 }
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
void anonymous_namespace{NativeCodegen.cpp}::bind_query ( llvm::Function *  query_func,
const std::string &  query_fname,
llvm::Function *  multifrag_query_func,
llvm::Module *  llvm_module 
)

Definition at line 1748 of file NativeCodegen.cpp.

1751  {
1752  std::vector<llvm::CallInst*> query_stubs;
1753  for (auto it = llvm::inst_begin(multifrag_query_func),
1754  e = llvm::inst_end(multifrag_query_func);
1755  it != e;
1756  ++it) {
1757  if (!llvm::isa<llvm::CallInst>(*it)) {
1758  continue;
1759  }
1760  auto& query_call = llvm::cast<llvm::CallInst>(*it);
1761  if (std::string(query_call.getCalledFunction()->getName()) == query_fname) {
1762  query_stubs.push_back(&query_call);
1763  }
1764  }
1765  for (auto& S : query_stubs) {
1766  std::vector<llvm::Value*> args;
1767  for (size_t i = 0; i < S->getNumOperands() - 1; ++i) {
1768  args.push_back(S->getArgOperand(i));
1769  }
1770  llvm::ReplaceInstWithInst(S, llvm::CallInst::Create(query_func, args, ""));
1771  }
1772 }
std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name ( const std::string &  s)

Definition at line 581 of file NativeCodegen.cpp.

References CHECK.

Referenced by gen_array_any_all_sigs(), and gen_translate_null_key_sigs().

581  {
582  if (s == "int8_t") {
583  return "i8";
584  }
585  if (s == "int16_t") {
586  return "i16";
587  }
588  if (s == "int32_t") {
589  return "i32";
590  }
591  if (s == "int64_t") {
592  return "i64";
593  }
594  CHECK(s == "float" || s == "double");
595  return s;
596 }
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the caller graph for this function:

ExecutionEngineWrapper anonymous_namespace{NativeCodegen.cpp}::create_execution_engine ( llvm::Module *  llvm_module,
llvm::EngineBuilder &  eb,
const CompilationOptions &  co 
)

Definition at line 420 of file NativeCodegen.cpp.

References logger::ASM, assemblyForCPU(), CHECK, DEBUG_TIMER, g_ee_create_mutex, and LOG.

Referenced by CodeGenerator::generateNativeCPUCode().

422  {
423  auto timer = DEBUG_TIMER(__func__);
424  // Avoids data race in
425  // llvm::sys::DynamicLibrary::getPermanentLibrary and
426  // GDBJITRegistrationListener::notifyObjectLoaded while creating a
427  // new ExecutionEngine instance. Unfortunately we have to use global
428  // mutex here.
429  std::lock_guard<llvm::sys::Mutex> lock(g_ee_create_mutex);
430  ExecutionEngineWrapper execution_engine(eb.create(), co);
431  CHECK(execution_engine.get());
432  // Force the module data layout to match the layout for the selected target
433  llvm_module->setDataLayout(execution_engine->getDataLayout());
434 
435  LOG(ASM) << assemblyForCPU(execution_engine, llvm_module);
436 
437  execution_engine->finalizeObject();
438  return execution_engine;
439 }
#define LOG(tag)
Definition: Logger.h:283
std::string assemblyForCPU(ExecutionEngineWrapper &execution_engine, llvm::Module *llvm_module)
#define CHECK(condition)
Definition: Logger.h:289
#define DEBUG_TIMER(name)
Definition: Logger.h:407
static llvm::sys::Mutex g_ee_create_mutex

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Function* anonymous_namespace{NativeCodegen.cpp}::create_row_function ( const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals,
llvm::Module *  llvm_module,
llvm::LLVMContext &  context 
)

Definition at line 1679 of file NativeCodegen.cpp.

1683  {
1684  std::vector<llvm::Type*> row_process_arg_types;
1685 
1686  if (agg_col_count) {
1687  // output (aggregate) arguments
1688  for (size_t i = 0; i < agg_col_count; ++i) {
1689  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1690  }
1691  } else {
1692  // group by buffer
1693  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1694  // varlen output buffer
1695  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1696  // current match count
1697  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1698  // total match count passed from the caller
1699  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1700  // old total match count returned to the caller
1701  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1702  // max matched (total number of slots in the output buffer)
1703  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1704  }
1705 
1706  // aggregate init values
1707  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1708 
1709  // position argument
1710  row_process_arg_types.push_back(llvm::Type::getInt64Ty(context));
1711 
1712  // fragment row offset argument
1713  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1714 
1715  // number of rows for each scan
1716  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1717 
1718  // literals buffer argument
1719  if (hoist_literals) {
1720  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
1721  }
1722 
1723  // column buffer arguments
1724  for (size_t i = 0; i < in_col_count; ++i) {
1725  row_process_arg_types.emplace_back(llvm::Type::getInt8PtrTy(context));
1726  }
1727 
1728  // join hash table argument
1729  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1730 
1731  // row function manager
1732  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
1733 
1734  // generate the function
1735  auto ft =
1736  llvm::FunctionType::get(get_int_type(32, context), row_process_arg_types, false);
1737 
1738  auto row_func = llvm::Function::Create(
1739  ft, llvm::Function::ExternalLinkage, "row_func", llvm_module);
1740 
1741  // set the row function argument names; for debugging purposes only
1742  set_row_func_argnames(row_func, in_col_count, agg_col_count, hoist_literals);
1743 
1744  return row_func;
1745 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
void set_row_func_argnames(llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs ( llvm::Module &  M,
const std::unordered_set< llvm::Function * > &  live_funcs 
)

Definition at line 246 of file NativeCodegen.cpp.

Referenced by optimize_ir().

248  {
249  std::vector<llvm::Function*> dead_funcs;
250  for (auto& F : M) {
251  bool bAlive = false;
252  if (live_funcs.count(&F)) {
253  continue;
254  }
255  for (auto U : F.users()) {
256  auto* C = llvm::dyn_cast<const llvm::CallInst>(U);
257  if (!C || C->getParent()->getParent() != &F) {
258  bAlive = true;
259  break;
260  }
261  }
262  if (!bAlive) {
263  dead_funcs.push_back(&F);
264  }
265  }
266  for (auto pFn : dead_funcs) {
267  pFn->eraseFromParent();
268  }
269 }

+ Here is the caller graph for this function:

template<typename InstType >
llvm::Value* anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block ( llvm::Function *  func,
std::string  bb_name,
std::string  variable_name 
)

Definition at line 1944 of file NativeCodegen.cpp.

1946  {
1947  llvm::Value* result = nullptr;
1948  if (func == nullptr || variable_name.empty()) {
1949  return result;
1950  }
1951  bool is_found = false;
1952  for (auto bb_it = func->begin(); bb_it != func->end() && !is_found; ++bb_it) {
1953  if (!bb_name.empty() && bb_it->getName() != bb_name) {
1954  continue;
1955  }
1956  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); inst_it++) {
1957  if (llvm::isa<InstType>(*inst_it)) {
1958  if (inst_it->getName() == variable_name) {
1959  result = &*inst_it;
1960  is_found = true;
1961  break;
1962  }
1963  }
1964  }
1965  }
1966  return result;
1967 }
std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ( )

Definition at line 598 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

598  {
599  std::string result;
600  for (const std::string any_or_all : {"any", "all"}) {
601  for (const std::string elem_type :
602  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
603  for (const std::string needle_type :
604  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
605  for (const std::string op_name : {"eq", "ne", "lt", "le", "gt", "ge"}) {
606  result += ("declare i1 @array_" + any_or_all + "_" + op_name + "_" + elem_type +
607  "_" + needle_type + "(i8*, i64, " + cpp_to_llvm_name(needle_type) +
608  ", " + cpp_to_llvm_name(elem_type) + ");\n");
609  }
610  }
611  }
612  }
613  return result;
614 }
std::string cpp_to_llvm_name(const std::string &s)

+ Here is the call graph for this function:

std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ( )

Definition at line 616 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

616  {
617  std::string result;
618  for (const std::string key_type : {"int8_t", "int16_t", "int32_t", "int64_t"}) {
619  const auto key_llvm_type = cpp_to_llvm_name(key_type);
620  result += "declare i64 @translate_null_key_" + key_type + "(" + key_llvm_type + ", " +
621  key_llvm_type + ", i64);\n";
622  }
623  return result;
624 }
std::string cpp_to_llvm_name(const std::string &s)

+ Here is the call graph for this function:

std::vector<std::string> anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames ( const std::vector< Analyzer::Expr * > &  target_exprs,
const bool  is_group_by 
)

Definition at line 1774 of file NativeCodegen.cpp.

1775  {
1776  std::vector<std::string> result;
1777  for (size_t target_idx = 0, agg_col_idx = 0; target_idx < target_exprs.size();
1778  ++target_idx, ++agg_col_idx) {
1779  const auto target_expr = target_exprs[target_idx];
1780  CHECK(target_expr);
1781  const auto target_type_info = target_expr->get_type_info();
1782  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
1783  const bool is_varlen =
1784  (target_type_info.is_string() &&
1785  target_type_info.get_compression() == kENCODING_NONE) ||
1786  target_type_info.is_array(); // TODO: should it use is_varlen_array() ?
1787  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
1788  result.emplace_back(target_type_info.is_fp() ? "agg_id_double" : "agg_id");
1789  if (is_varlen) {
1790  result.emplace_back("agg_id");
1791  }
1792  if (target_type_info.is_geometry()) {
1793  result.emplace_back("agg_id");
1794  for (auto i = 2; i < 2 * target_type_info.get_physical_coord_cols(); ++i) {
1795  result.emplace_back("agg_id");
1796  }
1797  }
1798  continue;
1799  }
1800  const auto agg_type = agg_expr->get_aggtype();
1801  SQLTypeInfo agg_type_info;
1802  switch (agg_type) {
1803  case kCOUNT:
1804  case kCOUNT_IF:
1805  agg_type_info = target_type_info;
1806  break;
1807  default:
1808  agg_type_info = agg_expr->get_arg()->get_type_info();
1809  break;
1810  }
1811  switch (agg_type) {
1812  case kAVG: {
1813  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1814  !agg_type_info.is_fp()) {
1815  throw std::runtime_error("AVG is only valid on integer and floating point");
1816  }
1817  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1818  ? "agg_sum"
1819  : "agg_sum_double");
1820  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1821  ? "agg_count"
1822  : "agg_count_double");
1823  break;
1824  }
1825  case kMIN: {
1826  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1827  agg_type_info.is_geometry()) {
1828  throw std::runtime_error(
1829  "MIN on strings, arrays or geospatial types not supported yet");
1830  }
1831  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1832  ? "agg_min"
1833  : "agg_min_double");
1834  break;
1835  }
1836  case kMAX: {
1837  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1838  agg_type_info.is_geometry()) {
1839  throw std::runtime_error(
1840  "MAX on strings, arrays or geospatial types not supported yet");
1841  }
1842  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1843  ? "agg_max"
1844  : "agg_max_double");
1845  break;
1846  }
1847  case kSUM:
1848  case kSUM_IF: {
1849  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1850  !agg_type_info.is_fp()) {
1851  throw std::runtime_error(
1852  "SUM and SUM_IF is only valid on integer and floating point");
1853  }
1854  std::string func_name = (agg_type_info.is_integer() || agg_type_info.is_time())
1855  ? "agg_sum"
1856  : "agg_sum_double";
1857  if (agg_type == kSUM_IF) {
1858  func_name += "_if";
1859  }
1860  result.emplace_back(func_name);
1861  break;
1862  }
1863  case kCOUNT:
1864  result.emplace_back(agg_expr->get_is_distinct() ? "agg_count_distinct"
1865  : "agg_count");
1866  break;
1867  case kCOUNT_IF:
1868  result.emplace_back("agg_count_if");
1869  break;
1870  case kSINGLE_VALUE: {
1871  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1872  break;
1873  }
1874  case kSAMPLE: {
1875  // Note that varlen SAMPLE arguments are handled separately above
1876  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1877  break;
1878  }
1879  case kAPPROX_COUNT_DISTINCT:
1880  result.emplace_back("agg_approximate_count_distinct");
1881  break;
1882  case kAPPROX_QUANTILE:
1883  result.emplace_back("agg_approx_quantile");
1884  break;
1885  case kMODE:
1886  result.emplace_back("agg_mode_func");
1887  break;
1888  default:
1889  UNREACHABLE() << "Usupported agg_type: " << agg_type;
1890  }
1891  }
1892  return result;
1893 }
bool is_fp() const
Definition: sqltypes.h:580
#define UNREACHABLE()
Definition: Logger.h:333
bool is_time() const
Definition: sqltypes.h:582
Definition: sqldefs.h:75
bool is_integer() const
Definition: sqltypes.h:578
Definition: sqldefs.h:77
Definition: sqldefs.h:78
SQLAgg get_aggtype() const
Definition: Analyzer.h:1203
#define CHECK(condition)
Definition: Logger.h:289
bool is_geometry() const
Definition: sqltypes.h:588
bool is_string() const
Definition: sqltypes.h:576
Definition: sqldefs.h:76
bool is_decimal() const
Definition: sqltypes.h:579
Definition: sqldefs.h:74
Definition: sqldefs.h:83
bool is_array() const
Definition: sqltypes.h:584
size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size ( const bool  shared_mem_used,
const QueryMemoryDescriptor *  query_mem_desc_ptr 
)

Definition at line 2518 of file NativeCodegen.cpp.

2519  {
2520  return shared_mem_used
2521  ? (query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount())
2522  : 0;
2523 }
bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported ( const QueryMemoryDescriptor *  query_mem_desc_ptr,
const RelAlgExecutionUnit &  ra_exe_unit,
const CudaMgr_Namespace::CudaMgr *  cuda_mgr,
const ExecutorDeviceType  device_type,
const unsigned  cuda_blocksize,
const unsigned  num_blocks_per_mp 
)

To simplify the implementation for practical purposes, we initially provide shared memory support only for cases where there are at most as many entries in the output buffer as there are threads within each GPU device (the check in the code compares the entry count against cuda_blocksize). To relax this assumption later, we would need to add a for loop to the generated code so that each thread loops over multiple entries. TODO: relax this if necessary.

Definition at line 2525 of file NativeCodegen.cpp.

2530  {
2531  if (device_type == ExecutorDeviceType::CPU) {
2532  return false;
2533  }
2534  if (query_mem_desc_ptr->didOutputColumnar()) {
2535  return false;
2536  }
2537  CHECK(query_mem_desc_ptr);
2538  CHECK(cuda_mgr);
2539  /*
2540  * We only use shared memory strategy if GPU hardware provides native shared
2541  * memory atomics support. From CUDA Toolkit documentation:
2542  * https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
2543  * Maxwell, Pascal [and Volta] provides native shared memory atomic operations
2544  * for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
2545  * (CAS)."
2546  *
2547  */
2548  if (!cuda_mgr->isArchMaxwellOrLaterForAll()) {
2549  return false;
2550  }
2551 
2552  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2553  QueryDescriptionType::NonGroupedAggregate &&
2554  g_enable_smem_non_grouped_agg &&
2555  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty()) {
2556  // TODO: relax this, if necessary
2557  if (cuda_blocksize < query_mem_desc_ptr->getEntryCount()) {
2558  return false;
2559  }
2560  // skip shared memory usage when dealing with 1) variable length targets, 2)
2561  // not a COUNT aggregate
2562  const auto target_infos =
2563  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2564  std::unordered_set<SQLAgg> supported_aggs{kCOUNT, kCOUNT_IF};
2565  if (std::find_if(target_infos.begin(),
2566  target_infos.end(),
2567  [&supported_aggs](const TargetInfo& ti) {
2568  if (ti.sql_type.is_varlen() ||
2569  !supported_aggs.count(ti.agg_kind)) {
2570  return true;
2571  } else {
2572  return false;
2573  }
2574  }) == target_infos.end()) {
2575  return true;
2576  }
2577  }
2578  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2579  QueryDescriptionType::GroupByPerfectHash &&
2580  g_enable_smem_group_by) {
2589  if (cuda_blocksize < query_mem_desc_ptr->getEntryCount()) {
2590  return false;
2591  }
2592 
2593  // Fundamentally, we should use shared memory whenever the output buffer
2594  // is small enough so that we can fit it in the shared memory and yet expect
2595  // good occupancy.
2596  // For now, we allow keyless, row-wise layout, and only for perfect hash
2597  // group by operations.
2598  if (query_mem_desc_ptr->hasKeylessHash() &&
2599  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty() &&
2600  !query_mem_desc_ptr->useStreamingTopN()) {
2601  const size_t shared_memory_threshold_bytes = std::min(
2602  g_gpu_smem_threshold == 0 ? SIZE_MAX : g_gpu_smem_threshold,
2603  cuda_mgr->getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp);
2604  const auto output_buffer_size =
2605  query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount();
2606  if (output_buffer_size > shared_memory_threshold_bytes) {
2607  return false;
2608  }
2609 
2610  // skip shared memory usage when dealing with 1) variable length targets, 2)
2611  // non-basic aggregates (COUNT, SUM, MIN, MAX, AVG)
2612  // TODO: relax this if necessary
2613  const auto target_infos =
2614  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2615  std::unordered_set<SQLAgg> supported_aggs{kCOUNT, kCOUNT_IF};
2616  if (g_enable_smem_grouped_non_count_agg) {
2617  supported_aggs = {kCOUNT, kCOUNT_IF, kMIN, kMAX, kSUM, kSUM_IF, kAVG};
2618  }
2619  if (std::find_if(target_infos.begin(),
2620  target_infos.end(),
2621  [&supported_aggs](const TargetInfo& ti) {
2622  if (ti.sql_type.is_varlen() ||
2623  !supported_aggs.count(ti.agg_kind)) {
2624  return true;
2625  } else {
2626  return false;
2627  }
2628  }) == target_infos.end()) {
2629  return true;
2630  }
2631  }
2632  }
2633  return false;
2634 }
std::vector< Analyzer::Expr * > target_exprs
bool g_enable_smem_group_by
bool countDistinctDescriptorsLogicallyEmpty() const
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:138
Definition: sqldefs.h:75
Definition: sqldefs.h:77
size_t getMinSharedMemoryPerBlockForAllDevices() const
Definition: CudaMgr.h:122
QueryDescriptionType getQueryDescriptionType() const
bool isArchMaxwellOrLaterForAll() const
Definition: CudaMgr.cpp:331
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:135
Definition: sqldefs.h:78
#define CHECK(condition)
Definition: Logger.h:289
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
Definition: sqldefs.h:76
Definition: sqldefs.h:74
size_t g_gpu_smem_threshold
Definition: Execute.cpp:130
void anonymous_namespace{NativeCodegen.cpp}::optimize_ir ( llvm::Function *  query_func,
llvm::Module *  llvm_module,
llvm::legacy::PassManager &  pass_manager,
const std::unordered_set< llvm::Function * > &  live_funcs,
const bool  is_gpu_smem_used,
const CompilationOptions &  co 
)

Definition at line 308 of file NativeCodegen.cpp.

References DEBUG_TIMER, and eliminate_dead_self_recursive_funcs().

Referenced by CodeGenerator::generateNativeCPUCode().

313  {
314  auto timer = DEBUG_TIMER(__func__);
315  // the always inliner legacy pass must always run first
316  pass_manager.add(llvm::createVerifierPass());
317  pass_manager.add(llvm::createAlwaysInlinerLegacyPass());
318 
319  pass_manager.add(new AnnotateInternalFunctionsPass());
320 
321  pass_manager.add(llvm::createSROAPass());
322  // mem ssa drops unused load and store instructions, e.g. passing variables directly
323  // where possible
324  pass_manager.add(
325  llvm::createEarlyCSEPass(/*enable_mem_ssa=*/true)); // Catch trivial redundancies
326 
327  if (!is_gpu_smem_used) {
328  // thread jumps can change the execution order around SMEM sections guarded by
329  // `__syncthreads()`, which results in race conditions. For now, disable jump
330  // threading for shared memory queries. In the future, consider handling shared memory
331  // aggregations with a separate kernel launch
332  pass_manager.add(llvm::createJumpThreadingPass()); // Thread jumps.
333  }
334  pass_manager.add(llvm::createCFGSimplificationPass());
335 
336  // remove load/stores in PHIs if instructions can be accessed directly post thread jumps
337  pass_manager.add(llvm::createNewGVNPass());
338 
339  pass_manager.add(llvm::createDeadStoreEliminationPass());
340  pass_manager.add(llvm::createLICMPass());
341 
342  pass_manager.add(llvm::createInstructionCombiningPass());
343 
344  // module passes
345  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
346  pass_manager.add(llvm::createGlobalOptimizerPass());
347 
348  pass_manager.add(llvm::createCFGSimplificationPass()); // cleanup after everything
349 
350  pass_manager.run(*llvm_module);
351 
352  eliminate_dead_self_recursive_funcs(*llvm_module, live_funcs);
353 }
void eliminate_dead_self_recursive_funcs(llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
#define DEBUG_TIMER(name)
Definition: Logger.h:407

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Function &  F,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 195 of file NativeCodegen.cpp.

Referenced by scan_function_calls().

198  {
199  for (llvm::inst_iterator I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
200  if (auto* CI = llvm::dyn_cast<llvm::CallInst>(&*I)) {
201  auto* F2 = CI->getCalledFunction();
202  if (F2 != nullptr) {
203  auto F2name = F2->getName().str();
204  if (F2->isDeclaration()) {
205  if (F2name.rfind("__", 0) !=
206  0 // assume symbols with double underscore are defined
207  && F2name.rfind("llvm.", 0) !=
208  0 // TODO: this may give false positive for NVVM intrinsics
209  && ignored.find(F2name) == ignored.end() // not in ignored list
210  ) {
211  undefined.emplace(F2name);
212  }
213  } else {
214  if (defined.find(F2name) == defined.end()) {
215  defined.emplace(F2name);
216  scan_function_calls<T>(*F2, defined, undefined, ignored);
217  }
218  }
219  }
220  }
221  }
222 }

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  llvm_module,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 225 of file NativeCodegen.cpp.

References scan_function_calls().

228  {
229  for (auto& F : llvm_module) {
230  if (!F.isDeclaration()) {
231  scan_function_calls(F, defined, undefined, ignored);
232  }
233  }
234 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)

+ Here is the call graph for this function:

template<typename T = void>
std::tuple<std::unordered_set<std::string>, std::unordered_set<std::string> > anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  llvm_module,
const std::unordered_set< std::string > &  ignored = {} 
)

Definition at line 238 of file NativeCodegen.cpp.

239  {}) {
240  std::unordered_set<std::string> defined, undefined;
241  scan_function_calls(llvm_module, defined, undefined, ignored);
242  return std::make_tuple(defined, undefined);
243 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes ( llvm::Function *  query_func,
CgenState *  cgen_state 
)

Definition at line 2637 of file NativeCodegen.cpp.

2638  {
2639  std::string llvm_ir;
2640  std::unordered_set<llvm::MDNode*> md;
2641 
2642  // Loop over all instructions in the query function.
2643  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
2644  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2645  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2646  instr_it->getAllMetadata(imd);
2647  for (auto [kind, node] : imd) {
2648  md.insert(node);
2649  }
2650  }
2651  }
2652 
2653  // Loop over all instructions in the row function.
2654  for (auto bb_it = cgen_state->row_func_->begin(); bb_it != cgen_state->row_func_->end();
2655  ++bb_it) {
2656  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2657  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2658  instr_it->getAllMetadata(imd);
2659  for (auto [kind, node] : imd) {
2660  md.insert(node);
2661  }
2662  }
2663  }
2664 
2665  // Loop over all instructions in the filter function.
2666  if (cgen_state->filter_func_) {
2667  for (auto bb_it = cgen_state->filter_func_->begin();
2668  bb_it != cgen_state->filter_func_->end();
2669  ++bb_it) {
2670  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2671  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2672  instr_it->getAllMetadata(imd);
2673  for (auto [kind, node] : imd) {
2674  md.insert(node);
2675  }
2676  }
2677  }
2678  }
2679 
2680  // Sort the metadata by canonical number and convert to text.
2681  if (!md.empty()) {
2682  std::map<size_t, std::string> sorted_strings;
2683  for (auto p : md) {
2684  std::string str;
2685  llvm::raw_string_ostream os(str);
2686  p->print(os, cgen_state->module_, true);
2687  os.flush();
2688  auto fields = split(str, {}, 1);
2689  if (fields.empty() || fields[0].empty()) {
2690  continue;
2691  }
2692  sorted_strings.emplace(std::stoul(fields[0].substr(1)), str);
2693  }
2694  llvm_ir += "\n";
2695  for (auto [id, text] : sorted_strings) {
2696  llvm_ir += text;
2697  llvm_ir += "\n";
2698  }
2699  }
2700 
2701  return llvm_ir;
2702 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
llvm::Function * row_func_
Definition: CgenState.h:365
llvm::Module * module_
Definition: CgenState.h:364
llvm::Function * filter_func_
Definition: CgenState.h:366
void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames ( llvm::Function *  row_func,
const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals 
)

Definition at line 1626 of file NativeCodegen.cpp.

References SQLTypeInfo::is_fp().

// Assigns a descriptive name to each argument of row_func, walking the
// argument list from the front with a single iterator.
//
// Naming order:
//   1. agg_col_count != 0: the first agg_col_count arguments are each named "out";
//      otherwise six arguments are named group_by_buff, varlen_output_buff,
//      crt_matched, total_matched, old_total_matched and max_matched.
//   2. agg_init_val, pos, frag_row_off, num_rows_per_scan.
//   3. literals (only when hoist_literals is true).
//   4. col_buf0 .. col_buf<in_col_count - 1>.
//   5. join_hash_tables, row_func_mgr.
//
// The iterator is never compared against arg_end(), so row_func must declare
// at least as many arguments as are named here.
1629  {
1630  auto arg_it = row_func->arg_begin();
1631 
// Output arguments: one "out" per aggregate column, or the fixed group-by set.
1632  if (agg_col_count) {
1633  for (size_t i = 0; i < agg_col_count; ++i) {
// Every output argument is given the same base name "out".
// NOTE(review): presumably LLVM uniquifies duplicate value names - confirm.
1634  arg_it->setName("out");
1635  ++arg_it;
1636  }
1637  } else {
1638  arg_it->setName("group_by_buff");
1639  ++arg_it;
1640  arg_it->setName("varlen_output_buff");
1641  ++arg_it;
1642  arg_it->setName("crt_matched");
1643  ++arg_it;
1644  arg_it->setName("total_matched");
1645  ++arg_it;
1646  arg_it->setName("old_total_matched");
1647  ++arg_it;
1648  arg_it->setName("max_matched");
1649  ++arg_it;
1650  }
1651 
// Arguments that are named in both branches above.
1652  arg_it->setName("agg_init_val");
1653  ++arg_it;
1654 
1655  arg_it->setName("pos");
1656  ++arg_it;
1657 
1658  arg_it->setName("frag_row_off");
1659  ++arg_it;
1660 
1661  arg_it->setName("num_rows_per_scan");
1662  ++arg_it;
1663 
// The literals argument is only named (and the iterator only advanced past it)
// when literal hoisting is enabled.
1664  if (hoist_literals) {
1665  arg_it->setName("literals");
1666  ++arg_it;
1667  }
1668 
// One argument per input column, suffixed with its zero-based index.
1669  for (size_t i = 0; i < in_col_count; ++i) {
1670  arg_it->setName("col_buf" + std::to_string(i));
1671  ++arg_it;
1672  }
1673 
1674  arg_it->setName("join_hash_tables");
1675  ++arg_it;
// Last argument named here; the iterator is not advanced afterwards.
1676  arg_it->setName("row_func_mgr");
1677 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module &  llvm_module)

Definition at line 156 of file NativeCodegen.cpp.

References anonymous_namespace{Utm.h}::f.

Referenced by show_defined().

// Debug helper: writes "defines: " to std::cout, followed by the name of every
// function in llvm_module that has a body (i.e. is not a mere declaration),
// each followed by ", ", and ends the line with std::endl.
156  {
157  std::cout << "defines: ";
158  for (auto& f : llvm_module.getFunctionList()) {
// Skip functions that are only declared in this module.
159  if (!f.isDeclaration()) {
160  std::cout << f.getName().str() << ", ";
161  }
162  }
163  std::cout << std::endl;
164 }
constexpr double f
Definition: Utm.h:31

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module *  llvm_module)

Definition at line 167 of file NativeCodegen.cpp.

References show_defined().

// Pointer overload: prints "is null" to std::cout when llvm_module is nullptr,
// otherwise dereferences it and forwards to the reference overload.
167  {
168  if (llvm_module == nullptr) {
169  std::cout << "is null" << std::endl;
170  } else {
171  show_defined(*llvm_module);
172  }
173 }
void show_defined(llvm::Module &llvm_module)

+ Here is the call graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( std::unique_ptr< llvm::Module > &  llvm_module)

Definition at line 176 of file NativeCodegen.cpp.

References show_defined().

// unique_ptr overload: forwards the raw pointer obtained from get() to another
// show_defined overload; ownership stays with the unique_ptr.
176  {
177  show_defined(llvm_module.get());
178 }
void show_defined(llvm::Module &llvm_module)

+ Here is the call graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::throw_parseIR_error ( const llvm::SMDiagnostic &  parse_error,
std::string  src = "",
const bool  is_gpu = false 
)

Definition at line 124 of file NativeCodegen.cpp.

// Builds an error message from an LLVM parse diagnostic and throws it as a
// ParseIRError. This function always throws.
//
// parse_error: the diagnostic to render into the message.
// src:         passed to parse_error.print() as its first argument.
// is_gpu:      selects the "NVVM IR ParseError: " prefix instead of
//              "LLVM IR ParseError: ".
126  {
127  std::string excname = (is_gpu ? "NVVM IR ParseError: " : "LLVM IR ParseError: ");
// The stream is constructed over excname, so the printed diagnostic ends up in
// the same string as the prefix.
128  llvm::raw_string_ostream ss(excname);
// NOTE(review): the two `false` arguments presumably disable colors and the
// diagnostic-kind label - confirm against llvm::SMDiagnostic::print.
129  parse_error.print(src.c_str(), ss, false, false);
130  throw ParseIRError(ss.str());
131 }

Variable Documentation

const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

Definition at line 626 of file NativeCodegen.cpp.