OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
anonymous_namespace{NativeCodegen.cpp} Namespace Reference

Functions

void throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
 
template<typename T = void>
void show_defined (llvm::Module &module)
 
template<typename T = void>
void show_defined (llvm::Module *module)
 
template<typename T = void>
void show_defined (std::unique_ptr< llvm::Module > &module)
 
template<typename T = void>
void scan_function_calls (llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
void scan_function_calls (llvm::Module &module, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
std::tuple< std::unordered_set
< std::string >
, std::unordered_set
< std::string > > 
scan_function_calls (llvm::Module &module, const std::unordered_set< std::string > &ignored={})
 
void eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
 
void optimize_ir (llvm::Function *query_func, llvm::Module *module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
 
std::string assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *module)
 
std::string cpp_to_llvm_name (const std::string &s)
 
std::string gen_array_any_all_sigs ()
 
std::string gen_translate_null_key_sigs ()
 
void bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *module)
 
void set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *module, llvm::LLVMContext &context)
 
void bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *module)
 
std::vector< std::string > get_agg_fnames (const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
 
template<typename InstType >
llvm::Value * find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned gpu_blocksize, const unsigned num_blocks_per_mp)
 
std::string serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 

Variables

const std::string cuda_rt_decls
 

Function Documentation

std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU ( ExecutionEngineWrapper &  execution_engine,
llvm::Module *  module 
)

Definition at line 396 of file NativeCodegen.cpp.

References CHECK.

Referenced by CodeGenerator::generateNativeCPUCode().

397  {
398  llvm::legacy::PassManager pass_manager;
399  auto cpu_target_machine = execution_engine->getTargetMachine();
400  CHECK(cpu_target_machine);
401  llvm::SmallString<256> code_str;
402  llvm::raw_svector_ostream os(code_str);
403 #if LLVM_VERSION_MAJOR >= 10
404  cpu_target_machine->addPassesToEmitFile(
405  pass_manager, os, nullptr, llvm::CGFT_AssemblyFile);
406 #else
407  cpu_target_machine->addPassesToEmitFile(
408  pass_manager, os, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
409 #endif
410  pass_manager.run(*module);
411  return "Assembly for the CPU:\n" + std::string(code_str.str()) + "\nEnd of assembly";
412 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders ( const std::string &  pos_fn_name,
const bool  use_resume_param,
llvm::Function *  query_func,
llvm::Module *  module 
)

Definition at line 1415 of file NativeCodegen.cpp.

1418  {
1419  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1420  ++it) {
1421  if (!llvm::isa<llvm::CallInst>(*it)) {
1422  continue;
1423  }
1424  auto& pos_call = llvm::cast<llvm::CallInst>(*it);
1425  if (std::string(pos_call.getCalledFunction()->getName()) == pos_fn_name) {
1426  if (use_resume_param) {
1427  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1428  llvm::ReplaceInstWithInst(
1429  &pos_call,
1430  llvm::CallInst::Create(module->getFunction(pos_fn_name + "_impl"),
1431  error_code_arg));
1432  } else {
1433  llvm::ReplaceInstWithInst(
1434  &pos_call,
1435  llvm::CallInst::Create(module->getFunction(pos_fn_name + "_impl")));
1436  }
1437  break;
1438  }
1439  }
1440 }
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:168
void anonymous_namespace{NativeCodegen.cpp}::bind_query ( llvm::Function *  query_func,
const std::string &  query_fname,
llvm::Function *  multifrag_query_func,
llvm::Module *  module 
)

Definition at line 1555 of file NativeCodegen.cpp.

1558  {
1559  std::vector<llvm::CallInst*> query_stubs;
1560  for (auto it = llvm::inst_begin(multifrag_query_func),
1561  e = llvm::inst_end(multifrag_query_func);
1562  it != e;
1563  ++it) {
1564  if (!llvm::isa<llvm::CallInst>(*it)) {
1565  continue;
1566  }
1567  auto& query_call = llvm::cast<llvm::CallInst>(*it);
1568  if (std::string(query_call.getCalledFunction()->getName()) == query_fname) {
1569  query_stubs.push_back(&query_call);
1570  }
1571  }
1572  for (auto& S : query_stubs) {
1573  std::vector<llvm::Value*> args;
1574  for (size_t i = 0; i < S->getNumArgOperands(); ++i) {
1575  args.push_back(S->getArgOperand(i));
1576  }
1577  llvm::ReplaceInstWithInst(S, llvm::CallInst::Create(query_func, args, ""));
1578  }
1579 }
std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name ( const std::string &  s)

Definition at line 556 of file NativeCodegen.cpp.

References CHECK.

Referenced by gen_array_any_all_sigs(), and gen_translate_null_key_sigs().

556  {
557  if (s == "int8_t") {
558  return "i8";
559  }
560  if (s == "int16_t") {
561  return "i16";
562  }
563  if (s == "int32_t") {
564  return "i32";
565  }
566  if (s == "int64_t") {
567  return "i64";
568  }
569  CHECK(s == "float" || s == "double");
570  return s;
571 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

llvm::Function* anonymous_namespace{NativeCodegen.cpp}::create_row_function ( const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals,
llvm::Module *  module,
llvm::LLVMContext &  context 
)

Definition at line 1491 of file NativeCodegen.cpp.

References rt_udf_cpu_module, and rt_udf_gpu_module.

1495  {
1496  std::vector<llvm::Type*> row_process_arg_types;
1497 
1498  if (agg_col_count) {
1499  // output (aggregate) arguments
1500  for (size_t i = 0; i < agg_col_count; ++i) {
1501  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1502  }
1503  } else {
1504  // group by buffer
1505  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1506  // current match count
1507  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1508  // total match count passed from the caller
1509  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1510  // old total match count returned to the caller
1511  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1512  // max matched (total number of slots in the output buffer)
1513  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1514  }
1515 
1516  // aggregate init values
1517  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1518 
1519  // position argument
1520  row_process_arg_types.push_back(llvm::Type::getInt64Ty(context));
1521 
1522  // fragment row offset argument
1523  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1524 
1525  // number of rows for each scan
1526  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1527 
1528  // literals buffer argument
1529  if (hoist_literals) {
1530  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
1531  }
1532 
1533  // column buffer arguments
1534  for (size_t i = 0; i < in_col_count; ++i) {
1535  row_process_arg_types.emplace_back(llvm::Type::getInt8PtrTy(context));
1536  }
1537 
1538  // join hash table argument
1539  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1540 
1541  // generate the function
1542  auto ft =
1543  llvm::FunctionType::get(get_int_type(32, context), row_process_arg_types, false);
1544 
1545  auto row_func =
1546  llvm::Function::Create(ft, llvm::Function::ExternalLinkage, "row_func", module);
1547 
1548  // set the row function argument names; for debugging purposes only
1549  set_row_func_argnames(row_func, in_col_count, agg_col_count, hoist_literals);
1550 
1551  return row_func;
1552 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
void set_row_func_argnames(llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs ( llvm::Module &  M,
const std::unordered_set< llvm::Function * > &  live_funcs 
)

Definition at line 248 of file NativeCodegen.cpp.

Referenced by optimize_ir().

250  {
251  std::vector<llvm::Function*> dead_funcs;
252  for (auto& F : M) {
253  bool bAlive = false;
254  if (live_funcs.count(&F)) {
255  continue;
256  }
257  for (auto U : F.users()) {
258  auto* C = llvm::dyn_cast<const llvm::CallInst>(U);
259  if (!C || C->getParent()->getParent() != &F) {
260  bAlive = true;
261  break;
262  }
263  }
264  if (!bAlive) {
265  dead_funcs.push_back(&F);
266  }
267  }
268  for (auto pFn : dead_funcs) {
269  pFn->eraseFromParent();
270  }
271 }

+ Here is the caller graph for this function:

template<typename InstType >
llvm::Value* anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block ( llvm::Function *  func,
std::string  bb_name,
std::string  variable_name 
)

Definition at line 1807 of file NativeCodegen.cpp.

1809  {
1810  llvm::Value* result = nullptr;
1811  if (func == nullptr || variable_name.empty()) {
1812  return result;
1813  }
1814  bool is_found = false;
1815  for (auto bb_it = func->begin(); bb_it != func->end() && !is_found; ++bb_it) {
1816  if (!bb_name.empty() && bb_it->getName() != bb_name) {
1817  continue;
1818  }
1819  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); inst_it++) {
1820  if (llvm::isa<InstType>(*inst_it)) {
1821  if (inst_it->getName() == variable_name) {
1822  result = &*inst_it;
1823  is_found = true;
1824  break;
1825  }
1826  }
1827  }
1828  }
1829  return result;
1830 }
std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ( )

Definition at line 573 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

573  {
574  std::string result;
575  for (const std::string any_or_all : {"any", "all"}) {
576  for (const std::string elem_type :
577  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
578  for (const std::string needle_type :
579  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
580  for (const std::string op_name : {"eq", "ne", "lt", "le", "gt", "ge"}) {
581  result += ("declare i1 @array_" + any_or_all + "_" + op_name + "_" + elem_type +
582  "_" + needle_type + "(i8*, i64, " + cpp_to_llvm_name(needle_type) +
583  ", " + cpp_to_llvm_name(elem_type) + ");\n");
584  }
585  }
586  }
587  }
588  return result;
589 }
std::string cpp_to_llvm_name(const std::string &s)

+ Here is the call graph for this function:

std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ( )

Definition at line 591 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

591  {
592  std::string result;
593  for (const std::string key_type : {"int8_t", "int16_t", "int32_t", "int64_t"}) {
594  const auto key_llvm_type = cpp_to_llvm_name(key_type);
595  result += "declare i64 @translate_null_key_" + key_type + "(" + key_llvm_type + ", " +
596  key_llvm_type + ", i64);\n";
597  }
598  return result;
599 }
std::string cpp_to_llvm_name(const std::string &s)

+ Here is the call graph for this function:

std::vector<std::string> anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames ( const std::vector< Analyzer::Expr * > &  target_exprs,
const bool  is_group_by 
)

Definition at line 1581 of file NativeCodegen.cpp.

1582  {
1583  std::vector<std::string> result;
1584  for (size_t target_idx = 0, agg_col_idx = 0; target_idx < target_exprs.size();
1585  ++target_idx, ++agg_col_idx) {
1586  const auto target_expr = target_exprs[target_idx];
1587  CHECK(target_expr);
1588  const auto target_type_info = target_expr->get_type_info();
1589  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
1590  const bool is_varlen =
1591  (target_type_info.is_string() &&
1592  target_type_info.get_compression() == kENCODING_NONE) ||
1593  target_type_info.is_array(); // TODO: should it use is_varlen_array() ?
1594  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
1595  result.emplace_back(target_type_info.is_fp() ? "agg_id_double" : "agg_id");
1596  if (is_varlen) {
1597  result.emplace_back("agg_id");
1598  }
1599  if (target_type_info.is_geometry()) {
1600  result.emplace_back("agg_id");
1601  for (auto i = 2; i < 2 * target_type_info.get_physical_coord_cols(); ++i) {
1602  result.emplace_back("agg_id");
1603  }
1604  }
1605  continue;
1606  }
1607  const auto agg_type = agg_expr->get_aggtype();
1608  const auto& agg_type_info =
1609  agg_type != kCOUNT ? agg_expr->get_arg()->get_type_info() : target_type_info;
1610  switch (agg_type) {
1611  case kAVG: {
1612  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1613  !agg_type_info.is_fp()) {
1614  throw std::runtime_error("AVG is only valid on integer and floating point");
1615  }
1616  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1617  ? "agg_sum"
1618  : "agg_sum_double");
1619  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1620  ? "agg_count"
1621  : "agg_count_double");
1622  break;
1623  }
1624  case kMIN: {
1625  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1626  agg_type_info.is_geometry()) {
1627  throw std::runtime_error(
1628  "MIN on strings, arrays or geospatial types not supported yet");
1629  }
1630  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1631  ? "agg_min"
1632  : "agg_min_double");
1633  break;
1634  }
1635  case kMAX: {
1636  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1637  agg_type_info.is_geometry()) {
1638  throw std::runtime_error(
1639  "MAX on strings, arrays or geospatial types not supported yet");
1640  }
1641  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1642  ? "agg_max"
1643  : "agg_max_double");
1644  break;
1645  }
1646  case kSUM: {
1647  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1648  !agg_type_info.is_fp()) {
1649  throw std::runtime_error("SUM is only valid on integer and floating point");
1650  }
1651  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1652  ? "agg_sum"
1653  : "agg_sum_double");
1654  break;
1655  }
1656  case kCOUNT:
1657  result.emplace_back(agg_expr->get_is_distinct() ? "agg_count_distinct"
1658  : "agg_count");
1659  break;
1660  case kSINGLE_VALUE: {
1661  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1662  break;
1663  }
1664  case kSAMPLE: {
1665  // Note that varlen SAMPLE arguments are handled separately above
1666  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1667  break;
1668  }
1669      case kAPPROX_COUNT_DISTINCT:
1670  result.emplace_back("agg_approximate_count_distinct");
1671  break;
1672  case kAPPROX_MEDIAN:
1673  result.emplace_back("agg_approx_median");
1674  break;
1675  default:
1676  CHECK(false);
1677  }
1678  }
1679  return result;
1680 }
Definition: sqldefs.h:73
Definition: sqldefs.h:75
Definition: sqldefs.h:76
SQLAgg get_aggtype() const
Definition: Analyzer.h:1095
#define CHECK(condition)
Definition: Logger.h:197
Definition: sqldefs.h:74
Definition: sqldefs.h:72
size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size ( const bool  shared_mem_used,
const QueryMemoryDescriptor query_mem_desc_ptr 
)

Definition at line 2289 of file NativeCodegen.cpp.

2290  {
2291  return shared_mem_used
2292  ? (query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount())
2293  : 0;
2294 }
bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported ( const QueryMemoryDescriptor query_mem_desc_ptr,
const RelAlgExecutionUnit ra_exe_unit,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const ExecutorDeviceType  device_type,
const unsigned  gpu_blocksize,
const unsigned  num_blocks_per_mp 
)

To simplify the implementation for practical purposes, we initially provide shared memory support for cases where there are at most as many entries in the output buffer as there are threads within each GPU device. In order to relax this assumption later, we need to add a for loop in generated codes such that each thread loops over multiple entries. TODO: relax this if necessary

Definition at line 2296 of file NativeCodegen.cpp.

2301  {
2302  if (device_type == ExecutorDeviceType::CPU) {
2303  return false;
2304  }
2305  if (query_mem_desc_ptr->didOutputColumnar()) {
2306  return false;
2307  }
2308  CHECK(query_mem_desc_ptr);
2309  CHECK(cuda_mgr);
2310  /*
2311  * We only use shared memory strategy if GPU hardware provides native shared
2312  * memory atomics support. From CUDA Toolkit documentation:
2313  * https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
2314  * Maxwell, Pascal [and Volta] provides native shared memory atomic operations
2315  * for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
2316  * (CAS)."
2317  *
2318  **/
2319  if (!cuda_mgr->isArchMaxwellOrLaterForAll()) {
2320  return false;
2321  }
2322 
2323  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2324          QueryDescriptionType::NonGroupedAggregate &&
2325      g_enable_smem_non_grouped_agg &&
2326  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty()) {
2327  // TODO: relax this, if necessary
2328  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
2329  return false;
2330  }
2331  // skip shared memory usage when dealing with 1) variable length targets, 2)
2332  // not a COUNT aggregate
2333  const auto target_infos =
2334  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2335  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
2336  if (std::find_if(target_infos.begin(),
2337  target_infos.end(),
2338  [&supported_aggs](const TargetInfo& ti) {
2339  if (ti.sql_type.is_varlen() ||
2340  !supported_aggs.count(ti.agg_kind)) {
2341  return true;
2342  } else {
2343  return false;
2344  }
2345  }) == target_infos.end()) {
2346  return true;
2347  }
2348  }
2349  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2350          QueryDescriptionType::GroupByPerfectHash &&
2351      g_enable_smem_group_by) {
2352      /**
2353       * To simplify the implementation for practical purposes, we
2354       * initially provide shared memory support for cases where there are at most as
2355       * many entries in the output buffer as there are threads within each GPU device.
2356       * In order to relax this assumption later, we need to add a for loop in generated
2357       * codes such that each thread loops over multiple entries.
2358       * TODO: relax this if necessary
2359       */
2360  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
2361  return false;
2362  }
2363 
2364  // Fundamentally, we should use shared memory whenever the output buffer
2365  // is small enough so that we can fit it in the shared memory and yet expect
2366  // good occupancy.
2367  // For now, we allow keyless, row-wise layout, and only for perfect hash
2368  // group by operations.
2369  if (query_mem_desc_ptr->hasKeylessHash() &&
2370  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty() &&
2371  !query_mem_desc_ptr->useStreamingTopN()) {
2372  const size_t shared_memory_threshold_bytes = std::min(
2373          g_gpu_smem_threshold == 0 ? SIZE_MAX : g_gpu_smem_threshold,
2374  cuda_mgr->getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp);
2375  const auto output_buffer_size =
2376  query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount();
2377  if (output_buffer_size > shared_memory_threshold_bytes) {
2378  return false;
2379  }
2380 
2381  // skip shared memory usage when dealing with 1) variable length targets, 2)
2382  // non-basic aggregates (COUNT, SUM, MIN, MAX, AVG)
2383  // TODO: relax this if necessary
2384  const auto target_infos =
2385  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2386  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
2387      if (g_enable_smem_grouped_non_count_agg) {
2388  supported_aggs = {kCOUNT, kMIN, kMAX, kSUM, kAVG};
2389  }
2390  if (std::find_if(target_infos.begin(),
2391  target_infos.end(),
2392  [&supported_aggs](const TargetInfo& ti) {
2393  if (ti.sql_type.is_varlen() ||
2394  !supported_aggs.count(ti.agg_kind)) {
2395  return true;
2396  } else {
2397  return false;
2398  }
2399  }) == target_infos.end()) {
2400  return true;
2401  }
2402  }
2403  }
2404  return false;
2405 }
std::vector< Analyzer::Expr * > target_exprs
bool g_enable_smem_group_by
bool countDistinctDescriptorsLogicallyEmpty() const
#define SIZE_MAX
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:127
Definition: sqldefs.h:73
Definition: sqldefs.h:75
size_t getMinSharedMemoryPerBlockForAllDevices() const
Definition: CudaMgr.h:114
QueryDescriptionType getQueryDescriptionType() const
bool isArchMaxwellOrLaterForAll() const
Definition: CudaMgr.cpp:287
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:124
Definition: sqldefs.h:76
#define CHECK(condition)
Definition: Logger.h:197
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
Definition: sqldefs.h:74
Definition: sqldefs.h:72
size_t g_gpu_smem_threshold
Definition: Execute.cpp:119
void anonymous_namespace{NativeCodegen.cpp}::optimize_ir ( llvm::Function *  query_func,
llvm::Module *  module,
llvm::legacy::PassManager &  pass_manager,
const std::unordered_set< llvm::Function * > &  live_funcs,
const CompilationOptions co 
)

Definition at line 309 of file NativeCodegen.cpp.

References eliminate_dead_self_recursive_funcs(), LoopStrengthReduction, and CompilationOptions::opt_level.

Referenced by CodeGenerator::generateNativeCPUCode().

313  {
314  pass_manager.add(llvm::createAlwaysInlinerLegacyPass());
315  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
316  pass_manager.add(llvm::createInstSimplifyLegacyPass());
317  pass_manager.add(llvm::createInstructionCombiningPass());
318  pass_manager.add(llvm::createGlobalOptimizerPass());
319 
320  pass_manager.add(llvm::createLICMPass());
321      if (co.opt_level == ExecutorOptLevel::LoopStrengthReduction) {
322  pass_manager.add(llvm::createLoopStrengthReducePass());
323  }
324  pass_manager.run(*module);
325 
326  eliminate_dead_self_recursive_funcs(*module, live_funcs);
327 }
void eliminate_dead_self_recursive_funcs(llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
ExecutorOptLevel opt_level

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Function &  F,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 197 of file NativeCodegen.cpp.

Referenced by scan_function_calls().

200  {
201  for (llvm::inst_iterator I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
202  if (auto* CI = llvm::dyn_cast<llvm::CallInst>(&*I)) {
203  auto* F2 = CI->getCalledFunction();
204  if (F2 != nullptr) {
205  auto F2name = F2->getName().str();
206  if (F2->isDeclaration()) {
207  if (F2name.rfind("__", 0) !=
208  0 // assume symbols with double underscore are defined
209  && F2name.rfind("llvm.", 0) !=
210  0 // TODO: this may give false positive for NVVM intrinsics
211  && ignored.find(F2name) == ignored.end() // not in ignored list
212  ) {
213  undefined.emplace(F2name);
214  }
215  } else {
216  if (defined.find(F2name) == defined.end()) {
217  defined.emplace(F2name);
218  scan_function_calls<T>(*F2, defined, undefined, ignored);
219  }
220  }
221  }
222  }
223  }
224 }

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  module,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 227 of file NativeCodegen.cpp.

References scan_function_calls().

230  {
231  for (auto& F : module) {
232  if (!F.isDeclaration()) {
233  scan_function_calls(F, defined, undefined, ignored);
234  }
235  }
236 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)

+ Here is the call graph for this function:

template<typename T = void>
std::tuple<std::unordered_set<std::string>, std::unordered_set<std::string> > anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  module,
const std::unordered_set< std::string > &  ignored = {} 
)

Definition at line 240 of file NativeCodegen.cpp.

241  {}) {
242  std::unordered_set<std::string> defined, undefined;
243  scan_function_calls(module, defined, undefined, ignored);
244  return std::make_tuple(defined, undefined);
245 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes ( llvm::Function *  query_func,
CgenState cgen_state 
)

Definition at line 2408 of file NativeCodegen.cpp.

2409  {
2410  std::string llvm_ir;
2411  std::unordered_set<llvm::MDNode*> md;
2412 
2413  // Loop over all instructions in the query function.
2414  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
2415  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2416  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2417  instr_it->getAllMetadata(imd);
2418  for (auto [kind, node] : imd) {
2419  md.insert(node);
2420  }
2421  }
2422  }
2423 
2424  // Loop over all instructions in the row function.
2425  for (auto bb_it = cgen_state->row_func_->begin(); bb_it != cgen_state->row_func_->end();
2426  ++bb_it) {
2427  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2428  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2429  instr_it->getAllMetadata(imd);
2430  for (auto [kind, node] : imd) {
2431  md.insert(node);
2432  }
2433  }
2434  }
2435 
2436  // Loop over all instructions in the filter function.
2437  if (cgen_state->filter_func_) {
2438  for (auto bb_it = cgen_state->filter_func_->begin();
2439  bb_it != cgen_state->filter_func_->end();
2440  ++bb_it) {
2441  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2442  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2443  instr_it->getAllMetadata(imd);
2444  for (auto [kind, node] : imd) {
2445  md.insert(node);
2446  }
2447  }
2448  }
2449  }
2450 
2451  // Sort the metadata by canonical number and convert to text.
2452  if (!md.empty()) {
2453  std::map<size_t, std::string> sorted_strings;
2454  for (auto p : md) {
2455  std::string str;
2456  llvm::raw_string_ostream os(str);
2457  p->print(os, cgen_state->module_, true);
2458  os.flush();
2459  auto fields = split(str, {}, 1);
2460  if (fields.empty() || fields[0].empty()) {
2461  continue;
2462  }
2463  sorted_strings.emplace(std::stoul(fields[0].substr(1)), str);
2464  }
2465  llvm_ir += "\n";
2466  for (auto [id, text] : sorted_strings) {
2467  llvm_ir += text;
2468  llvm_ir += "\n";
2469  }
2470  }
2471 
2472  return llvm_ir;
2473 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
llvm::Function * row_func_
Definition: CgenState.h:319
llvm::Module * module_
Definition: CgenState.h:318
llvm::Function * filter_func_
Definition: CgenState.h:320
void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames ( llvm::Function *  row_func,
const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals 
)

Definition at line 1442 of file NativeCodegen.cpp.

1445  {
1446  auto arg_it = row_func->arg_begin();
1447 
1448  if (agg_col_count) {
1449  for (size_t i = 0; i < agg_col_count; ++i) {
1450  arg_it->setName("out");
1451  ++arg_it;
1452  }
1453  } else {
1454  arg_it->setName("group_by_buff");
1455  ++arg_it;
1456  arg_it->setName("crt_matched");
1457  ++arg_it;
1458  arg_it->setName("total_matched");
1459  ++arg_it;
1460  arg_it->setName("old_total_matched");
1461  ++arg_it;
1462  arg_it->setName("max_matched");
1463  ++arg_it;
1464  }
1465 
1466  arg_it->setName("agg_init_val");
1467  ++arg_it;
1468 
1469  arg_it->setName("pos");
1470  ++arg_it;
1471 
1472  arg_it->setName("frag_row_off");
1473  ++arg_it;
1474 
1475  arg_it->setName("num_rows_per_scan");
1476  ++arg_it;
1477 
1478  if (hoist_literals) {
1479  arg_it->setName("literals");
1480  ++arg_it;
1481  }
1482 
1483  for (size_t i = 0; i < in_col_count; ++i) {
1484  arg_it->setName("col_buf" + std::to_string(i));
1485  ++arg_it;
1486  }
1487 
1488  arg_it->setName("join_hash_tables");
1489 }
std::string to_string(char const *&&v)
template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module &  module)

Definition at line 158 of file NativeCodegen.cpp.

References f.

Referenced by show_defined().

158  {
159  std::cout << "defines: ";
160  for (auto& f : module.getFunctionList()) {
161  if (!f.isDeclaration()) {
162  std::cout << f.getName().str() << ", ";
163  }
164  }
165  std::cout << std::endl;
166 }
char * f

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module *  module)

Definition at line 169 of file NativeCodegen.cpp.

References show_defined().

169  {
170  if (module == nullptr) {
171  std::cout << "is null" << std::endl;
172  } else {
173  show_defined(*module);
174  }
175 }
void show_defined(llvm::Module &module)

+ Here is the call graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( std::unique_ptr< llvm::Module > &  module)

Definition at line 178 of file NativeCodegen.cpp.

References show_defined().

178  {
179  show_defined(module.get());
180 }
void show_defined(llvm::Module &module)

+ Here is the call graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::throw_parseIR_error ( const llvm::SMDiagnostic &  parse_error,
std::string  src = "",
const bool  is_gpu = false 
)

Definition at line 126 of file NativeCodegen.cpp.

128  {
129  std::string excname = (is_gpu ? "NVVM IR ParseError: " : "LLVM IR ParseError: ");
130  llvm::raw_string_ostream ss(excname);
131  parse_error.print(src.c_str(), ss, false, false);
132  throw ParseIRError(ss.str());
133 }

Variable Documentation

const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

Definition at line 601 of file NativeCodegen.cpp.