OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
anonymous_namespace{NativeCodegen.cpp} Namespace Reference

Functions

void throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
 
template<typename T = void>
void show_defined (llvm::Module &module)
 
template<typename T = void>
void show_defined (llvm::Module *module)
 
template<typename T = void>
void show_defined (std::unique_ptr< llvm::Module > &module)
 
template<typename T = void>
void scan_function_calls (llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
void scan_function_calls (llvm::Module &module, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
std::tuple< std::unordered_set
< std::string >
, std::unordered_set
< std::string > > 
scan_function_calls (llvm::Module &module, const std::unordered_set< std::string > &ignored={})
 
void eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
 
void optimize_ir (llvm::Function *query_func, llvm::Module *module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
 
std::string assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *module)
 
std::string cpp_to_llvm_name (const std::string &s)
 
std::string gen_array_any_all_sigs ()
 
std::string gen_translate_null_key_sigs ()
 
void bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *module)
 
void set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *module, llvm::LLVMContext &context)
 
void bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *module)
 
std::vector< std::string > get_agg_fnames (const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
 
template<typename InstType >
llvm::Value * find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned gpu_blocksize, const unsigned num_blocks_per_mp)
 
std::string serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 

Variables

const std::string cuda_rt_decls
 

Function Documentation

std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU ( ExecutionEngineWrapper execution_engine,
llvm::Module *  module 
)

Definition at line 396 of file NativeCodegen.cpp.

References CHECK.

Referenced by CodeGenerator::generateNativeCPUCode().

// Runs a codegen-only pass pipeline over `module` using the execution
// engine's TargetMachine to produce textual CPU assembly, returned wrapped
// in "Assembly for the CPU:" / "End of assembly" markers.
397  {
398  llvm::legacy::PassManager pass_manager;
399  auto cpu_target_machine = execution_engine->getTargetMachine();
400  CHECK(cpu_target_machine);
// Emit into an in-memory small-string buffer rather than a file.
401  llvm::SmallString<256> code_str;
402  llvm::raw_svector_ostream os(code_str);
// LLVM 10 moved the CodeGenFileType enum out of TargetMachine into the
// top-level llvm namespace; both branches request assembly output.
403 #if LLVM_VERSION_MAJOR >= 10
404  cpu_target_machine->addPassesToEmitFile(
405  pass_manager, os, nullptr, llvm::CGFT_AssemblyFile);
406 #else
407  cpu_target_machine->addPassesToEmitFile(
408  pass_manager, os, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
409 #endif
410  pass_manager.run(*module);
411  return "Assembly for the CPU:\n" + std::string(code_str.str()) + "\nEnd of assembly";
412 }
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the caller graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders ( const std::string &  pos_fn_name,
const bool  use_resume_param,
llvm::Function *  query_func,
llvm::Module *  module 
)

Definition at line 1417 of file NativeCodegen.cpp.

// Finds the first call to `pos_fn_name` inside `query_func` and replaces it
// in place with a call to the "<pos_fn_name>_impl" function from `module`.
// When `use_resume_param` is set, the query function's "error_code" argument
// is forwarded to the _impl call. Only the first matching call is rewritten
// (note the break below).
1420  {
1421  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1422  ++it) {
1423  if (!llvm::isa<llvm::CallInst>(*it)) {
1424  continue;
1425  }
1426  auto& pos_call = llvm::cast<llvm::CallInst>(*it);
1427  if (std::string(pos_call.getCalledFunction()->getName()) == pos_fn_name) {
1428  if (use_resume_param) {
// Look up the query function's error-code argument by name and pass it on.
1429  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1430  llvm::ReplaceInstWithInst(
1431  &pos_call,
1432  llvm::CallInst::Create(module->getFunction(pos_fn_name + "_impl"),
1433  error_code_arg));
1434  } else {
1435  llvm::ReplaceInstWithInst(
1436  &pos_call,
1437  llvm::CallInst::Create(module->getFunction(pos_fn_name + "_impl")));
1438  }
// Only the first placeholder call is bound; stop scanning.
1439  break;
1440  }
1441  }
1442 }
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
void anonymous_namespace{NativeCodegen.cpp}::bind_query ( llvm::Function *  query_func,
const std::string &  query_fname,
llvm::Function *  multifrag_query_func,
llvm::Module *  module 
)

Definition at line 1557 of file NativeCodegen.cpp.

// Rewires `multifrag_query_func` so that every call to the stub named
// `query_fname` becomes a direct call to `query_func` with the same
// arguments. Stubs are collected first, then replaced, to avoid mutating
// the instruction list while iterating over it.
1560  {
1561  std::vector<llvm::CallInst*> query_stubs;
1562  for (auto it = llvm::inst_begin(multifrag_query_func),
1563  e = llvm::inst_end(multifrag_query_func);
1564  it != e;
1565  ++it) {
1566  if (!llvm::isa<llvm::CallInst>(*it)) {
1567  continue;
1568  }
1569  auto& query_call = llvm::cast<llvm::CallInst>(*it);
1570  if (std::string(query_call.getCalledFunction()->getName()) == query_fname) {
1571  query_stubs.push_back(&query_call);
1572  }
1573  }
1574  for (auto& S : query_stubs) {
// Copy the stub's argument operands over to the real query function call.
1575  std::vector<llvm::Value*> args;
1576  for (size_t i = 0; i < S->getNumArgOperands(); ++i) {
1577  args.push_back(S->getArgOperand(i));
1578  }
1579  llvm::ReplaceInstWithInst(S, llvm::CallInst::Create(query_func, args, ""));
1580  }
1581 }
std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name ( const std::string &  s)

Definition at line 556 of file NativeCodegen.cpp.

References CHECK.

Referenced by gen_array_any_all_sigs(), and gen_translate_null_key_sigs().

// Maps a C++ fixed-width integer type name to its LLVM IR integer type name
// (int8_t -> i8, ..., int64_t -> i64). "float" and "double" are already
// valid LLVM type names and pass through unchanged; any other input trips
// the CHECK.
556  {
557  if (s == "int8_t") {
558  return "i8";
559  }
560  if (s == "int16_t") {
561  return "i16";
562  }
563  if (s == "int32_t") {
564  return "i32";
565  }
566  if (s == "int64_t") {
567  return "i64";
568  }
569  CHECK(s == "float" || s == "double");
570  return s;
571 }
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the caller graph for this function:

llvm::Function* anonymous_namespace{NativeCodegen.cpp}::create_row_function ( const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals,
llvm::Module *  module,
llvm::LLVMContext &  context 
)

Definition at line 1493 of file NativeCodegen.cpp.

// Builds the signature for and creates the externally-linked "row_func"
// function in `module`. The parameter list depends on whether the query
// aggregates into per-column outputs (agg_col_count > 0) or into a group-by
// buffer, and on whether literals are hoisted into a separate buffer.
// Returns the newly created llvm::Function; argument names are set for
// debugging via set_row_func_argnames().
1497  {
1498  std::vector<llvm::Type*> row_process_arg_types;
1499 
1500  if (agg_col_count) {
1501  // output (aggregate) arguments
1502  for (size_t i = 0; i < agg_col_count; ++i) {
1503  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1504  }
1505  } else {
1506  // group by buffer
1507  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1508  // current match count
1509  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1510  // total match count passed from the caller
1511  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1512  // old total match count returned to the caller
1513  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1514  // max matched (total number of slots in the output buffer)
1515  row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
1516  }
1517 
1518  // aggregate init values
1519  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1520 
1521  // position argument
1522  row_process_arg_types.push_back(llvm::Type::getInt64Ty(context));
1523 
1524  // fragment row offset argument
1525  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1526 
1527  // number of rows for each scan
1528  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1529 
1530  // literals buffer argument
1531  if (hoist_literals) {
1532  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
1533  }
1534 
1535  // column buffer arguments
1536  for (size_t i = 0; i < in_col_count; ++i) {
1537  row_process_arg_types.emplace_back(llvm::Type::getInt8PtrTy(context));
1538  }
1539 
1540  // join hash table argument
1541  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
1542 
// The row function returns an i32 status code; varargs are not used.
1543  // generate the function
1544  auto ft =
1545  llvm::FunctionType::get(get_int_type(32, context), row_process_arg_types, false);
1546 
1547  auto row_func =
1548  llvm::Function::Create(ft, llvm::Function::ExternalLinkage, "row_func", module);
1549 
1550  // set the row function argument names; for debugging purposes only
1551  set_row_func_argnames(row_func, in_col_count, agg_col_count, hoist_literals);
1552 
1553  return row_func;
1554 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
void set_row_func_argnames(llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs ( llvm::Module &  M,
const std::unordered_set< llvm::Function * > &  live_funcs 
)

Definition at line 248 of file NativeCodegen.cpp.

Referenced by optimize_ir().

// Erases from module `M` every function that is not in `live_funcs` and
// whose only users are call instructions inside itself (i.e. dead,
// possibly self-recursive functions that ordinary DCE would keep alive
// because of the self-reference). Collection and erasure are two phases so
// the module is not mutated while being iterated.
250  {
251  std::vector<llvm::Function*> dead_funcs;
252  for (auto& F : M) {
253  bool bAlive = false;
254  if (live_funcs.count(&F)) {
255  continue;
256  }
// A function is "alive" if any user is not a call, or is a call from a
// function other than F itself.
257  for (auto U : F.users()) {
258  auto* C = llvm::dyn_cast<const llvm::CallInst>(U);
259  if (!C || C->getParent()->getParent() != &F) {
260  bAlive = true;
261  break;
262  }
263  }
264  if (!bAlive) {
265  dead_funcs.push_back(&F);
266  }
267  }
268  for (auto pFn : dead_funcs) {
269  pFn->eraseFromParent();
270  }
271 }

+ Here is the caller graph for this function:

template<typename InstType >
llvm::Value* anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block ( llvm::Function *  func,
std::string  bb_name,
std::string  variable_name 
)

Definition at line 1809 of file NativeCodegen.cpp.

// Searches `func` for an instruction of type InstType named
// `variable_name`. If `bb_name` is non-empty, only basic blocks with that
// name are searched. Returns the matching instruction as an llvm::Value*,
// or nullptr when func is null, variable_name is empty, or no match exists.
1811  {
1812  llvm::Value* result = nullptr;
1813  if (func == nullptr || variable_name.empty()) {
1814  return result;
1815  }
1816  bool is_found = false;
1817  for (auto bb_it = func->begin(); bb_it != func->end() && !is_found; ++bb_it) {
1818  if (!bb_name.empty() && bb_it->getName() != bb_name) {
1819  continue;
1820  }
1821  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); inst_it++) {
1822  if (llvm::isa<InstType>(*inst_it)) {
1823  if (inst_it->getName() == variable_name) {
1824  result = &*inst_it;
1825  is_found = true;
1826  break;
1827  }
1828  }
1829  }
1830  }
1831  return result;
1832 }
std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ( )

Definition at line 573 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

// Generates LLVM IR "declare" lines for every array_{any,all}_{op}
// comparison function over all combinations of element type, needle type,
// and comparison operator (6 x 6 x 6 x 2 signatures). Each returns i1 and
// takes the array pointer (i8*), its length (i64), the needle, and the
// element null sentinel.
573  {
574  std::string result;
575  for (const std::string any_or_all : {"any", "all"}) {
576  for (const std::string elem_type :
577  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
578  for (const std::string needle_type :
579  {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
580  for (const std::string op_name : {"eq", "ne", "lt", "le", "gt", "ge"}) {
581  result += ("declare i1 @array_" + any_or_all + "_" + op_name + "_" + elem_type +
582  "_" + needle_type + "(i8*, i64, " + cpp_to_llvm_name(needle_type) +
583  ", " + cpp_to_llvm_name(elem_type) + ");\n");
584  }
585  }
586  }
587  }
588  return result;
589 }
std::string cpp_to_llvm_name(const std::string &s)

+ Here is the call graph for this function:

std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ( )

Definition at line 591 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

// Generates LLVM IR "declare" lines for the translate_null_key_<type>
// runtime functions, one per fixed-width integer key type. Each takes the
// key, a second value of the same type, and an i64, returning i64.
591  {
592  std::string result;
593  for (const std::string key_type : {"int8_t", "int16_t", "int32_t", "int64_t"}) {
594  const auto key_llvm_type = cpp_to_llvm_name(key_type);
595  result += "declare i64 @translate_null_key_" + key_type + "(" + key_llvm_type + ", " +
596  key_llvm_type + ", i64);\n";
597  }
598  return result;
599 }
std::string cpp_to_llvm_name(const std::string &s)

+ Here is the call graph for this function:

std::vector<std::string> anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames ( const std::vector< Analyzer::Expr * > &  target_exprs,
const bool  is_group_by 
)

Definition at line 1583 of file NativeCodegen.cpp.

// Maps each target expression to the name(s) of the runtime aggregate
// function(s) that implement it (e.g. AVG expands to an agg_sum + agg_count
// pair; varlen and geometry projections expand to multiple agg_id slots).
// Throws std::runtime_error for aggregate/type combinations that are not
// supported. Returns one or more names per target, in target order.
1584  {
1585  std::vector<std::string> result;
1586  for (size_t target_idx = 0, agg_col_idx = 0; target_idx < target_exprs.size();
1587  ++target_idx, ++agg_col_idx) {
1588  const auto target_expr = target_exprs[target_idx];
1589  CHECK(target_expr);
1590  const auto target_type_info = target_expr->get_type_info();
1591  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
1592  const bool is_varlen =
1593  (target_type_info.is_string() &&
1594  target_type_info.get_compression() == kENCODING_NONE) ||
1595  target_type_info.is_array(); // TODO: should it use is_varlen_array() ?
// Plain projections and SAMPLE aggregates use agg_id; varlen values need an
// extra slot, and geometry needs one slot per physical coordinate column.
1596  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
1597  result.emplace_back(target_type_info.is_fp() ? "agg_id_double" : "agg_id");
1598  if (is_varlen) {
1599  result.emplace_back("agg_id");
1600  }
1601  if (target_type_info.is_geometry()) {
1602  result.emplace_back("agg_id");
1603  for (auto i = 2; i < 2 * target_type_info.get_physical_coord_cols(); ++i) {
1604  result.emplace_back("agg_id");
1605  }
1606  }
1607  continue;
1608  }
1609  const auto agg_type = agg_expr->get_aggtype();
// COUNT is typed by the target itself; other aggregates by their argument.
1610  const auto& agg_type_info =
1611  agg_type != kCOUNT ? agg_expr->get_arg()->get_type_info() : target_type_info;
1612  switch (agg_type) {
1613  case kAVG: {
1614  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1615  !agg_type_info.is_fp()) {
1616  throw std::runtime_error("AVG is only valid on integer and floating point");
1617  }
// AVG is computed as a sum/count pair chosen by argument type.
1618  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1619  ? "agg_sum"
1620  : "agg_sum_double");
1621  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1622  ? "agg_count"
1623  : "agg_count_double");
1624  break;
1625  }
1626  case kMIN: {
1627  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1628  agg_type_info.is_geometry()) {
1629  throw std::runtime_error(
1630  "MIN on strings, arrays or geospatial types not supported yet");
1631  }
1632  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1633  ? "agg_min"
1634  : "agg_min_double");
1635  break;
1636  }
1637  case kMAX: {
1638  if (agg_type_info.is_string() || agg_type_info.is_array() ||
1639  agg_type_info.is_geometry()) {
1640  throw std::runtime_error(
1641  "MAX on strings, arrays or geospatial types not supported yet");
1642  }
1643  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1644  ? "agg_max"
1645  : "agg_max_double");
1646  break;
1647  }
1648  case kSUM: {
1649  if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
1650  !agg_type_info.is_fp()) {
1651  throw std::runtime_error("SUM is only valid on integer and floating point");
1652  }
1653  result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
1654  ? "agg_sum"
1655  : "agg_sum_double");
1656  break;
1657  }
1658  case kCOUNT:
1659  result.emplace_back(agg_expr->get_is_distinct() ? "agg_count_distinct"
1660  : "agg_count");
1661  break;
1662  case kSINGLE_VALUE: {
1663  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1664  break;
1665  }
1666  case kSAMPLE: {
1667  // Note that varlen SAMPLE arguments are handled separately above
1668  result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
1669  break;
1670  }
// NOTE(review): source line 1671 — a case label (presumably
// kAPPROX_COUNT_DISTINCT, given the emitted name below) was lost when this
// listing was extracted; confirm against NativeCodegen.cpp.
1672  result.emplace_back("agg_approximate_count_distinct");
1673  break;
1674  case kAPPROX_MEDIAN:
1675  result.emplace_back("agg_approx_median");
1676  break;
1677  default:
1678  CHECK(false);
1679  }
1680  }
1681  return result;
1682 }
Definition: sqldefs.h:73
Definition: sqldefs.h:75
Definition: sqldefs.h:76
SQLAgg get_aggtype() const
Definition: Analyzer.h:1095
#define CHECK(condition)
Definition: Logger.h:203
Definition: sqldefs.h:74
Definition: sqldefs.h:72
size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size ( const bool  shared_mem_used,
const QueryMemoryDescriptor query_mem_desc_ptr 
)

Definition at line 2291 of file NativeCodegen.cpp.

// Returns the GPU shared-memory requirement in bytes for the output buffer
// (row size * entry count) when shared memory is in use, and 0 otherwise.
2292  {
2293  return shared_mem_used
2294  ? (query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount())
2295  : 0;
2296 }
bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported ( const QueryMemoryDescriptor query_mem_desc_ptr,
const RelAlgExecutionUnit ra_exe_unit,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const ExecutorDeviceType  device_type,
const unsigned  gpu_blocksize,
const unsigned  num_blocks_per_mp 
)

To simplify the implementation for practical purposes, we initially provide shared memory support for cases where there are at most as many entries in the output buffer as there are threads within each GPU device. In order to relax this assumption later, we need to add a for loop in generated codes such that each thread loops over multiple entries. TODO: relax this if necessary

Definition at line 2298 of file NativeCodegen.cpp.

// Decides whether the query can use GPU shared memory for its output
// buffer. Requires GPU execution, row-wise (non-columnar) output, a
// Maxwell-or-later device for all GPUs, and — per query type — an output
// buffer no larger than one entry per thread in the block, no varlen
// targets, and only aggregates the shared-memory code path supports.
// NOTE(review): this listing was extracted from a documentation render and
// several source lines are elided (2326-2327, 2352-2361, 2375, 2389, marked
// below); confirm the full conditions against NativeCodegen.cpp.
2303  {
2304  if (device_type == ExecutorDeviceType::CPU) {
2305  return false;
2306  }
// NOTE(review): query_mem_desc_ptr is dereferenced here, but the
// CHECK(query_mem_desc_ptr) null-check only runs below at line 2310 — the
// CHECKs should precede the first dereference.
2307  if (query_mem_desc_ptr->didOutputColumnar()) {
2308  return false;
2309  }
2310  CHECK(query_mem_desc_ptr);
2311  CHECK(cuda_mgr);
2312  /*
2313  * We only use shared memory strategy if GPU hardware provides native shared
2314  * memory atomics support. From CUDA Toolkit documentation:
2315  * https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
2316  * Maxwell, Pascal [and Volta] provides native shared memory atomic operations
2317  * for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
2318  * (CAS)."
2319  *
2320  **/
2321  if (!cuda_mgr->isArchMaxwellOrLaterForAll()) {
2322  return false;
2323  }
2324 
// NOTE(review): lines 2326-2327 elided in extraction — presumably the
// compared QueryDescriptionType enumerator and an `&&` clause (likely the
// non-grouped-aggregate case gated on g_enable_smem_non_grouped_agg, per
// the reference list); confirm against the source.
2325  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2328  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty()) {
2329  // TODO: relax this, if necessary
2330  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
2331  return false;
2332  }
2333  // skip shared memory usage when dealing with 1) variable length targets, 2)
2334  // not a COUNT aggregate
2335  const auto target_infos =
2336  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2337  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
// Accept only if no target is varlen or uses an unsupported aggregate.
2338  if (std::find_if(target_infos.begin(),
2339  target_infos.end(),
2340  [&supported_aggs](const TargetInfo& ti) {
2341  if (ti.sql_type.is_varlen() ||
2342  !supported_aggs.count(ti.agg_kind)) {
2343  return true;
2344  } else {
2345  return false;
2346  }
2347  }) == target_infos.end()) {
2348  return true;
2349  }
2350  }
// NOTE(review): lines 2352-2361 elided in extraction — presumably the
// GroupByPerfectHash comparison and feature-flag guard (g_enable_smem_group_by
// appears in the reference list); confirm against the source.
2351  if (query_mem_desc_ptr->getQueryDescriptionType() ==
2362  if (gpu_blocksize < query_mem_desc_ptr->getEntryCount()) {
2363  return false;
2364  }
2365 
2366  // Fundamentally, we should use shared memory whenever the output buffer
2367  // is small enough so that we can fit it in the shared memory and yet expect
2368  // good occupancy.
2369  // For now, we allow keyless, row-wise layout, and only for perfect hash
2370  // group by operations.
2371  if (query_mem_desc_ptr->hasKeylessHash() &&
2372  query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty() &&
2373  !query_mem_desc_ptr->useStreamingTopN()) {
// NOTE(review): line 2375 elided — the first std::min argument (likely
// derived from g_gpu_smem_threshold / SIZE_MAX per the reference list).
2374  const size_t shared_memory_threshold_bytes = std::min(
2376  cuda_mgr->getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp);
2377  const auto output_buffer_size =
2378  query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount();
2379  if (output_buffer_size > shared_memory_threshold_bytes) {
2380  return false;
2381  }
2382 
2383  // skip shared memory usage when dealing with 1) variable length targets, 2)
2384  // non-basic aggregates (COUNT, SUM, MIN, MAX, AVG)
2385  // TODO: relax this if necessary
2386  const auto target_infos =
2387  target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
2388  std::unordered_set<SQLAgg> supported_aggs{kCOUNT};
// NOTE(review): line 2389 elided — the guard widening supported_aggs
// (likely `if (g_enable_smem_grouped_non_count_agg) {` per the reference
// list); confirm against the source.
2390  supported_aggs = {kCOUNT, kMIN, kMAX, kSUM, kAVG};
2391  }
2392  if (std::find_if(target_infos.begin(),
2393  target_infos.end(),
2394  [&supported_aggs](const TargetInfo& ti) {
2395  if (ti.sql_type.is_varlen() ||
2396  !supported_aggs.count(ti.agg_kind)) {
2397  return true;
2398  } else {
2399  return false;
2400  }
2401  }) == target_infos.end()) {
2402  return true;
2403  }
2404  }
2405  }
2406  return false;
2407 }
std::vector< Analyzer::Expr * > target_exprs
bool g_enable_smem_group_by
bool countDistinctDescriptorsLogicallyEmpty() const
#define SIZE_MAX
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:127
Definition: sqldefs.h:73
Definition: sqldefs.h:75
size_t getMinSharedMemoryPerBlockForAllDevices() const
Definition: CudaMgr.h:114
QueryDescriptionType getQueryDescriptionType() const
bool isArchMaxwellOrLaterForAll() const
Definition: CudaMgr.cpp:287
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:124
Definition: sqldefs.h:76
#define CHECK(condition)
Definition: Logger.h:203
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
Definition: sqldefs.h:74
Definition: sqldefs.h:72
size_t g_gpu_smem_threshold
Definition: Execute.cpp:119
void anonymous_namespace{NativeCodegen.cpp}::optimize_ir ( llvm::Function *  query_func,
llvm::Module *  module,
llvm::legacy::PassManager &  pass_manager,
const std::unordered_set< llvm::Function * > &  live_funcs,
const CompilationOptions co 
)

Definition at line 309 of file NativeCodegen.cpp.

References eliminate_dead_self_recursive_funcs(), LoopStrengthReduction, and CompilationOptions::opt_level.

Referenced by CodeGenerator::generateNativeCPUCode().

// Populates and runs a legacy optimization pipeline over `module`
// (inlining, mem2reg, instruction simplification/combining, global opt,
// LICM, and conditionally loop strength reduction), then erases functions
// not reachable from `live_funcs` via eliminate_dead_self_recursive_funcs.
313  {
314  pass_manager.add(llvm::createAlwaysInlinerLegacyPass());
315  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
316  pass_manager.add(llvm::createInstSimplifyLegacyPass());
317  pass_manager.add(llvm::createInstructionCombiningPass());
318  pass_manager.add(llvm::createGlobalOptimizerPass());
319 
320  pass_manager.add(llvm::createLICMPass());
// NOTE(review): source line 321 — the condition guarding this block — was
// lost in extraction; the reference list (CompilationOptions::opt_level,
// LoopStrengthReduction) suggests it tests co.opt_level. Confirm against
// NativeCodegen.cpp.
322  pass_manager.add(llvm::createLoopStrengthReducePass());
323  }
324  pass_manager.run(*module);
325 
326  eliminate_dead_self_recursive_funcs(*module, live_funcs);
327 }
void eliminate_dead_self_recursive_funcs(llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
ExecutorOptLevel opt_level

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Function &  F,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 197 of file NativeCodegen.cpp.

Referenced by scan_function_calls().

// Recursively scans function F for call instructions, partitioning callee
// names into `defined` (has a body in the module; recursed into) and
// `undefined` (declaration only). Names starting with "__" or "llvm.", and
// names in `ignored`, are excluded from the undefined set. Indirect calls
// (null getCalledFunction) are skipped.
200  {
201  for (llvm::inst_iterator I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
202  if (auto* CI = llvm::dyn_cast<llvm::CallInst>(&*I)) {
203  auto* F2 = CI->getCalledFunction();
204  if (F2 != nullptr) {
205  auto F2name = F2->getName().str();
206  if (F2->isDeclaration()) {
207  if (F2name.rfind("__", 0) !=
208  0 // assume symbols with double underscore are defined
209  && F2name.rfind("llvm.", 0) !=
210  0 // TODO: this may give false positive for NVVM intrinsics
211  && ignored.find(F2name) == ignored.end() // not in ignored list
212  ) {
213  undefined.emplace(F2name);
214  }
215  } else {
// The `defined` membership check doubles as the visited set, preventing
// infinite recursion on (mutually) recursive call graphs.
216  if (defined.find(F2name) == defined.end()) {
217  defined.emplace(F2name);
218  scan_function_calls<T>(*F2, defined, undefined, ignored);
219  }
220  }
221  }
222  }
223  }
224 }

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  module,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 227 of file NativeCodegen.cpp.

References scan_function_calls().

// Module-level overload: runs the per-function scan over every function in
// `module` that has a body, accumulating into the shared defined/undefined
// sets.
230  {
231  for (auto& F : module) {
232  if (!F.isDeclaration()) {
233  scan_function_calls(F, defined, undefined, ignored);
234  }
235  }
236 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)

+ Here is the call graph for this function:

template<typename T = void>
std::tuple<std::unordered_set<std::string>, std::unordered_set<std::string> > anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  module,
const std::unordered_set< std::string > &  ignored = {} 
)

Definition at line 240 of file NativeCodegen.cpp.

// Convenience overload: returns the (defined, undefined) callee-name sets
// for the whole module as a tuple. The leading "{}) {" on the next listing
// line is the tail of the `ignored = {}` default argument from the
// signature, split across lines by the documentation extraction.
241  {}) {
242  std::unordered_set<std::string> defined, undefined;
243  scan_function_calls(module, defined, undefined, ignored);
244  return std::make_tuple(defined, undefined);
245 }
void scan_function_calls(llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes ( llvm::Function *  query_func,
CgenState cgen_state 
)

Definition at line 2410 of file NativeCodegen.cpp.

// Collects every metadata node attached to instructions of the query, row,
// and (optional) filter functions, then renders the nodes sorted by their
// numeric id (parsed from the printed "!N" prefix) into a newline-separated
// string suitable for appending as IR "footnotes".
2411  {
2412  std::string llvm_ir;
2413  std::unordered_set<llvm::MDNode*> md;
2414 
2415  // Loop over all instructions in the query function.
2416  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
2417  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2418  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2419  instr_it->getAllMetadata(imd);
2420  for (auto [kind, node] : imd) {
2421  md.insert(node);
2422  }
2423  }
2424  }
2425 
2426  // Loop over all instructions in the row function.
2427  for (auto bb_it = cgen_state->row_func_->begin(); bb_it != cgen_state->row_func_->end();
2428  ++bb_it) {
2429  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2430  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2431  instr_it->getAllMetadata(imd);
2432  for (auto [kind, node] : imd) {
2433  md.insert(node);
2434  }
2435  }
2436  }
2437 
// The filter function is optional; skip when not generated.
2438  // Loop over all instructions in the filter function.
2439  if (cgen_state->filter_func_) {
2440  for (auto bb_it = cgen_state->filter_func_->begin();
2441  bb_it != cgen_state->filter_func_->end();
2442  ++bb_it) {
2443  for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
2444  llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
2445  instr_it->getAllMetadata(imd);
2446  for (auto [kind, node] : imd) {
2447  md.insert(node);
2448  }
2449  }
2450  }
2451  }
2452 
2453  // Sort the metadata by canonical number and convert to text.
2454  if (!md.empty()) {
2455  std::map<size_t, std::string> sorted_strings;
2456  for (auto p : md) {
2457  std::string str;
2458  llvm::raw_string_ostream os(str);
2459  p->print(os, cgen_state->module_, true);
2460  os.flush();
// The first whitespace-delimited field is the "!N" metadata id; its
// substring after '!' is parsed as the sort key.
2461  auto fields = split(str, {}, 1);
2462  if (fields.empty() || fields[0].empty()) {
2463  continue;
2464  }
2465  sorted_strings.emplace(std::stoul(fields[0].substr(1)), str);
2466  }
2467  llvm_ir += "\n";
2468  for (auto [id, text] : sorted_strings) {
2469  llvm_ir += text;
2470  llvm_ir += "\n";
2471  }
2472  }
2473 
2474  return llvm_ir;
2475 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
llvm::Function * row_func_
Definition: CgenState.h:325
llvm::Module * module_
Definition: CgenState.h:324
llvm::Function * filter_func_
Definition: CgenState.h:326
void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames ( llvm::Function *  row_func,
const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals 
)

Definition at line 1444 of file NativeCodegen.cpp.

// Assigns debugging-friendly names to row_func's arguments, walking the
// argument list in the exact order create_row_function() built it: either
// agg_col_count "out" pointers or the five group-by bookkeeping pointers,
// then init values, position, fragment offsets, row counts, the optional
// literals buffer, the input column buffers, and the join hash tables.
1447  {
1448  auto arg_it = row_func->arg_begin();
1449 
1450  if (agg_col_count) {
1451  for (size_t i = 0; i < agg_col_count; ++i) {
1452  arg_it->setName("out");
1453  ++arg_it;
1454  }
1455  } else {
1456  arg_it->setName("group_by_buff");
1457  ++arg_it;
1458  arg_it->setName("crt_matched");
1459  ++arg_it;
1460  arg_it->setName("total_matched");
1461  ++arg_it;
1462  arg_it->setName("old_total_matched");
1463  ++arg_it;
1464  arg_it->setName("max_matched");
1465  ++arg_it;
1466  }
1467 
1468  arg_it->setName("agg_init_val");
1469  ++arg_it;
1470 
1471  arg_it->setName("pos");
1472  ++arg_it;
1473 
1474  arg_it->setName("frag_row_off");
1475  ++arg_it;
1476 
1477  arg_it->setName("num_rows_per_scan");
1478  ++arg_it;
1479 
1480  if (hoist_literals) {
1481  arg_it->setName("literals");
1482  ++arg_it;
1483  }
1484 
1485  for (size_t i = 0; i < in_col_count; ++i) {
1486  arg_it->setName("col_buf" + std::to_string(i));
1487  ++arg_it;
1488  }
1489 
1490  arg_it->setName("join_hash_tables");
1491 }
std::string to_string(char const *&&v)
template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module &  module)

Definition at line 158 of file NativeCodegen.cpp.

References f.

Referenced by show_defined().

// Debugging helper: prints to stdout the names of all functions in the
// module that have a body (declarations are skipped).
158  {
159  std::cout << "defines: ";
160  for (auto& f : module.getFunctionList()) {
161  if (!f.isDeclaration()) {
162  std::cout << f.getName().str() << ", ";
163  }
164  }
165  std::cout << std::endl;
166 }
char * f

+ Here is the caller graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module *  module)

Definition at line 169 of file NativeCodegen.cpp.

References show_defined().

// Pointer overload: prints "is null" for a null module, otherwise
// delegates to the reference overload.
169  {
170  if (module == nullptr) {
171  std::cout << "is null" << std::endl;
172  } else {
173  show_defined(*module);
174  }
175 }
void show_defined(llvm::Module &module)

+ Here is the call graph for this function:

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( std::unique_ptr< llvm::Module > &  module)

Definition at line 178 of file NativeCodegen.cpp.

References show_defined().

// unique_ptr overload: forwards the raw pointer to the pointer overload,
// which handles the null case.
178  {
179  show_defined(module.get());
180 }
void show_defined(llvm::Module &module)

+ Here is the call graph for this function:

void anonymous_namespace{NativeCodegen.cpp}::throw_parseIR_error ( const llvm::SMDiagnostic &  parse_error,
std::string  src = "",
const bool  is_gpu = false 
)

Definition at line 126 of file NativeCodegen.cpp.

// Formats an LLVM IR parse diagnostic (prefixed "NVVM IR ParseError:" for
// GPU, "LLVM IR ParseError:" otherwise, with `src` as the buffer name) and
// throws it as a ParseIRError.
128  {
129  std::string excname = (is_gpu ? "NVVM IR ParseError: " : "LLVM IR ParseError: ");
130  llvm::raw_string_ostream ss(excname);
131  parse_error.print(src.c_str(), ss, false, false);
132  throw ParseIRError(ss.str());
133 }

Variable Documentation

const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

Definition at line 601 of file NativeCodegen.cpp.