OmniSciDB  f17484ade4
anonymous_namespace{NativeCodegen.cpp} Namespace Reference

Classes

class  CaseExprDetector
 

Functions

void throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)
 
template<typename T = void>
void show_defined (llvm::Module &llvm_module)
 
template<typename T = void>
void show_defined (llvm::Module *llvm_module)
 
template<typename T = void>
void show_defined (std::unique_ptr< llvm::Module > &llvm_module)
 
template<typename T = void>
void scan_function_calls (llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
void scan_function_calls (llvm::Module &llvm_module, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)
 
template<typename T = void>
std::tuple< std::unordered_set< std::string >, std::unordered_set< std::string > > scan_function_calls (llvm::Module &llvm_module, const std::unordered_set< std::string > &ignored={})
 
void eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)
 
void optimize_ir (llvm::Function *query_func, llvm::Module *llvm_module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function * > &live_funcs, const bool is_gpu_smem_used, const CompilationOptions &co)
 
std::string assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *llvm_module)
 
ExecutionEngineWrapper create_execution_engine (llvm::Module *llvm_module, llvm::EngineBuilder &eb, const CompilationOptions &co)
 
std::string cpp_to_llvm_name (const std::string &s)
 
std::string gen_array_any_all_sigs ()
 
std::string gen_translate_null_key_sigs ()
 
void bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *llvm_module)
 
void set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *llvm_module, llvm::LLVMContext &context)
 
void bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *llvm_module)
 
std::vector< std::string > get_agg_fnames (const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
 
template<typename InstType >
llvm::Value * find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool has_count_expr (RelAlgExecutionUnit const &ra_exe_unit)
 
bool has_case_expr_within_groupby_expr (RelAlgExecutionUnit const &ra_exe_unit)
 
bool is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned cuda_blocksize, const unsigned num_blocks_per_mp)
 
std::string serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 

Variables

const std::string cuda_rt_decls
 

Function Documentation

std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU ( ExecutionEngineWrapper &  execution_engine,
llvm::Module *  llvm_module 
)

Definition at line 400 of file NativeCodegen.cpp.

References CHECK.

Referenced by create_execution_engine().

{
  llvm::legacy::PassManager pass_manager;
  auto cpu_target_machine = execution_engine->getTargetMachine();
  CHECK(cpu_target_machine);
  llvm::SmallString<256> code_str;
  llvm::raw_svector_ostream os(code_str);
#if LLVM_VERSION_MAJOR >= 10
  cpu_target_machine->addPassesToEmitFile(
      pass_manager, os, nullptr, llvm::CGFT_AssemblyFile);
#else
  cpu_target_machine->addPassesToEmitFile(
      pass_manager, os, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
#endif
  pass_manager.run(*llvm_module);
  return "Assembly for the CPU:\n" + std::string(code_str.str()) + "\nEnd of assembly";
}


void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders ( const std::string &  pos_fn_name,
const bool  use_resume_param,
llvm::Function *  query_func,
llvm::Module *  llvm_module 
)

Definition at line 1682 of file NativeCodegen.cpp.

{
  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
       ++it) {
    if (!llvm::isa<llvm::CallInst>(*it)) {
      continue;
    }
    auto& pos_call = llvm::cast<llvm::CallInst>(*it);
    auto const func_name = CodegenUtil::getCalledFunctionName(pos_call);
    if (func_name && *func_name == pos_fn_name) {
      if (use_resume_param) {
        auto* const row_index_resume = get_arg_by_name(query_func, "row_index_resume");
        llvm::ReplaceInstWithInst(
            &pos_call,
            llvm::CallInst::Create(llvm_module->getFunction(pos_fn_name + "_impl"),
                                   row_index_resume));
      } else {
        llvm::ReplaceInstWithInst(
            &pos_call,
            llvm::CallInst::Create(llvm_module->getFunction(pos_fn_name + "_impl")));
      }
      break;
    }
  }
}
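A minimal usage sketch (the placeholder names below are illustrative; the actual call sites live in the query compilation pipeline):

  // Rewire the "pos_start" placeholder to pos_start_impl, forwarding the resume
  // argument, then rewire "group_buff_idx" with no extra arguments.
  bind_pos_placeholders("pos_start", /*use_resume_param=*/true, query_func, llvm_module);
  bind_pos_placeholders("group_buff_idx", /*use_resume_param=*/false, query_func, llvm_module);
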
void anonymous_namespace{NativeCodegen.cpp}::bind_query ( llvm::Function *  query_func,
const std::string &  query_fname,
llvm::Function *  multifrag_query_func,
llvm::Module *  llvm_module 
)

Definition at line 1832 of file NativeCodegen.cpp.

1835  {
1836  std::vector<llvm::CallInst*> query_stubs;
1837  for (auto it = llvm::inst_begin(multifrag_query_func),
1838  e = llvm::inst_end(multifrag_query_func);
1839  it != e;
1840  ++it) {
1841  if (!llvm::isa<llvm::CallInst>(*it)) {
1842  continue;
1843  }
1844  auto& query_call = llvm::cast<llvm::CallInst>(*it);
1845  auto const call_func_name = CodegenUtil::getCalledFunctionName(query_call);
1846  if (call_func_name && *call_func_name == query_fname) {
1847  query_stubs.push_back(&query_call);
1848  }
1849  }
1850  for (auto& S : query_stubs) {
1851  std::vector<llvm::Value*> args;
1852  for (size_t i = 0; i < S->getNumOperands() - 1; ++i) {
1853  args.push_back(S->getArgOperand(i));
1854  }
1855  llvm::ReplaceInstWithInst(S, llvm::CallInst::Create(query_func, args, ""));
1856  }
1857 }
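A usage sketch, assuming the multi-fragment driver calls a stub whose name is passed as query_fname (the stub name here is hypothetical):

  // Replace every call to the stub with a direct call to the generated kernel.
  bind_query(query_func, "query_stub", multifrag_query_func, llvm_module);
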
std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name ( const std::string &  s)

Definition at line 592 of file NativeCodegen.cpp.

References CHECK.

Referenced by gen_array_any_all_sigs(), and gen_translate_null_key_sigs().

{
  if (s == "int8_t") {
    return "i8";
  }
  if (s == "int16_t") {
    return "i16";
  }
  if (s == "int32_t") {
    return "i32";
  }
  if (s == "int64_t") {
    return "i64";
  }
  CHECK(s == "float" || s == "double");
  return s;
}
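For example:

  cpp_to_llvm_name("int32_t");  // returns "i32"
  cpp_to_llvm_name("double");   // returns "double" (floating-point names pass through)
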

ExecutionEngineWrapper anonymous_namespace{NativeCodegen.cpp}::create_execution_engine ( llvm::Module *  llvm_module,
llvm::EngineBuilder &  eb,
const CompilationOptions co 
)

Definition at line 418 of file NativeCodegen.cpp.

References logger::ASM, assemblyForCPU(), CHECK, DEBUG_TIMER, and LOG.

Referenced by CodeGenerator::generateNativeCPUCode().

{
  auto timer = DEBUG_TIMER(__func__);
  ExecutionEngineWrapper execution_engine(eb.create(), co);
  CHECK(execution_engine.get());
  // Force the module data layout to match the layout for the selected target
  llvm_module->setDataLayout(execution_engine->getDataLayout());

  LOG(ASM) << assemblyForCPU(execution_engine, llvm_module);

  execution_engine->finalizeObject();
  return execution_engine;
}
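A setup sketch, assuming module ownership has been handed to the EngineBuilder beforehand (owner, llvm_module, and co are assumed to exist):

  llvm::EngineBuilder eb(std::move(owner));  // owner: std::unique_ptr<llvm::Module>
  eb.setEngineKind(llvm::EngineKind::JIT);
  auto execution_engine = create_execution_engine(llvm_module, eb, co);
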

llvm::Function* anonymous_namespace{NativeCodegen.cpp}::create_row_function ( const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals,
llvm::Module *  llvm_module,
llvm::LLVMContext &  context 
)

Definition at line 1763 of file NativeCodegen.cpp.

{
  std::vector<llvm::Type*> row_process_arg_types;

  if (agg_col_count) {
    // output (aggregate) arguments
    for (size_t i = 0; i < agg_col_count; ++i) {
      row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
    }
  } else {
    // group by buffer
    row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
    // varlen output buffer
    row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));
    // current match count
    row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
    // total match count passed from the caller
    row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
    // old total match count returned to the caller
    row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
    // max matched (total number of slots in the output buffer)
    row_process_arg_types.push_back(llvm::Type::getInt32PtrTy(context));
  }

  // aggregate init values
  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));

  // position argument
  row_process_arg_types.push_back(llvm::Type::getInt64Ty(context));

  // fragment row offset argument
  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));

  // number of rows for each scan
  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));

  // literals buffer argument
  if (hoist_literals) {
    row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));
  }

  // column buffer arguments
  for (size_t i = 0; i < in_col_count; ++i) {
    row_process_arg_types.emplace_back(llvm::Type::getInt8PtrTy(context));
  }

  // join hash table argument
  row_process_arg_types.push_back(llvm::Type::getInt64PtrTy(context));

  // row function manager
  row_process_arg_types.push_back(llvm::Type::getInt8PtrTy(context));

  // generate the function
  auto ft =
      llvm::FunctionType::get(get_int_type(32, context), row_process_arg_types, false);

  auto row_func = llvm::Function::Create(
      ft, llvm::Function::ExternalLinkage, "row_func", llvm_module);

  // set the row function argument names; for debugging purposes only
  set_row_func_argnames(row_func, in_col_count, agg_col_count, hoist_literals);

  return row_func;
}
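As an illustration (the counts are assumed for the example), in_col_count=2, agg_col_count=1, and hoist_literals=true produce a function equivalent to:

  // i32 @row_func(i64* %out, i64* %agg_init_val, i64 %pos, i64* %frag_row_off,
  //               i64* %num_rows_per_scan, i8* %literals, i8* %col_buf0,
  //               i8* %col_buf1, i64* %join_hash_tables, i8* %row_func_mgr)
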
void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs ( llvm::Module &  M,
const std::unordered_set< llvm::Function * > &  live_funcs 
)

Definition at line 244 of file NativeCodegen.cpp.

Referenced by optimize_ir().

{
  std::vector<llvm::Function*> dead_funcs;
  for (auto& F : M) {
    bool bAlive = false;
    if (live_funcs.count(&F)) {
      continue;
    }
    for (auto U : F.users()) {
      auto* C = llvm::dyn_cast<const llvm::CallInst>(U);
      if (!C || C->getParent()->getParent() != &F) {
        bAlive = true;
        break;
      }
    }
    if (!bAlive) {
      dead_funcs.push_back(&F);
    }
  }
  for (auto pFn : dead_funcs) {
    pFn->eraseFromParent();
  }
}


template<typename InstType >
llvm::Value* anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block ( llvm::Function *  func,
std::string  bb_name,
std::string  variable_name 
)

Definition at line 2029 of file NativeCodegen.cpp.

{
  llvm::Value* result = nullptr;
  if (func == nullptr || variable_name.empty()) {
    return result;
  }
  bool is_found = false;
  for (auto bb_it = func->begin(); bb_it != func->end() && !is_found; ++bb_it) {
    if (!bb_name.empty() && bb_it->getName() != bb_name) {
      continue;
    }
    for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); inst_it++) {
      if (llvm::isa<InstType>(*inst_it)) {
        if (inst_it->getName() == variable_name) {
          result = &*inst_it;
          is_found = true;
          break;
        }
      }
    }
  }
  return result;
}
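A usage sketch (the block and variable names are hypothetical):

  // Find the load named "total_matched" in the basic block ".exit" of func.
  auto* val =
      find_variable_in_basic_block<llvm::LoadInst>(func, ".exit", "total_matched");
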
std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ( )

Definition at line 609 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

{
  std::string result;
  for (const std::string any_or_all : {"any", "all"}) {
    for (const std::string elem_type :
         {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
      for (const std::string needle_type :
           {"int8_t", "int16_t", "int32_t", "int64_t", "float", "double"}) {
        for (const std::string op_name : {"eq", "ne", "lt", "le", "gt", "ge"}) {
          result += ("declare i1 @array_" + any_or_all + "_" + op_name + "_" + elem_type +
                     "_" + needle_type + "(i8*, i64, " + cpp_to_llvm_name(needle_type) +
                     ", " + cpp_to_llvm_name(elem_type) + ");\n");
        }
      }
    }
  }
  return result;
}
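Each (comparator, element type, needle type) combination yields one declaration; for example, ANY with the eq comparator over an int32_t element type probed with an int64_t needle produces:

  // declare i1 @array_any_eq_int32_t_int64_t(i8*, i64, i64, i32);
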

std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ( )

Definition at line 627 of file NativeCodegen.cpp.

References cpp_to_llvm_name(), and run_benchmark_import::result.

{
  std::string result;
  for (const std::string key_type : {"int8_t", "int16_t", "int32_t", "int64_t"}) {
    const auto key_llvm_type = cpp_to_llvm_name(key_type);
    result += "declare i64 @translate_null_key_" + key_type + "(" + key_llvm_type + ", " +
              key_llvm_type + ", i64);\n";
  }
  return result;
}
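For int32_t keys, for instance, the generated declaration is:

  // declare i64 @translate_null_key_int32_t(i32, i32, i64);
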

std::vector<std::string> anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames ( const std::vector< Analyzer::Expr * > &  target_exprs,
const bool  is_group_by 
)

Definition at line 1859 of file NativeCodegen.cpp.

{
  std::vector<std::string> result;
  for (size_t target_idx = 0, agg_col_idx = 0; target_idx < target_exprs.size();
       ++target_idx, ++agg_col_idx) {
    const auto target_expr = target_exprs[target_idx];
    CHECK(target_expr);
    const auto target_type_info = target_expr->get_type_info();
    const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
    const bool is_varlen =
        (target_type_info.is_string() &&
         target_type_info.get_compression() == kENCODING_NONE) ||
        target_type_info.is_array();  // TODO: should it use is_varlen_array() ?
    if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
      result.emplace_back(target_type_info.is_fp() ? "agg_id_double" : "agg_id");
      if (is_varlen) {
        result.emplace_back("agg_id");
      }
      if (target_type_info.is_geometry()) {
        result.emplace_back("agg_id");
        for (auto i = 2; i < 2 * target_type_info.get_physical_coord_cols(); ++i) {
          result.emplace_back("agg_id");
        }
      }
      continue;
    }
    const auto agg_type = agg_expr->get_aggtype();
    SQLTypeInfo agg_type_info;
    switch (agg_type) {
      case kCOUNT:
      case kCOUNT_IF:
        agg_type_info = target_type_info;
        break;
      default:
        agg_type_info = agg_expr->get_arg()->get_type_info();
        break;
    }
    switch (agg_type) {
      case kAVG: {
        if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
            !agg_type_info.is_fp()) {
          throw std::runtime_error("AVG is only valid on integer and floating point");
        }
        result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
                                ? "agg_sum"
                                : "agg_sum_double");
        result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
                                ? "agg_count"
                                : "agg_count_double");
        break;
      }
      case kMIN: {
        if (agg_type_info.is_string() || agg_type_info.is_array() ||
            agg_type_info.is_geometry()) {
          throw std::runtime_error(
              "MIN on strings, arrays or geospatial types not supported yet");
        }
        result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
                                ? "agg_min"
                                : "agg_min_double");
        break;
      }
      case kMAX: {
        if (agg_type_info.is_string() || agg_type_info.is_array() ||
            agg_type_info.is_geometry()) {
          throw std::runtime_error(
              "MAX on strings, arrays or geospatial types not supported yet");
        }
        result.emplace_back((agg_type_info.is_integer() || agg_type_info.is_time())
                                ? "agg_max"
                                : "agg_max_double");
        break;
      }
      case kSUM:
      case kSUM_IF: {
        if (!agg_type_info.is_integer() && !agg_type_info.is_decimal() &&
            !agg_type_info.is_fp()) {
          throw std::runtime_error(
              "SUM and SUM_IF are only valid on integer and floating point");
        }
        std::string func_name = (agg_type_info.is_integer() || agg_type_info.is_time())
                                    ? "agg_sum"
                                    : "agg_sum_double";
        if (agg_type == kSUM_IF) {
          func_name += "_if";
        }
        result.emplace_back(func_name);
        break;
      }
      case kCOUNT:
        result.emplace_back(agg_expr->get_is_distinct() ? "agg_count_distinct"
                                                        : "agg_count");
        break;
      case kCOUNT_IF:
        result.emplace_back("agg_count_if");
        break;
      case kSINGLE_VALUE: {
        result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
        break;
      }
      case kSAMPLE: {
        // Note that varlen SAMPLE arguments are handled separately above
        result.emplace_back(agg_type_info.is_fp() ? "agg_id_double" : "agg_id");
        break;
      }
      case kAPPROX_COUNT_DISTINCT:
        result.emplace_back("agg_approximate_count_distinct");
        break;
      case kAPPROX_QUANTILE:
        result.emplace_back("agg_approx_quantile");
        break;
      case kMODE:
        result.emplace_back("agg_mode_func");
        break;
      default:
        UNREACHABLE() << "Unsupported agg_type: " << agg_type;
    }
  }
  return result;
}
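For illustration, a target list of COUNT(*) followed by AVG(x) on an integer column x yields three entries, since AVG lowers to a sum/count pair:

  // Hypothetical targets: COUNT(*), AVG(x) with x of integer type
  // get_agg_fnames(targets, /*is_group_by=*/true)
  //   -> {"agg_count", "agg_sum", "agg_count"}
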
size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size ( const bool  shared_mem_used,
const QueryMemoryDescriptor query_mem_desc_ptr 
)

Definition at line 2603 of file NativeCodegen.cpp.

{
  return shared_mem_used
             ? (query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount())
             : 0;
}
bool anonymous_namespace{NativeCodegen.cpp}::has_case_expr_within_groupby_expr ( RelAlgExecutionUnit const &  ra_exe_unit)

Definition at line 2640 of file NativeCodegen.cpp.

{
  if (ra_exe_unit.groupby_exprs.empty() || !ra_exe_unit.groupby_exprs.front()) {
    return false;
  }
  CaseExprDetector detector;
  for (auto expr : ra_exe_unit.groupby_exprs) {
    if (detector.detectCaseExpr(expr.get())) {
      return true;
    }
  }
  return false;
}
bool anonymous_namespace{NativeCodegen.cpp}::has_count_expr ( RelAlgExecutionUnit const &  ra_exe_unit)

Definition at line 2610 of file NativeCodegen.cpp.

{
  for (auto const expr : ra_exe_unit.target_exprs) {
    if (auto const agg_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
      if (shared::is_any<SQLAgg::kCOUNT, SQLAgg::kCOUNT_IF>(agg_expr->get_aggtype())) {
        return true;
      }
    }
  }
  return false;
}
bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported ( const QueryMemoryDescriptor query_mem_desc_ptr,
const RelAlgExecutionUnit ra_exe_unit,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const ExecutorDeviceType  device_type,
const unsigned  cuda_blocksize,
const unsigned  num_blocks_per_mp 
)

To simplify the implementation for practical purposes, we initially provide shared memory support for cases where there are at most as many entries in the output buffer as there are threads within each GPU device. In order to relax this assumption later, we need to add a for loop in the generated code such that each thread loops over multiple entries. TODO: relax this if necessary

Definition at line 2653 of file NativeCodegen.cpp.

{
  if (device_type == ExecutorDeviceType::CPU) {
    return false;
  }
  if (query_mem_desc_ptr->didOutputColumnar()) {
    return false;
  }
  CHECK(query_mem_desc_ptr);
  CHECK(cuda_mgr);
  /*
   * We only use shared memory strategy if GPU hardware provides native shared
   * memory atomics support. From CUDA Toolkit documentation:
   * https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
   * Maxwell, Pascal [and Volta] provides native shared memory atomic operations
   * for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
   * (CAS)."
   */
  if (!cuda_mgr->isArchMaxwellOrLaterForAll()) {
    return false;
  }
  if (cuda_mgr->isArchPascal() && !ra_exe_unit.join_quals.empty() &&
      has_count_expr(ra_exe_unit) && has_case_expr_within_groupby_expr(ra_exe_unit)) {
    return false;
  }

  if (query_mem_desc_ptr->getQueryDescriptionType() ==
          QueryDescriptionType::NonGroupedAggregate &&
      g_enable_smem_non_grouped_agg &&
      query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty()) {
    // TODO: relax this, if necessary
    if (cuda_blocksize < query_mem_desc_ptr->getEntryCount()) {
      return false;
    }
    // skip shared memory usage when dealing with 1) variable length targets, 2)
    // not a COUNT aggregate
    const auto target_infos =
        target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
    std::unordered_set<SQLAgg> supported_aggs{kCOUNT, kCOUNT_IF};
    if (std::find_if(target_infos.begin(),
                     target_infos.end(),
                     [&supported_aggs](const TargetInfo& ti) {
                       if (ti.sql_type.is_varlen() ||
                           !supported_aggs.count(ti.agg_kind)) {
                         return true;
                       } else {
                         return false;
                       }
                     }) == target_infos.end()) {
      return true;
    }
  }
  if (query_mem_desc_ptr->getQueryDescriptionType() ==
          QueryDescriptionType::GroupByPerfectHash &&
      g_enable_smem_group_by) {
    if (cuda_blocksize < query_mem_desc_ptr->getEntryCount()) {
      return false;
    }

    // Fundamentally, we should use shared memory whenever the output buffer
    // is small enough so that we can fit it in the shared memory and yet expect
    // good occupancy.
    // For now, we allow keyless, row-wise layout, and only for perfect hash
    // group by operations.
    if (query_mem_desc_ptr->hasKeylessHash() &&
        query_mem_desc_ptr->countDistinctDescriptorsLogicallyEmpty() &&
        !query_mem_desc_ptr->useStreamingTopN()) {
      const size_t shared_memory_threshold_bytes = std::min(
          g_gpu_smem_threshold == 0 ? SIZE_MAX : g_gpu_smem_threshold,
          cuda_mgr->getMinSharedMemoryPerBlockForAllDevices() / num_blocks_per_mp);
      const auto output_buffer_size =
          query_mem_desc_ptr->getRowSize() * query_mem_desc_ptr->getEntryCount();
      if (output_buffer_size > shared_memory_threshold_bytes) {
        return false;
      }

      // skip shared memory usage when dealing with 1) variable length targets, 2)
      // non-basic aggregates (COUNT, SUM, MIN, MAX, AVG)
      // TODO: relax this if necessary
      const auto target_infos =
          target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc_ptr);
      std::unordered_set<SQLAgg> supported_aggs{kCOUNT, kCOUNT_IF};
      if (g_enable_smem_grouped_non_count_agg) {
        supported_aggs = {kCOUNT, kCOUNT_IF, kMIN, kMAX, kSUM, kSUM_IF, kAVG};
      }
      if (std::find_if(target_infos.begin(),
                       target_infos.end(),
                       [&supported_aggs](const TargetInfo& ti) {
                         if (ti.sql_type.is_varlen() ||
                             !supported_aggs.count(ti.agg_kind)) {
                           return true;
                         } else {
                           return false;
                         }
                       }) == target_infos.end()) {
        return true;
      }
    }
  }
  return false;
}
void anonymous_namespace{NativeCodegen.cpp}::optimize_ir ( llvm::Function *  query_func,
llvm::Module *  llvm_module,
llvm::legacy::PassManager &  pass_manager,
const std::unordered_set< llvm::Function * > &  live_funcs,
const bool  is_gpu_smem_used,
const CompilationOptions co 
)

Definition at line 306 of file NativeCodegen.cpp.

References DEBUG_TIMER, and eliminate_dead_self_recursive_funcs().

Referenced by CodeGenerator::generateNativeCPUCode().

{
  auto timer = DEBUG_TIMER(__func__);
  // the always inliner legacy pass must always run first
  pass_manager.add(llvm::createVerifierPass());
  pass_manager.add(llvm::createAlwaysInlinerLegacyPass());

  pass_manager.add(new AnnotateInternalFunctionsPass());

  pass_manager.add(llvm::createSROAPass());
  // mem ssa drops unused load and store instructions, e.g. passing variables directly
  // where possible
  pass_manager.add(
      llvm::createEarlyCSEPass(/*enable_mem_ssa=*/true));  // Catch trivial redundancies

  if (!is_gpu_smem_used) {
    // thread jumps can change the execution order around SMEM sections guarded by
    // `__syncthreads()`, which results in race conditions. For now, disable jump
    // threading for shared memory queries. In the future, consider handling shared
    // memory aggregations with a separate kernel launch
    pass_manager.add(llvm::createJumpThreadingPass());  // Thread jumps.
  }
  pass_manager.add(llvm::createCFGSimplificationPass());

  // remove load/stores in PHIs if instructions can be accessed directly post thread jumps
  pass_manager.add(llvm::createNewGVNPass());

  pass_manager.add(llvm::createDeadStoreEliminationPass());
  pass_manager.add(llvm::createLICMPass());

  pass_manager.add(llvm::createInstructionCombiningPass());

  // module passes
  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
  pass_manager.add(llvm::createGlobalOptimizerPass());

  pass_manager.add(llvm::createCFGSimplificationPass());  // cleanup after everything

  pass_manager.run(*llvm_module);

  eliminate_dead_self_recursive_funcs(*llvm_module, live_funcs);
}
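A usage sketch (live_funcs and co are assumed to be prepared by the caller):

  llvm::legacy::PassManager pass_manager;
  optimize_ir(query_func, llvm_module, pass_manager, live_funcs,
              /*is_gpu_smem_used=*/false, co);
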

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Function &  F,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 193 of file NativeCodegen.cpp.

Referenced by scan_function_calls().

{
  for (llvm::inst_iterator I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
    if (auto* CI = llvm::dyn_cast<llvm::CallInst>(&*I)) {
      auto* F2 = CI->getCalledFunction();
      if (F2 != nullptr) {
        auto F2name = F2->getName().str();
        if (F2->isDeclaration()) {
          if (F2name.rfind("__", 0) !=
                  0  // assume symbols with double underscore are defined
              && F2name.rfind("llvm.", 0) !=
                     0  // TODO: this may give false positive for NVVM intrinsics
              && ignored.find(F2name) == ignored.end()  // not in ignored list
          ) {
            undefined.emplace(F2name);
          }
        } else {
          if (defined.find(F2name) == defined.end()) {
            defined.emplace(F2name);
            scan_function_calls<T>(*F2, defined, undefined, ignored);
          }
        }
      }
    }
  }
}


template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  llvm_module,
std::unordered_set< std::string > &  defined,
std::unordered_set< std::string > &  undefined,
const std::unordered_set< std::string > &  ignored 
)

Definition at line 223 of file NativeCodegen.cpp.

References scan_function_calls().

{
  for (auto& F : llvm_module) {
    if (!F.isDeclaration()) {
      scan_function_calls(F, defined, undefined, ignored);
    }
  }
}

template<typename T = void>
std::tuple<std::unordered_set<std::string>, std::unordered_set<std::string> > anonymous_namespace{NativeCodegen.cpp}::scan_function_calls ( llvm::Module &  llvm_module,
const std::unordered_set< std::string > &  ignored = {} 
)

Definition at line 236 of file NativeCodegen.cpp.

{
  std::unordered_set<std::string> defined, undefined;
  scan_function_calls(llvm_module, defined, undefined, ignored);
  return std::make_tuple(defined, undefined);
}
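A usage sketch (the ignored set here is illustrative):

  auto [defined, undefined] = scan_function_calls(*llvm_module, /*ignored=*/{"free"});
  for (const auto& fname : undefined) {
    LOG(WARNING) << "undefined function: " << fname;
  }
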
std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes ( llvm::Function *  query_func,
CgenState cgen_state 
)

Definition at line 2769 of file NativeCodegen.cpp.

{
  std::string llvm_ir;
  std::unordered_set<llvm::MDNode*> md;

  // Loop over all instructions in the query function.
  for (auto bb_it = query_func->begin(); bb_it != query_func->end(); ++bb_it) {
    for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
      llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
      instr_it->getAllMetadata(imd);
      for (auto [kind, node] : imd) {
        md.insert(node);
      }
    }
  }

  // Loop over all instructions in the row function.
  for (auto bb_it = cgen_state->row_func_->begin(); bb_it != cgen_state->row_func_->end();
       ++bb_it) {
    for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
      llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
      instr_it->getAllMetadata(imd);
      for (auto [kind, node] : imd) {
        md.insert(node);
      }
    }
  }

  // Loop over all instructions in the filter function.
  if (cgen_state->filter_func_) {
    for (auto bb_it = cgen_state->filter_func_->begin();
         bb_it != cgen_state->filter_func_->end();
         ++bb_it) {
      for (auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
        llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 100> imd;
        instr_it->getAllMetadata(imd);
        for (auto [kind, node] : imd) {
          md.insert(node);
        }
      }
    }
  }

  // Sort the metadata by canonical number and convert to text.
  if (!md.empty()) {
    std::map<size_t, std::string> sorted_strings;
    for (auto p : md) {
      std::string str;
      llvm::raw_string_ostream os(str);
      p->print(os, cgen_state->module_, true);
      os.flush();
      auto fields = split(str, {}, 1);
      if (fields.empty() || fields[0].empty()) {
        continue;
      }
      sorted_strings.emplace(std::stoul(fields[0].substr(1)), str);
    }
    llvm_ir += "\n";
    for (auto [id, text] : sorted_strings) {
      llvm_ir += text;
      llvm_ir += "\n";
    }
  }

  return llvm_ir;
}
void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames ( llvm::Function *  row_func,
const size_t  in_col_count,
const size_t  agg_col_count,
const bool  hoist_literals 
)

Definition at line 1710 of file NativeCodegen.cpp.

References to_string().

{
  auto arg_it = row_func->arg_begin();

  if (agg_col_count) {
    for (size_t i = 0; i < agg_col_count; ++i) {
      arg_it->setName("out");
      ++arg_it;
    }
  } else {
    arg_it->setName("group_by_buff");
    ++arg_it;
    arg_it->setName("varlen_output_buff");
    ++arg_it;
    arg_it->setName("crt_matched");
    ++arg_it;
    arg_it->setName("total_matched");
    ++arg_it;
    arg_it->setName("old_total_matched");
    ++arg_it;
    arg_it->setName("max_matched");
    ++arg_it;
  }

  arg_it->setName("agg_init_val");
  ++arg_it;

  arg_it->setName("pos");
  ++arg_it;

  arg_it->setName("frag_row_off");
  ++arg_it;

  arg_it->setName("num_rows_per_scan");
  ++arg_it;

  if (hoist_literals) {
    arg_it->setName("literals");
    ++arg_it;
  }

  for (size_t i = 0; i < in_col_count; ++i) {
    arg_it->setName("col_buf" + std::to_string(i));
    ++arg_it;
  }

  arg_it->setName("join_hash_tables");
  ++arg_it;
  arg_it->setName("row_func_mgr");
}

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module &  llvm_module)

Definition at line 154 of file NativeCodegen.cpp.


Referenced by show_defined().

{
  std::cout << "defines: ";
  for (auto& f : llvm_module.getFunctionList()) {
    if (!f.isDeclaration()) {
      std::cout << f.getName().str() << ", ";
    }
  }
  std::cout << std::endl;
}

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( llvm::Module *  llvm_module)

Definition at line 165 of file NativeCodegen.cpp.

References show_defined().

{
  if (llvm_module == nullptr) {
    std::cout << "is null" << std::endl;
  } else {
    show_defined(*llvm_module);
  }
}

template<typename T = void>
void anonymous_namespace{NativeCodegen.cpp}::show_defined ( std::unique_ptr< llvm::Module > &  llvm_module)

Definition at line 174 of file NativeCodegen.cpp.

References show_defined().

{
  show_defined(llvm_module.get());
}

void anonymous_namespace{NativeCodegen.cpp}::throw_parseIR_error ( const llvm::SMDiagnostic &  parse_error,
std::string  src = "",
const bool  is_gpu = false 
)

Definition at line 122 of file NativeCodegen.cpp.

Referenced by Executor::optimizeAndCodegenGPU().

{
  std::string excname = (is_gpu ? "NVVM IR ParseError: " : "LLVM IR ParseError: ");
  llvm::raw_string_ostream ss(excname);
  parse_error.print(src.c_str(), ss, false, false);
  throw ParseIRError(ss.str());
}

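A minimal sketch, assuming the IR is parsed with llvm::parseIR (buffer and context are assumed to exist):

  llvm::SMDiagnostic parse_error;
  auto llvm_module = llvm::parseIR(buffer->getMemBufferRef(), parse_error, context);
  if (!llvm_module) {
    throw_parseIR_error(parse_error, "<generated IR>", /*is_gpu=*/false);
  }
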

Variable Documentation

const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

Definition at line 637 of file NativeCodegen.cpp.