#include "QueryEngine/Execute.h"
#include <llvm/Analysis/ScopedNoAliasAA.h>
#include <llvm/Analysis/TypeBasedAliasAnalysis.h>
#include <llvm/Bitcode/BitcodeReader.h>
#include <llvm/Bitcode/BitcodeWriter.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/IR/Attributes.h>
#include <llvm/IR/GlobalValue.h>
#include <llvm/IR/InstIterator.h>
#include <llvm/IR/IntrinsicInst.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Verifier.h>
#include <llvm/IRReader/IRReader.h>
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/Casting.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/FormattedStream.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_os_ostream.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/IPO/InferFunctionAttrs.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Transforms/InstCombine/InstCombine.h>
#include <llvm/Transforms/Instrumentation.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Scalar/GVN.h>
#include <llvm/Transforms/Scalar/InstSimplifyPass.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Transforms/Utils/Cloning.h>
#include "CudaMgr/CudaMgr.h"
#include "QueryEngine/CodeGenerator.h"
#include "QueryEngine/CodegenHelper.h"
#include "QueryEngine/ExtensionFunctionsWhitelist.h"
#include "QueryEngine/GpuSharedMemoryUtils.h"
#include "QueryEngine/LLVMFunctionAttributesUtil.h"
#include "QueryEngine/Optimization/AnnotateInternalFunctionsPass.h"
#include "QueryEngine/OutputBufferInitialization.h"
#include "QueryEngine/QueryEngine.h"
#include "QueryEngine/QueryTemplateGenerator.h"
#include "QueryEngine/UsedColumnsVisitor.h"
#include "Shared/InlineNullValues.h"
#include "Shared/MathUtils.h"
#include "StreamingTopN.h"

Include dependency graph for NativeCodegen.cpp:

Classes
class	anonymous_namespace{NativeCodegen.cpp}::CaseExprDetector

Namespaces
	anonymous_namespace{NativeCodegen.cpp}

Macros
#define	SHOW_DEFINED(MODULE)

#define	SHOW_FUNCTIONS(MODULE)

Functions
void	anonymous_namespace{NativeCodegen.cpp}::throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="", const bool is_gpu=false)

template<typename T = void>
void	anonymous_namespace{NativeCodegen.cpp}::show_defined (llvm::Module &llvm_module)

template<typename T = void>
void	anonymous_namespace{NativeCodegen.cpp}::show_defined (llvm::Module *llvm_module)

template<typename T = void>
void	anonymous_namespace{NativeCodegen.cpp}::show_defined (std::unique_ptr< llvm::Module > &llvm_module)

template<typename T = void>
void	anonymous_namespace{NativeCodegen.cpp}::scan_function_calls (llvm::Function &F, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)

template<typename T = void>
void	anonymous_namespace{NativeCodegen.cpp}::scan_function_calls (llvm::Module &llvm_module, std::unordered_set< std::string > &defined, std::unordered_set< std::string > &undefined, const std::unordered_set< std::string > &ignored)

template<typename T = void>
std::tuple< std::unordered_set < std::string > , std::unordered_set < std::string > >	anonymous_namespace{NativeCodegen.cpp}::scan_function_calls (llvm::Module &llvm_module, const std::unordered_set< std::string > &ignored={})

void	anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function * > &live_funcs)

void	anonymous_namespace{NativeCodegen.cpp}::optimize_ir (llvm::Function query_func, llvm::Module llvm_module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function * > &live_funcs, const bool is_gpu_smem_used, const CompilationOptions &co)

void	verify_function_ir (const llvm::Function *func)

std::string	anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *llvm_module)

ExecutionEngineWrapper	anonymous_namespace{NativeCodegen.cpp}::create_execution_engine (llvm::Module *llvm_module, llvm::EngineBuilder &eb, const CompilationOptions &co)

std::string	anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name (const std::string &s)

std::string	anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ()

std::string	anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ()

llvm::StringRef	get_gpu_target_triple_string ()

llvm::StringRef	get_gpu_data_layout ()

std::map< std::string, std::string >	get_device_parameters (bool cpu_only)

std::unique_ptr< llvm::Module >	read_llvm_module_from_bc_file (const std::string &bc_filename, llvm::LLVMContext &context)

std::unique_ptr< llvm::Module >	read_llvm_module_from_ir_file (const std::string &udf_ir_filename, llvm::LLVMContext &ctx, bool is_gpu=false)

std::unique_ptr< llvm::Module >	read_llvm_module_from_ir_string (const std::string &udf_ir_string, llvm::LLVMContext &ctx, bool is_gpu=false)

void	anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function query_func, llvm::Module llvm_module)

void	anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)

llvm::Function *	anonymous_namespace{NativeCodegen.cpp}::create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *llvm_module, llvm::LLVMContext &context)

void	anonymous_namespace{NativeCodegen.cpp}::bind_query (llvm::Function query_func, const std::string &query_fname, llvm::Function multifrag_query_func, llvm::Module *llvm_module)

std::vector< std::string >	anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames (const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)

template<typename InstType >
llvm::Value *	anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)

size_t	anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)

bool	anonymous_namespace{NativeCodegen.cpp}::has_count_expr (RelAlgExecutionUnit const &ra_exe_unit)

bool	anonymous_namespace{NativeCodegen.cpp}::has_case_expr_within_groupby_expr (RelAlgExecutionUnit const &ra_exe_unit)

bool	anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported (const QueryMemoryDescriptor query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr cuda_mgr, const ExecutorDeviceType device_type, const unsigned cuda_blocksize, const unsigned num_blocks_per_mp)

std::string	anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes (llvm::Function query_func, CgenState cgen_state)

std::vector< llvm::Value * >	generate_column_heads_load (const int num_columns, llvm::Value *byte_stream_arg, llvm::IRBuilder<> &ir_builder, llvm::LLVMContext &ctx)

Variables
float	g_fraction_code_cache_to_evict = 0.2

const std::string	anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls

constexpr std::array < std::string_view, 18 >	TARGET_RUNTIME_FUNCTIONS_FOR_MODULE_CLONING

Macro Definition Documentation

#define SHOW_DEFINED ( MODULE )

Value:

{                                                                  \
    std::cout << __func__ << "#" << __LINE__ << ": " #MODULE << " "; \
    ::show_defined(MODULE);                                          \
  }

Definition at line 143 of file NativeCodegen.cpp.

#define SHOW_FUNCTIONS ( MODULE )

Value:

{                                                                  \
    std::cout << __func__ << "#" << __LINE__ << ": " #MODULE << " "; \
    ::show_functions(MODULE);                                        \
  }

Definition at line 149 of file NativeCodegen.cpp.

Function Documentation

std::vector<llvm::Value*> generate_column_heads_load	(	const int	num_columns,
		llvm::Value *	byte_stream_arg,
		llvm::IRBuilder<> &	ir_builder,
		llvm::LLVMContext &	ctx
	)

Loads individual columns from a single, packed pointers buffer (the byte stream arg)

Definition at line 3453 of file NativeCodegen.cpp.

Referenced by TableFunctionCompilationContext::generateEntryPoint().

                                                                            {
   CHECK(byte_stream_arg);
   const auto max_col_local_id = num_columns - 1;
 
   std::vector<llvm::Value*> col_heads;
   for (int col_id = 0; col_id <= max_col_local_id; ++col_id) {
     auto* gep = ir_builder.CreateGEP(
         byte_stream_arg->getType()->getScalarType()->getPointerElementType(),
         byte_stream_arg,
         llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), col_id));
     auto* load_gep = ir_builder.CreateLoad(gep->getType()->getPointerElementType(), gep);
     load_gep->setName(byte_stream_arg->getName() + "_" + std::to_string(col_id) + "_ptr");
     col_heads.emplace_back(load_gep);
   }
   return col_heads;
 }

Here is the caller graph for this function:

std::map<std::string, std::string> get_device_parameters ( bool cpu_only )

Definition at line 970 of file NativeCodegen.cpp.

                                                                   {
   std::map<std::string, std::string> result;
 
   result.insert(std::make_pair("cpu_name", llvm::sys::getHostCPUName()));
   result.insert(std::make_pair("cpu_triple", llvm::sys::getProcessTriple()));
   result.insert(
       std::make_pair("cpu_cores", std::to_string(llvm::sys::getHostNumPhysicalCores())));
   result.insert(std::make_pair("cpu_threads", std::to_string(cpu_threads())));
 
   // https://en.cppreference.com/w/cpp/language/types
   std::string sizeof_types;
   sizeof_types += "bool:" + std::to_string(sizeof(bool)) + ";";
   sizeof_types += "size_t:" + std::to_string(sizeof(size_t)) + ";";
   sizeof_types += "ssize_t:" + std::to_string(sizeof(ssize_t)) + ";";
   sizeof_types += "char:" + std::to_string(sizeof(char)) + ";";
   sizeof_types += "uchar:" + std::to_string(sizeof(unsigned char)) + ";";
   sizeof_types += "short:" + std::to_string(sizeof(short)) + ";";
   sizeof_types += "ushort:" + std::to_string(sizeof(unsigned short int)) + ";";
   sizeof_types += "int:" + std::to_string(sizeof(int)) + ";";
   sizeof_types += "uint:" + std::to_string(sizeof(unsigned int)) + ";";
   sizeof_types += "long:" + std::to_string(sizeof(long int)) + ";";
   sizeof_types += "ulong:" + std::to_string(sizeof(unsigned long int)) + ";";
   sizeof_types += "longlong:" + std::to_string(sizeof(long long int)) + ";";
   sizeof_types += "ulonglong:" + std::to_string(sizeof(unsigned long long int)) + ";";
   sizeof_types += "float:" + std::to_string(sizeof(float)) + ";";
   sizeof_types += "double:" + std::to_string(sizeof(double)) + ";";
   sizeof_types += "longdouble:" + std::to_string(sizeof(long double)) + ";";
   sizeof_types += "voidptr:" + std::to_string(sizeof(void*)) + ";";
 
   result.insert(std::make_pair("type_sizeof", sizeof_types));
 
   std::string null_values;
   null_values += "boolean1:" + std::to_string(serialized_null_value<bool>()) + ";";
   null_values += "boolean8:" + std::to_string(serialized_null_value<int8_t>()) + ";";
   null_values += "int8:" + std::to_string(serialized_null_value<int8_t>()) + ";";
   null_values += "int16:" + std::to_string(serialized_null_value<int16_t>()) + ";";
   null_values += "int32:" + std::to_string(serialized_null_value<int32_t>()) + ";";
   null_values += "int64:" + std::to_string(serialized_null_value<int64_t>()) + ";";
   null_values += "uint8:" + std::to_string(serialized_null_value<uint8_t>()) + ";";
   null_values += "uint16:" + std::to_string(serialized_null_value<uint16_t>()) + ";";
   null_values += "uint32:" + std::to_string(serialized_null_value<uint32_t>()) + ";";
   null_values += "uint64:" + std::to_string(serialized_null_value<uint64_t>()) + ";";
   null_values += "float32:" + std::to_string(serialized_null_value<float>()) + ";";
   null_values += "float64:" + std::to_string(serialized_null_value<double>()) + ";";
   null_values +=
       "Array<boolean8>:" + std::to_string(serialized_null_value<int8_t, true>()) + ";";
   null_values +=
       "Array<int8>:" + std::to_string(serialized_null_value<int8_t, true>()) + ";";
   null_values +=
       "Array<int16>:" + std::to_string(serialized_null_value<int16_t, true>()) + ";";
   null_values +=
       "Array<int32>:" + std::to_string(serialized_null_value<int32_t, true>()) + ";";
   null_values +=
       "Array<int64>:" + std::to_string(serialized_null_value<int64_t, true>()) + ";";
   null_values +=
       "Array<float32>:" + std::to_string(serialized_null_value<float, true>()) + ";";
   null_values +=
       "Array<float64>:" + std::to_string(serialized_null_value<double, true>()) + ";";
 
   result.insert(std::make_pair("null_values", null_values));
 
   llvm::StringMap<bool> cpu_features;
   if (llvm::sys::getHostCPUFeatures(cpu_features)) {
     std::string features_str = "";
     for (auto it = cpu_features.begin(); it != cpu_features.end(); ++it) {
       features_str += (it->getValue() ? " +" : " -");
       features_str += it->getKey().str();
     }
     result.insert(std::make_pair("cpu_features", features_str));
   }
 
   result.insert(std::make_pair("llvm_version",
                                std::to_string(LLVM_VERSION_MAJOR) + "." +
                                    std::to_string(LLVM_VERSION_MINOR) + "." +
                                    std::to_string(LLVM_VERSION_PATCH)));
 
 #ifdef HAVE_CUDA
   if (!cpu_only) {
     int device_count = 0;
     checkCudaErrors(cuDeviceGetCount(&device_count));
     if (device_count) {
       CUdevice device{};
       char device_name[256];
       int major = 0, minor = 0;
       int driver_version;
       checkCudaErrors(cuDeviceGet(&device, 0));  // assuming homogeneous multi-GPU system
       checkCudaErrors(cuDeviceGetName(device_name, 256, device));
       checkCudaErrors(cuDeviceGetAttribute(
           &major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device));
       checkCudaErrors(cuDeviceGetAttribute(
           &minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device));
       checkCudaErrors(cuDriverGetVersion(&driver_version));
 
       result.insert(std::make_pair("gpu_name", device_name));
       result.insert(std::make_pair("gpu_count", std::to_string(device_count)));
       result.insert(std::make_pair("gpu_compute_capability",
                                    std::to_string(major) + "." + std::to_string(minor)));
       result.insert(std::make_pair("gpu_triple", get_gpu_target_triple_string()));
       result.insert(std::make_pair("gpu_datalayout", get_gpu_data_layout()));
       result.insert(std::make_pair("gpu_driver",
                                    "CUDA " + std::to_string(driver_version / 1000) + "." +
                                        std::to_string((driver_version % 1000) / 10)));
 
       auto rt_libdevice_path = get_cuda_libdevice_dir() + "/libdevice.10.bc";
       result.insert(
           std::make_pair("gpu_has_libdevice",
                          std::to_string(boost::filesystem::exists(rt_libdevice_path))));
     }
   }
 #endif
 
   return result;
 }

llvm::StringRef get_gpu_data_layout ( )

Definition at line 962 of file NativeCodegen.cpp.

                                     {
   return llvm::StringRef(
       "e-p:64:64:64-i1:8:8-i8:8:8-"
       "i16:16:16-i32:32:32-i64:64:64-"
       "f32:32:32-f64:64:64-v16:16:16-"
       "v32:32:32-v64:64:64-v128:128:128-n16:32:64");
 }

llvm::StringRef get_gpu_target_triple_string ( )

Definition at line 958 of file NativeCodegen.cpp.

References DEBUG_TIMER.

                                              {
   return llvm::StringRef("nvptx64-nvidia-cuda");
 }

std::unique_ptr<llvm::Module> read_llvm_module_from_bc_file	(	const std::string &	bc_filename,
		llvm::LLVMContext &	context
	)

Definition at line 1583 of file NativeCodegen.cpp.

Referenced by Executor::update_extension_modules().

                               {
   llvm::SMDiagnostic err;
 
   auto buffer_or_error = llvm::MemoryBuffer::getFile(bc_filename);
   CHECK(!buffer_or_error.getError()) << "bc_filename=" << bc_filename;
   llvm::MemoryBuffer* buffer = buffer_or_error.get().get();
 
   auto owner = llvm::parseBitcodeFile(buffer->getMemBufferRef(), context);
   CHECK(!owner.takeError());
   CHECK(owner->get());
   return std::move(owner.get());
 }

Here is the caller graph for this function:

std::unique_ptr<llvm::Module> read_llvm_module_from_ir_file	(	const std::string &	udf_ir_filename,
		llvm::LLVMContext &	ctx,
		bool	is_gpu = `false`
	)

Definition at line 1598 of file NativeCodegen.cpp.

Referenced by Executor::update_extension_modules().

                          {
   llvm::SMDiagnostic parse_error;
 
   llvm::StringRef file_name_arg(udf_ir_filename);
 
   auto owner = llvm::parseIRFile(file_name_arg, parse_error, ctx);
   if (!owner) {
     throw_parseIR_error(parse_error, udf_ir_filename, is_gpu);
   }
 
   if (is_gpu) {
     llvm::Triple gpu_triple(owner->getTargetTriple());
     if (!gpu_triple.isNVPTX()) {
       LOG(WARNING)
           << "Expected triple nvptx64-nvidia-cuda for NVVM IR of loadtime UDFs but got "
           << gpu_triple.str() << ". Disabling the NVVM IR module.";
       return std::unique_ptr<llvm::Module>();
     }
   }
   return owner;
 }

Here is the caller graph for this function:

std::unique_ptr<llvm::Module> read_llvm_module_from_ir_string	(	const std::string &	udf_ir_string,
		llvm::LLVMContext &	ctx,
		bool	is_gpu = `false`
	)

Definition at line 1623 of file NativeCodegen.cpp.

Referenced by Executor::update_extension_modules().

                          {
   llvm::SMDiagnostic parse_error;
 
   auto buf = std::make_unique<llvm::MemoryBufferRef>(udf_ir_string,
                                                      "Runtime UDF/UDTF LLVM/NVVM IR");
 
   auto owner = llvm::parseIR(*buf, parse_error, ctx);
   if (!owner) {
     LOG(IR) << "read_llvm_module_from_ir_string:\n"
             << udf_ir_string << "\nEnd of LLVM/NVVM IR";
     throw_parseIR_error(parse_error, "", /* is_gpu= */ is_gpu);
   }
 
   if (is_gpu) {
     llvm::Triple gpu_triple(owner->getTargetTriple());
     if (!gpu_triple.isNVPTX()) {
       LOG(IR) << "read_llvm_module_from_ir_string:\n"
               << udf_ir_string << "\nEnd of NNVM IR";
       LOG(WARNING) << "Expected triple nvptx64-nvidia-cuda for NVVM IR but got "
                    << gpu_triple.str()
                    << ". Executing runtime UDF/UDTFs on GPU will be disabled.";
       return std::unique_ptr<llvm::Module>();
       ;
     }
   }
   return owner;
 }

Here is the caller graph for this function:

void verify_function_ir ( const llvm::Function * func )

Definition at line 388 of file NativeCodegen.cpp.

References logger::FATAL, and LOG.

Referenced by GpuSharedMemCodeBuilder::codegen(), spatial_type::Transform::codegen(), anonymous_namespace{JoinLoopTest.cpp}::create_loop_test_function(), TableFunctionCompilationContext::generateEntryPoint(), StubGenerator::generateStub(), and translate_function().

                                                   {
   std::stringstream err_ss;
   llvm::raw_os_ostream err_os(err_ss);
   err_os << "\n-----\n";
   if (llvm::verifyFunction(*func, &err_os)) {
     err_os << "\n-----\n";
     func->print(err_os, nullptr);
     err_os << "\n-----\n";
     LOG(FATAL) << err_ss.str();
   }
 }

Here is the caller graph for this function:

Variable Documentation

float g_fraction_code_cache_to_evict = 0.2

Definition at line 83 of file NativeCodegen.cpp.

Referenced by CommandLineOptions::fillDeveloperOptions().

constexpr std::array<std::string_view, 18> TARGET_RUNTIME_FUNCTIONS_FOR_MODULE_CLONING

Initial value:

{"query_stub_hoisted_literals",
     "multifrag_query_hoisted_literals",
     "query_stub",
     "multifrag_query",
     "fixed_width_int_decode",
     "fixed_width_unsigned_decode",
     "diff_fixed_width_int_decode",
     "fixed_width_double_decode",
     "fixed_width_float_decode",
     "fixed_width_small_date_decode",
     "record_error_code",
     "get_error_code",
     "pos_start_impl",
     "pos_step_impl",
     "group_buff_idx_impl",
     "init_shared_mem",
     "init_shared_mem_nop",
     "write_back_nop"}}

Definition at line 1556 of file NativeCodegen.cpp.

Classes

Namespaces

Macros

Functions

Variables

Macro Definition Documentation

Function Documentation

Variable Documentation