OmniSciDB  f632821e96
NativeCodegen.cpp File Reference
#include "CodeGenerator.h"
#include "Execute.h"
#include "ExtensionFunctionsWhitelist.h"
#include "GpuSharedMemoryUtils.h"
#include "LLVMFunctionAttributesUtil.h"
#include "OutputBufferInitialization.h"
#include "QueryTemplateGenerator.h"
#include "OSDependent/omnisci_path.h"
#include "Shared/MathUtils.h"
#include "StreamingTopN.h"
#include <llvm/Bitcode/BitcodeReader.h>
#include <llvm/Bitcode/BitcodeWriter.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/IR/Attributes.h>
#include <llvm/IR/GlobalValue.h>
#include <llvm/IR/InstIterator.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Verifier.h>
#include <llvm/IRReader/IRReader.h>
#include <llvm/Support/Casting.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/FormattedStream.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_os_ostream.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Transforms/InstCombine/InstCombine.h>
#include <llvm/Transforms/Instrumentation.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Transforms/Utils/Cloning.h>
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include <llvm/Linker/Linker.h>
#include <llvm/Support/raw_ostream.h>
+ Include dependency graph for NativeCodegen.cpp:

Go to the source code of this file.

Namespaces

 anonymous_namespace{NativeCodegen.cpp}
 

Functions

void anonymous_namespace{NativeCodegen.cpp}::eliminate_dead_self_recursive_funcs (llvm::Module &M, const std::unordered_set< llvm::Function *> &live_funcs)
 
void anonymous_namespace{NativeCodegen.cpp}::optimize_ir (llvm::Function *query_func, llvm::Module *module, llvm::legacy::PassManager &pass_manager, const std::unordered_set< llvm::Function *> &live_funcs, const CompilationOptions &co)
 
void verify_function_ir (const llvm::Function *func)
 
std::string anonymous_namespace{NativeCodegen.cpp}::assemblyForCPU (ExecutionEngineWrapper &execution_engine, llvm::Module *module)
 
std::string anonymous_namespace{NativeCodegen.cpp}::cpp_to_llvm_name (const std::string &s)
 
std::string anonymous_namespace{NativeCodegen.cpp}::gen_array_any_all_sigs ()
 
std::string anonymous_namespace{NativeCodegen.cpp}::gen_translate_null_key_sigs ()
 
llvm::StringRef get_gpu_target_triple_string ()
 
llvm::StringRef get_gpu_data_layout ()
 
std::map< std::string, std::string > get_device_parameters (bool cpu_only)
 
llvm::Module * read_template_module (llvm::LLVMContext &context)
 
void anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders (const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *module)
 
void anonymous_namespace{NativeCodegen.cpp}::set_row_func_argnames (llvm::Function *row_func, const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals)
 
llvm::Function * anonymous_namespace{NativeCodegen.cpp}::create_row_function (const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Module *module, llvm::LLVMContext &context)
 
void anonymous_namespace{NativeCodegen.cpp}::bind_query (llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *module)
 
std::vector< std::string > anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames (const std::vector< Analyzer::Expr *> &target_exprs, const bool is_group_by)
 
std::unique_ptr< llvm::Module > g_rt_module (read_template_module(getGlobalLLVMContext()))
 
bool is_udf_module_present (bool cpu_only)
 
bool is_rt_udf_module_present (bool cpu_only)
 
void throw_parseIR_error (const llvm::SMDiagnostic &parse_error, std::string src="")
 
void read_udf_gpu_module (const std::string &udf_ir_filename)
 
void read_udf_cpu_module (const std::string &udf_ir_filename)
 
void read_rt_udf_gpu_module (const std::string &udf_ir_string)
 
void read_rt_udf_cpu_module (const std::string &udf_ir_string)
 
template<typename InstType >
llvm::Value * anonymous_namespace{NativeCodegen.cpp}::find_variable_in_basic_block (llvm::Function *func, std::string bb_name, std::string variable_name)
 
size_t anonymous_namespace{NativeCodegen.cpp}::get_shared_memory_size (const bool shared_mem_used, const QueryMemoryDescriptor *query_mem_desc_ptr)
 
bool anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported (const QueryMemoryDescriptor *query_mem_desc_ptr, const RelAlgExecutionUnit &ra_exe_unit, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const ExecutorDeviceType device_type, const unsigned gpu_blocksize, const unsigned num_blocks_per_mp)
 
std::string anonymous_namespace{NativeCodegen.cpp}::serialize_llvm_metadata_footnotes (llvm::Function *query_func, CgenState *cgen_state)
 
std::unique_ptr< llvm::Module > runtime_module_shallow_copy (CgenState *cgen_state)
 
std::vector< llvm::Value * > generate_column_heads_load (const int num_columns, llvm::Value *byte_stream_arg, llvm::IRBuilder<> &ir_builder, llvm::LLVMContext &ctx)
 

Variables

float g_fraction_code_cache_to_evict = 0.2
 
std::unique_ptr< llvm::Module > udf_gpu_module
 
std::unique_ptr< llvm::Module > udf_cpu_module
 
std::unique_ptr< llvm::Module > rt_udf_gpu_module
 
std::unique_ptr< llvm::Module > rt_udf_cpu_module
 
std::unique_ptr< llvm::Module > g_rt_module
 
const std::string anonymous_namespace{NativeCodegen.cpp}::cuda_rt_decls
 

Function Documentation

◆ g_rt_module()

std::unique_ptr<llvm::Module> g_rt_module ( read_template_module(getGlobalLLVMContext())  )

◆ generate_column_heads_load()

std::vector<llvm::Value*> generate_column_heads_load ( const int  num_columns,
llvm::Value *  byte_stream_arg,
llvm::IRBuilder<> &  ir_builder,
llvm::LLVMContext &  ctx 
)

Loads the individual column heads from a single packed buffer of pointers (the byte stream argument), issuing one load per column index from 0 to num_columns - 1.

Definition at line 2623 of file NativeCodegen.cpp.

References CHECK.

Referenced by Executor::compileWorkUnit(), and TableFunctionCompilationContext::generateEntryPoint().

2626  {
2627  CHECK(byte_stream_arg);
2628  const auto max_col_local_id = num_columns - 1;
2629 
2630  std::vector<llvm::Value*> col_heads;
2631  for (int col_id = 0; col_id <= max_col_local_id; ++col_id) {
2632  col_heads.emplace_back(ir_builder.CreateLoad(ir_builder.CreateGEP(
2633  byte_stream_arg, llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), col_id))));
2634  }
2635  return col_heads;
2636 }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ get_device_parameters()

std::map<std::string, std::string> get_device_parameters ( bool  cpu_only)

Definition at line 708 of file NativeCodegen.cpp.

References checkCudaErrors(), cpu_threads(), get_gpu_data_layout(), get_gpu_target_triple_string(), run_benchmark_import::result, and to_string().

Referenced by DBHandler::max_bytes_for_thrift().

708  {
709  std::map<std::string, std::string> result;
710 
711  result.insert(std::make_pair("cpu_name", llvm::sys::getHostCPUName()));
712  result.insert(std::make_pair("cpu_triple", llvm::sys::getProcessTriple()));
713  result.insert(
714  std::make_pair("cpu_cores", std::to_string(llvm::sys::getHostNumPhysicalCores())));
715  result.insert(std::make_pair("cpu_threads", std::to_string(cpu_threads())));
716 
717  llvm::StringMap<bool> cpu_features;
718  if (llvm::sys::getHostCPUFeatures(cpu_features)) {
719  std::string features_str = "";
720  for (auto it = cpu_features.begin(); it != cpu_features.end(); ++it) {
721  features_str += (it->getValue() ? " +" : " -");
722  features_str += it->getKey().str();
723  }
724  result.insert(std::make_pair("cpu_features", features_str));
725  }
726 
727 #ifdef HAVE_CUDA
728  if (!cpu_only) {
729  int device_count = 0;
730  checkCudaErrors(cuDeviceGetCount(&device_count));
731  if (device_count) {
732  CUdevice device{};
733  char device_name[256];
734  int major = 0, minor = 0;
735  checkCudaErrors(cuDeviceGet(&device, 0)); // assuming homogeneous multi-GPU system
736  checkCudaErrors(cuDeviceGetName(device_name, 256, device));
737  checkCudaErrors(cuDeviceGetAttribute(
738  &major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device));
739  checkCudaErrors(cuDeviceGetAttribute(
740  &minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device));
741 
742  result.insert(std::make_pair("gpu_name", device_name));
743  result.insert(std::make_pair("gpu_count", std::to_string(device_count)));
744  result.insert(std::make_pair("gpu_compute_capability",
745  std::to_string(major) + "." + std::to_string(minor)));
746  result.insert(std::make_pair("gpu_triple", get_gpu_target_triple_string()));
747  result.insert(std::make_pair("gpu_datalayout", get_gpu_data_layout()));
748  }
749  }
750 #endif
751 
752  return result;
753 }
void checkCudaErrors(CUresult err)
Definition: sample.cpp:38
llvm::StringRef get_gpu_data_layout()
std::string to_string(char const *&&v)
llvm::StringRef get_gpu_target_triple_string()
int CUdevice
Definition: nocuda.h:20
int cpu_threads()
Definition: thread_count.h:24
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_gpu_data_layout()

llvm::StringRef get_gpu_data_layout ( )

Definition at line 700 of file NativeCodegen.cpp.

Referenced by Executor::compileWorkUnit(), and get_device_parameters().

700  {
701  return llvm::StringRef(
702  "e-p:64:64:64-i1:8:8-i8:8:8-"
703  "i16:16:16-i32:32:32-i64:64:64-"
704  "f32:32:32-f64:64:64-v16:16:16-"
705  "v32:32:32-v64:64:64-v128:128:128-n16:32:64");
706 }
+ Here is the caller graph for this function:

◆ get_gpu_target_triple_string()

llvm::StringRef get_gpu_target_triple_string ( )

Definition at line 696 of file NativeCodegen.cpp.

Referenced by Executor::compileWorkUnit(), and get_device_parameters().

696  {
697  return llvm::StringRef("nvptx64-nvidia-cuda");
698 }
+ Here is the caller graph for this function:

◆ is_rt_udf_module_present()

bool is_rt_udf_module_present ( bool  cpu_only)

Definition at line 1367 of file NativeCodegen.cpp.

References rt_udf_cpu_module, and rt_udf_gpu_module.

Referenced by Executor::compileWorkUnit(), and CodeGenerator::generateNativeGPUCode().

1367  {
1368  return (cpu_only || rt_udf_gpu_module != nullptr) && (rt_udf_cpu_module != nullptr);
1369 }
std::unique_ptr< llvm::Module > rt_udf_cpu_module
std::unique_ptr< llvm::Module > rt_udf_gpu_module
+ Here is the caller graph for this function:

◆ is_udf_module_present()

bool is_udf_module_present ( bool  cpu_only)

Definition at line 1363 of file NativeCodegen.cpp.

References udf_cpu_module, and udf_gpu_module.

Referenced by Executor::compileWorkUnit(), and CodeGenerator::generateNativeGPUCode().

1363  {
1364  return (cpu_only || udf_gpu_module != nullptr) && (udf_cpu_module != nullptr);
1365 }
std::unique_ptr< llvm::Module > udf_cpu_module
std::unique_ptr< llvm::Module > udf_gpu_module
+ Here is the caller graph for this function:

◆ read_rt_udf_cpu_module()

void read_rt_udf_cpu_module ( const std::string &  udf_ir_string)

Definition at line 1412 of file NativeCodegen.cpp.

References getGlobalLLVMContext(), rt_udf_cpu_module, and throw_parseIR_error().

Referenced by DBHandler::register_runtime_extension_functions().

1412  {
1413  llvm::SMDiagnostic parse_error;
1414 
1415  auto buf =
1416  std::make_unique<llvm::MemoryBufferRef>(udf_ir_string, "Runtime UDF for CPU");
1417 
1418  rt_udf_cpu_module = llvm::parseIR(*buf, parse_error, getGlobalLLVMContext());
1419  if (!rt_udf_cpu_module) {
1420  throw_parseIR_error(parse_error);
1421  }
1422 }
std::unique_ptr< llvm::Module > rt_udf_cpu_module
llvm::LLVMContext & getGlobalLLVMContext()
void throw_parseIR_error(const llvm::SMDiagnostic &parse_error, std::string src="")
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ read_rt_udf_gpu_module()

void read_rt_udf_gpu_module ( const std::string &  udf_ir_string)

Definition at line 1400 of file NativeCodegen.cpp.

References getGlobalLLVMContext(), rt_udf_gpu_module, and throw_parseIR_error().

Referenced by DBHandler::register_runtime_extension_functions().

1400  {
1401  llvm::SMDiagnostic parse_error;
1402 
1403  auto buf =
1404  std::make_unique<llvm::MemoryBufferRef>(udf_ir_string, "Runtime UDF for GPU");
1405 
1406  rt_udf_gpu_module = llvm::parseIR(*buf, parse_error, getGlobalLLVMContext());
1407  if (!rt_udf_gpu_module) {
1408  throw_parseIR_error(parse_error);
1409  }
1410 }
std::unique_ptr< llvm::Module > rt_udf_gpu_module
llvm::LLVMContext & getGlobalLLVMContext()
void throw_parseIR_error(const llvm::SMDiagnostic &parse_error, std::string src="")
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ read_template_module()

llvm::Module* read_template_module ( llvm::LLVMContext &  context)

Definition at line 1056 of file NativeCodegen.cpp.

References CHECK, and omnisci::get_root_abs_path().

Referenced by anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames(), and ll_bool().

1056  {
1057  llvm::SMDiagnostic err;
1058 
1059  auto buffer_or_error = llvm::MemoryBuffer::getFile(omnisci::get_root_abs_path() +
1060  "/QueryEngine/RuntimeFunctions.bc");
1061  CHECK(!buffer_or_error.getError());
1062  llvm::MemoryBuffer* buffer = buffer_or_error.get().get();
1063 
1064  auto owner = llvm::parseBitcodeFile(buffer->getMemBufferRef(), context);
1065  CHECK(!owner.takeError());
1066  auto module = owner.get().release();
1067  CHECK(module);
1068 
1069  return module;
1070 }
#define CHECK(condition)
Definition: Logger.h:197
std::string get_root_abs_path()
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ read_udf_cpu_module()

void read_udf_cpu_module ( const std::string &  udf_ir_filename)

Definition at line 1389 of file NativeCodegen.cpp.

References getGlobalLLVMContext(), throw_parseIR_error(), and udf_cpu_module.

Referenced by UdfCompiler::readCpuCompiledModule().

1389  {
1390  llvm::SMDiagnostic parse_error;
1391 
1392  llvm::StringRef file_name_arg(udf_ir_filename);
1393 
1394  udf_cpu_module = llvm::parseIRFile(file_name_arg, parse_error, getGlobalLLVMContext());
1395  if (!udf_cpu_module) {
1396  throw_parseIR_error(parse_error, udf_ir_filename);
1397  }
1398 }
std::unique_ptr< llvm::Module > udf_cpu_module
llvm::LLVMContext & getGlobalLLVMContext()
void throw_parseIR_error(const llvm::SMDiagnostic &parse_error, std::string src="")
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ read_udf_gpu_module()

void read_udf_gpu_module ( const std::string &  udf_ir_filename)

Definition at line 1378 of file NativeCodegen.cpp.

References getGlobalLLVMContext(), throw_parseIR_error(), and udf_gpu_module.

Referenced by UdfCompiler::readGpuCompiledModule().

1378  {
1379  llvm::SMDiagnostic parse_error;
1380 
1381  llvm::StringRef file_name_arg(udf_ir_filename);
1382 
1383  udf_gpu_module = llvm::parseIRFile(file_name_arg, parse_error, getGlobalLLVMContext());
1384  if (!udf_gpu_module) {
1385  throw_parseIR_error(parse_error, udf_ir_filename);
1386  }
1387 }
std::unique_ptr< llvm::Module > udf_gpu_module
llvm::LLVMContext & getGlobalLLVMContext()
void throw_parseIR_error(const llvm::SMDiagnostic &parse_error, std::string src="")
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ runtime_module_shallow_copy()

std::unique_ptr<llvm::Module> runtime_module_shallow_copy ( CgenState *  cgen_state)

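Makes a shallow copy (just declarations) of the runtime module. Function definitions are cloned only if they're used from the generated code. At copy time, the predicate passed to llvm::CloneModule keeps the definitions of non-function globals and of functions with private or internal linkage; every other function is copied as a declaration only.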

Definition at line 2605 of file NativeCodegen.cpp.

References CgenState::vmap_.

Referenced by ResultSetReductionJIT::codegen(), GpuReductionHelperJIT::codegen(), StubGenerator::generateStub(), and TableFunctionCompilationContext::TableFunctionCompilationContext().

2605  {
2606  return llvm::CloneModule(
2607 #if LLVM_VERSION_MAJOR >= 7
2608  *g_rt_module.get(),
2609 #else
2610  g_rt_module.get(),
2611 #endif
2612  cgen_state->vmap_,
2613  [](const llvm::GlobalValue* gv) {
2614  auto func = llvm::dyn_cast<llvm::Function>(gv);
2615  if (!func) {
2616  return true;
2617  }
2618  return (func->getLinkage() == llvm::GlobalValue::LinkageTypes::PrivateLinkage ||
2619  func->getLinkage() == llvm::GlobalValue::LinkageTypes::InternalLinkage);
2620  });
2621 }
std::unique_ptr< llvm::Module > g_rt_module
llvm::ValueToValueMapTy vmap_
Definition: CgenState.h:340
+ Here is the caller graph for this function:

◆ throw_parseIR_error()

void throw_parseIR_error ( const llvm::SMDiagnostic &  parse_error,
std::string  src = "" 
)

Definition at line 1371 of file NativeCodegen.cpp.

References src.

Referenced by read_rt_udf_cpu_module(), read_rt_udf_gpu_module(), read_udf_cpu_module(), and read_udf_gpu_module().

1371  {
1372  std::string excname = "LLVM IR ParseError: ";
1373  llvm::raw_string_ostream ss(excname);
1374  parse_error.print(src.c_str(), ss, false, false);
1375  throw std::runtime_error(ss.str());
1376 }
int64_t * src
+ Here is the caller graph for this function:

◆ verify_function_ir()

void verify_function_ir ( const llvm::Function *  func)

Definition at line 204 of file NativeCodegen.cpp.

References logger::FATAL, and LOG.

Referenced by GpuSharedMemCodeBuilder::codegen(), Executor::compileWorkUnit(), TableFunctionCompilationContext::generateEntryPoint(), StubGenerator::generateStub(), serialize_llvm_object(), and translate_function().

204  {
205  std::stringstream err_ss;
206  llvm::raw_os_ostream err_os(err_ss);
207  if (llvm::verifyFunction(*func, &err_os)) {
208  func->print(llvm::outs());
209  LOG(FATAL) << err_ss.str();
210  }
211 }
#define LOG(tag)
Definition: Logger.h:188
+ Here is the caller graph for this function:

Variable Documentation

◆ g_fraction_code_cache_to_evict

float g_fraction_code_cache_to_evict = 0.2

◆ g_rt_module

std::unique_ptr<llvm::Module> g_rt_module

◆ rt_udf_cpu_module

std::unique_ptr<llvm::Module> rt_udf_cpu_module

◆ rt_udf_gpu_module

std::unique_ptr<llvm::Module> rt_udf_gpu_module

◆ udf_cpu_module

std::unique_ptr<llvm::Module> udf_cpu_module

◆ udf_gpu_module

std::unique_ptr<llvm::Module> udf_gpu_module