OmniSciDB (rev. 0264ff685a)
QueryTemplateGenerator.cpp File Reference
#include "QueryTemplateGenerator.h"
#include "IRCodegenUtils.h"
#include "Logger/Logger.h"
#include <llvm/IR/Constants.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Verifier.h>
Namespaces

anonymous_namespace{QueryTemplateGenerator.cpp}

Functions

llvm::Type* anonymous_namespace{QueryTemplateGenerator.cpp}::get_pointer_element_type(llvm::Value* value)

template<class Attributes>
llvm::Function* anonymous_namespace{QueryTemplateGenerator.cpp}::default_func_builder(llvm::Module* mod, const std::string& name)

template<class Attributes>
llvm::Function* anonymous_namespace{QueryTemplateGenerator.cpp}::pos_start(llvm::Module* mod)

template<class Attributes>
llvm::Function* anonymous_namespace{QueryTemplateGenerator.cpp}::group_buff_idx(llvm::Module* mod)

template<class Attributes>
llvm::Function* anonymous_namespace{QueryTemplateGenerator.cpp}::pos_step(llvm::Module* mod)

template<class Attributes>
llvm::Function* anonymous_namespace{QueryTemplateGenerator.cpp}::row_process(llvm::Module* mod, const size_t aggr_col_count, const bool hoist_literals)

template<class Attributes>
std::tuple<llvm::Function*, llvm::CallInst*> query_template_impl(llvm::Module* mod, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query, const GpuSharedMemoryContext& gpu_smem_context)

template<class Attributes>
std::tuple<llvm::Function*, llvm::CallInst*> query_group_by_template_impl(llvm::Module* mod, const bool hoist_literals, const QueryMemoryDescriptor& query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit, const GpuSharedMemoryContext& gpu_smem_context)

std::tuple<llvm::Function*, llvm::CallInst*> query_template(llvm::Module* module, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query, const GpuSharedMemoryContext& gpu_smem_context)

std::tuple<llvm::Function*, llvm::CallInst*> query_group_by_template(llvm::Module* module, const bool hoist_literals, const QueryMemoryDescriptor& query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit, const GpuSharedMemoryContext& gpu_smem_context)
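Both public entry points return the generated query stub function together with an llvm::CallInst placeholder (judging from the references listed below, the call into row_process) that the caller later rewires to the real per-row function. A hypothetical caller-side sketch; the selector is_group_by and the argument values are illustrative and not taken from Executor::compileWorkUnit:

    // Hypothetical usage sketch; is_group_by and the argument values are
    // illustrative, not copied from Executor::compileWorkUnit.
    auto [query_func, row_process_call] =
        is_group_by ? query_group_by_template(module,
                                              hoist_literals,
                                              query_mem_desc,
                                              device_type,
                                              /*check_scan_limit=*/false,
                                              gpu_smem_context)
                    : query_template(module,
                                     aggr_col_count,
                                     hoist_literals,
                                     /*is_estimate_query=*/false,
                                     gpu_smem_context);

The detailed entries below document each function.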
std::tuple<llvm::Function*, llvm::CallInst*> query_group_by_template(llvm::Module* module,
                                                                     const bool hoist_literals,
                                                                     const QueryMemoryDescriptor& query_mem_desc,
                                                                     const ExecutorDeviceType device_type,
                                                                     const bool check_scan_limit,
                                                                     const GpuSharedMemoryContext& gpu_smem_context)
Definition at line 874 of file QueryTemplateGenerator.cpp.
Referenced by Executor::compileWorkUnit().
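This public function is presumably a thin wrapper that instantiates query_group_by_template_impl with a concrete Attributes type. A minimal sketch of that forwarding, assuming the template argument is llvm::AttributeList (an assumption, not verified against the sources):

    // Sketch only: assumes the Attributes template parameter is llvm::AttributeList.
    std::tuple<llvm::Function*, llvm::CallInst*> query_group_by_template(
        llvm::Module* module,
        const bool hoist_literals,
        const QueryMemoryDescriptor& query_mem_desc,
        const ExecutorDeviceType device_type,
        const bool check_scan_limit,
        const GpuSharedMemoryContext& gpu_smem_context) {
      return query_group_by_template_impl<llvm::AttributeList>(module,
                                                               hoist_literals,
                                                               query_mem_desc,
                                                               device_type,
                                                               check_scan_limit,
                                                               gpu_smem_context);
    }

query_template (documented further below) would forward to query_template_impl in the same way.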
std::tuple<llvm::Function*, llvm::CallInst*> query_group_by_template_impl(llvm::Module* mod,
                                                                          const bool hoist_literals,
                                                                          const QueryMemoryDescriptor& query_mem_desc,
                                                                          const ExecutorDeviceType device_type,
                                                                          const bool check_scan_limit,
                                                                          const GpuSharedMemoryContext& gpu_smem_context)
Definition at line 543 of file QueryTemplateGenerator.cpp.
References CHECK, logger::FATAL, anonymous_namespace{QueryTemplateGenerator.cpp}::get_pointer_element_type(), GpuSharedMemoryContext::getSharedMemorySize(), GPU, anonymous_namespace{QueryTemplateGenerator.cpp}::group_buff_idx(), GpuSharedMemoryContext::isSharedMemoryUsed(), QueryMemoryDescriptor::isWarpSyncRequired(), LLVM_ALIGN, LOG, anonymous_namespace{QueryTemplateGenerator.cpp}::pos_start(), anonymous_namespace{QueryTemplateGenerator.cpp}::pos_step(), and anonymous_namespace{QueryTemplateGenerator.cpp}::row_process().
std::tuple<llvm::Function*, llvm::CallInst*> query_template(llvm::Module* module,
                                                            const size_t aggr_col_count,
                                                            const bool hoist_literals,
                                                            const bool is_estimate_query,
                                                            const GpuSharedMemoryContext& gpu_smem_context)
Definition at line 865 of file QueryTemplateGenerator.cpp.
Referenced by Executor::compileWorkUnit().
std::tuple<llvm::Function*, llvm::CallInst*> query_template_impl(llvm::Module* mod,
                                                                 const size_t aggr_col_count,
                                                                 const bool hoist_literals,
                                                                 const bool is_estimate_query,
                                                                 const GpuSharedMemoryContext& gpu_smem_context)
If the GPU shared memory optimization is disabled, each thread copies its aggregate results (held in registers) back into memory, one copy per aggregate target. This is done for every processed fragment; the final results are then reduced on the host (per target, across all threads and all fragments).
If the GPU shared memory optimization is enabled, all threads' results are properly (atomically) aggregated into memory, which makes the final reduction on the host much cheaper. In this case the template emits a call to a no-op dummy write-back function, which is replaced at runtime according to the target expressions (a self-contained sketch of both paths follows this entry).
Definition at line 195 of file QueryTemplateGenerator.cpp.
References CHECK, logger::FATAL, anonymous_namespace{QueryTemplateGenerator.cpp}::get_pointer_element_type(), anonymous_namespace{QueryTemplateGenerator.cpp}::group_buff_idx(), GpuSharedMemoryContext::isSharedMemoryUsed(), LLVM_ALIGN, LOG, anonymous_namespace{QueryTemplateGenerator.cpp}::pos_start(), anonymous_namespace{QueryTemplateGenerator.cpp}::pos_step(), run_benchmark_import::result, anonymous_namespace{QueryTemplateGenerator.cpp}::row_process(), and to_string().
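The sketch below is illustrative only and is not OmniSciDB code: the names sketch_write_back and write_back_noop are invented for this example. It shows, at the LLVM IR level, the two write-back strategies described above: emitting a call to a dummy stub that is patched at runtime when shared memory is used, versus a plain per-thread store of the register-resident aggregate otherwise.

    #include <llvm/IR/IRBuilder.h>
    #include <llvm/IR/LLVMContext.h>
    #include <llvm/IR/Module.h>
    #include <llvm/IR/Verifier.h>

    // Self-contained sketch, NOT OmniSciDB code; "sketch_write_back" and
    // "write_back_noop" are hypothetical names used only for illustration.
    llvm::Function* sketch_write_back(llvm::Module& mod, const bool shared_mem_used) {
      auto& ctx = mod.getContext();
      auto* i64_ty = llvm::Type::getInt64Ty(ctx);
      auto* i64_ptr_ty = llvm::PointerType::getUnqual(i64_ty);

      // void sketch_write_back(i64 agg_val, i64* out_slot)
      auto* fn_ty = llvm::FunctionType::get(
          llvm::Type::getVoidTy(ctx), {i64_ty, i64_ptr_ty}, /*isVarArg=*/false);
      auto* fn = llvm::Function::Create(
          fn_ty, llvm::Function::ExternalLinkage, "sketch_write_back", mod);

      llvm::IRBuilder<> builder(llvm::BasicBlock::Create(ctx, "entry", fn));
      auto arg_it = fn->arg_begin();
      llvm::Value* agg_val = &*arg_it++;
      llvm::Value* out_slot = &*arg_it;

      if (shared_mem_used) {
        // Shared-memory path: call a no-op stub; the real write-back is
        // substituted at runtime once the target expressions are known.
        auto* noop_ty = llvm::FunctionType::get(
            llvm::Type::getVoidTy(ctx), {i64_ptr_ty}, /*isVarArg=*/false);
        auto noop_fn = mod.getOrInsertFunction("write_back_noop", noop_ty);
        builder.CreateCall(noop_fn, {out_slot});
      } else {
        // Default path: each thread copies its aggregate back to memory; the
        // host later reduces per target across threads and fragments.
        builder.CreateStore(agg_val, out_slot);
      }
      builder.CreateRetVoid();
      llvm::verifyFunction(*fn);
      return fn;
    }

The actual template generator emits this logic for every aggregate column inside the generated query stub; the sketch isolates the per-slot decision only.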