17 #ifndef QUERYENGINE_GROUPBYANDAGGREGATE_H
18 #define QUERYENGINE_GROUPBYANDAGGREGATE_H
30 #include "../Shared/sqltypes.h"
33 #include <llvm/IR/Function.h>
34 #include <llvm/IR/Instructions.h>
35 #include <llvm/IR/Value.h>
36 #include <boost/algorithm/string/join.hpp>
37 #include <boost/make_unique.hpp>
51 auto nptr = boost::get<void*>(&str);
56 auto sptr = boost::get<std::string>(&str);
63 const std::string& delim) {
65 const auto array_tv = boost::get<ArrayTargetValue>(&tv);
67 if (array_tv->is_initialized()) {
68 const auto& vec = array_tv->get();
69 std::vector<std::string> elem_strs;
70 elem_strs.reserve(vec.size());
72 for (
const auto& elem_tv : vec) {
79 const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
82 datum.
bigintval = *boost::get<int64_t>(scalar_tv);
89 const auto bool_val = *boost::get<int64_t>(scalar_tv);
90 return bool_val ==
NULL_BOOLEAN ?
"NULL" : (bool_val ?
"true" :
"false");
92 auto iptr = boost::get<int64_t>(scalar_tv);
96 auto fptr = boost::get<float>(scalar_tv);
100 auto dptr = boost::get<double>(scalar_tv);
106 auto sptr = boost::get<NullableString>(scalar_tv);
130 const std::vector<InputTableInfo>& query_infos,
131 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
132 const std::optional<int64_t>& group_cardinality_estimation);
136 bool codegen(llvm::Value* filter_result,
137 llvm::BasicBlock* sc_false,
145 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
153 const bool chain_to_next,
154 const std::string& label_prefix,
156 const bool share_false_edge_with_parent);
173 const bool allow_multifrag,
174 const size_t max_groups_buffer_entry_count,
175 const int8_t crt_min_byte_width,
177 const bool output_columnar_hint);
180 const bool allow_multifrag,
181 const size_t max_groups_buffer_entry_count,
182 const int8_t crt_min_byte_width,
183 const bool sort_on_gpu_hint,
185 const bool must_use_baseline_sort,
186 const bool output_columnar_hint);
189 const size_t shard_count)
const;
208 llvm::Value* groups_buffer,
209 llvm::Value* group_expr_lv_translated,
210 llvm::Value* group_expr_lv_original,
211 const int32_t row_size_quad);
214 llvm::Value* groups_buffer,
215 llvm::Value* group_key,
216 llvm::Value* key_size_lv,
218 const int32_t row_size_quad);
223 llvm::Value* groups_buffer,
224 llvm::Value* group_key,
225 llvm::Value* key_size_lv,
227 const size_t key_width,
228 const int32_t row_size_quad);
237 const bool is_group_by)
const;
241 llvm::Value* target);
243 bool codegenAggCalls(
const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
244 const std::vector<llvm::Value*>& agg_out_vec,
256 llvm::Value* output_buffer_byte_stream,
257 llvm::Value* out_row_idx,
258 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
260 const size_t chosen_bytes,
261 const size_t agg_out_off,
262 const size_t target_idx);
271 std::vector<llvm::Value*>& agg_args,
277 std::vector<llvm::Value*>& agg_args,
286 llvm::Value*
emitCall(
const std::string& fname,
const std::vector<llvm::Value*>&
args);
291 const std::string& agg_base_name,
292 const bool threads_share_memory,
355 return bitmap_sz_bits < 50000 && ra_exe_unit.
groupby_exprs.empty() &&
363 std::vector<int8_t> col_widths;
364 size_t col_expr_idx = 0;
365 for (
const auto col_expr : col_expr_list) {
368 col_widths.push_back(
sizeof(int64_t));
372 if ((chosen_type.is_string() && chosen_type.get_compression() ==
kENCODING_NONE) ||
373 chosen_type.is_array()) {
374 col_widths.push_back(
sizeof(int64_t));
375 col_widths.push_back(
sizeof(int64_t));
379 if (chosen_type.is_geometry()) {
380 for (
auto i = 0; i < chosen_type.get_physical_coord_cols(); ++i) {
381 col_widths.push_back(
sizeof(int64_t));
382 col_widths.push_back(
sizeof(int64_t));
388 CHECK_EQ(
size_t(0), col_expr_bitwidth % 8);
389 col_widths.push_back(static_cast<int8_t>(col_expr_bitwidth >> 3));
391 if (agg_info.agg_kind ==
kAVG) {
392 CHECK(agg_info.is_agg);
393 col_widths.push_back(
sizeof(int64_t));
407 #endif // QUERYENGINE_GROUPBYANDAGGREGATE_H
void codegenEstimator(std::stack< llvm::BasicBlock * > &array_loops, GroupByAndAggregate::DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
const RelAlgExecutionUnit & ra_exe_unit
bool g_enable_smem_group_by
bool gpuCanHandleOrderEntries(const std::list< Analyzer::OrderEntry > &order_entries)
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
llvm::Value * getAdditionalLiteral(const int32_t off)
llvm::Value * codegenAggColumnPtr(llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
: returns the pointer to where the aggregation should be stored.
bool codegenAggCalls(const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
class for a per-database catalog. also includes metadata for the current database and the current use...
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl(const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
void codegenApproxMedian(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
std::string datum_to_string(const TargetValue &tv, const SQLTypeInfo &ti, const std::string &delim)
ColRangeInfo getColRangeInfo()
std::string nullable_str_to_string(const NullableString &str)
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
QueryDescriptionType hash_type_
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
void checkErrorCode(llvm::Value *retCode)
bool needsUnnestDoublePatch(llvm::Value *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
HOST DEVICE SQLTypes get_type() const
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash(const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
int64_t extract_from_datum(const Datum datum, const SQLTypeInfo &ti)
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
int8_t get_min_byte_width()
size_t get_count_distinct_sub_bitmap_count(const size_t bitmap_sz_bits, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
const SQLTypeInfo get_compact_type(const TargetInfo &target)
CountDistinctDescriptors initCountDistinctDescriptors()
size_t get_bit_width(const SQLTypeInfo &ti)
GroupByAndAggregate(Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
std::vector< CountDistinctDescriptor > CountDistinctDescriptors
llvm::Value * convertNullIfAny(const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
void setFalseTarget(llvm::BasicBlock *cond_false)
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash(const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy(const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
llvm::BasicBlock * cond_false_
KeylessInfo getKeylessInfo(const std::vector< Analyzer::Expr * > &target_expr_list, const bool is_group_by) const
void codegenCountDistinct(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
DiamondCodegen(llvm::Value *cond, Executor *executor, const bool chain_to_next, const std::string &label_prefix, DiamondCodegen *parent, const bool share_false_edge_with_parent)
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash(llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
int64_t getShardedTopBucket(const ColRangeInfo &col_range_info, const size_t shard_count) const
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
const int32_t target_index
const std::vector< InputTableInfo > & query_infos_
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list)
HOST DEVICE EncodingType get_compression() const
llvm::BasicBlock * cond_true_
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor(const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
const ExecutorDeviceType device_type_
std::vector< llvm::Value * > codegenAggArg(const Analyzer::Expr *target_expr, const CompilationOptions &co)
llvm::Function * codegenPerfectHashFunction()
llvm::Value * codegenWindowRowPointer(const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
boost::variant< std::string, void * > NullableString
const std::optional< int64_t > group_cardinality_estimation_
llvm::BasicBlock * orig_cond_false_
llvm::Value * codegenOutputSlot(llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
int64_t extract_min_stat(const ChunkStats &stats, const SQLTypeInfo &ti)
void addTransientStringLiterals()
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
ColRangeInfo getExprRangeInfo(const Analyzer::Expr *expr) const
constexpr int8_t MAX_BYTE_WIDTH_SUPPORTED
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
const RelAlgExecutionUnit & ra_exe_unit_
SQLTypeInfo get_elem_type() const
int64_t extract_max_stat(const ChunkStats &stats, const SQLTypeInfo &ti)
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)