OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
GroupByAndAggregate.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERYENGINE_GROUPBYANDAGGREGATE_H
18 #define QUERYENGINE_GROUPBYANDAGGREGATE_H
19 
20 #include "BufferCompaction.h"
21 #include "ColumnarResults.h"
22 #include "CompilationOptions.h"
23 #include "GpuMemUtils.h"
24 #include "GpuSharedMemoryContext.h"
25 #include "InputMetadata.h"
26 #include "QueryExecutionContext.h"
27 #include "Rendering/RenderInfo.h"
28 #include "RuntimeFunctions.h"
29 
31 
32 #include "../Shared/sqltypes.h"
33 #include "Logger/Logger.h"
34 
35 #include <llvm/IR/Function.h>
36 #include <llvm/IR/Instructions.h>
37 #include <llvm/IR/Value.h>
38 #include <boost/algorithm/string/join.hpp>
39 #include <boost/make_unique.hpp>
40 
41 #include <stack>
42 #include <vector>
43 
// Global boolean flags. These are declarations only (`extern`); the definitions
// live in some other translation unit.
// NOTE(review): judging by the names, these toggle GPU shared-memory group-by and
// 64-bit COUNT results respectively -- confirm at the definition site.
44 extern bool g_enable_smem_group_by;
45 extern bool g_bigint_count;
46 
47 struct ColRangeInfo {
49  int64_t min;
50  int64_t max;
51  int64_t bucket;
52  bool has_nulls;
53  bool isEmpty() { return min == 0 && max == -1; }
54 };
55 
/**
 * Immutable pair of a `keyless` flag and a `target_index`.
 *
 * Both members are const, so a value is fixed at construction (aggregate
 * initialization: `KeylessInfo{flag, index}`) and cannot be assigned afterwards.
 * NOTE(review): presumably `target_index` identifies the target expression used
 * when `keyless` is true -- confirm against the code that produces this struct.
 */
struct KeylessInfo {
  const bool keyless;
  const int32_t target_index;
};
60 
// NOTE(review): the line that opens this class (listing line 61) is absent from this
// extract. The members below are taken to belong to `GroupByAndAggregate` because the
// constructor declared next carries that name.
62  public:
// Constructor. Receives the executor, the target device type, the execution unit,
// per-table input info, a shared row-set memory owner and an optional group
// cardinality estimate.
63  GroupByAndAggregate(Executor* executor,
64  const ExecutorDeviceType device_type,
65  const RelAlgExecutionUnit& ra_exe_unit,
66  const std::vector<InputTableInfo>& query_infos,
67  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
68  const std::optional<int64_t>& group_cardinality_estimation);
69 
70  // returns true iff checking the error code after every row
71  // is required -- slow path group by queries for now
// NOTE(review): listing line 74 is absent below. The cross-reference index for this
// file shows codegen() also taking `const QueryMemoryDescriptor& query_mem_desc`
// between `sc_false` and `co` -- confirm against the real header.
72  bool codegen(llvm::Value* filter_result,
73  llvm::BasicBlock* sc_false,
75  const CompilationOptions& co,
76  const GpuSharedMemoryContext& gpu_smem_context);
77 
// Static helper: computes a size_t from the execution unit and the catalog.
// NOTE(review): by its name this is a shard count used for top-N group queries -- confirm.
78  static size_t shard_count_for_top_groups(const RelAlgExecutionUnit& ra_exe_unit,
79  const Catalog_Namespace::Catalog& catalog);
80 
81  private:
82  bool gpuCanHandleOrderEntries(const std::list<Analyzer::OrderEntry>& order_entries);
83 
// Builds and returns an owned QueryMemoryDescriptor.
84  std::unique_ptr<QueryMemoryDescriptor> initQueryMemoryDescriptor(
85  const bool allow_multifrag,
86  const size_t max_groups_buffer_entry_count,
87  const int8_t crt_min_byte_width,
88  RenderInfo* render_info,
89  const bool output_columnar_hint);
90 
// Same parameter list as initQueryMemoryDescriptor() plus two extra flags:
// `sort_on_gpu_hint` and `must_use_baseline_sort`.
91  std::unique_ptr<QueryMemoryDescriptor> initQueryMemoryDescriptorImpl(
92  const bool allow_multifrag,
93  const size_t max_groups_buffer_entry_count,
94  const int8_t crt_min_byte_width,
95  const bool sort_on_gpu_hint,
96  RenderInfo* render_info,
97  const bool must_use_baseline_sort,
98  const bool output_columnar_hint);
99 
100  int64_t getShardedTopBucket(const ColRangeInfo& col_range_info,
101  const size_t shard_count) const;
102 
// The codegen* members below return LLVM values (or tuples of them), i.e. they emit
// IR rather than computing results directly.
103  llvm::Value* codegenOutputSlot(llvm::Value* groups_buffer,
104  const QueryMemoryDescriptor& query_mem_desc,
105  const CompilationOptions& co,
106  DiamondCodegen& diamond_codegen);
107 
108  std::tuple<llvm::Value*, llvm::Value*> codegenGroupBy(
109  const QueryMemoryDescriptor& query_mem_desc,
110  const CompilationOptions& co,
111  DiamondCodegen& codegen);
112 
113  llvm::Value* codegenVarlenOutputBuffer(const QueryMemoryDescriptor& query_mem_desc);
114 
115  std::tuple<llvm::Value*, llvm::Value*> codegenSingleColumnPerfectHash(
116  const QueryMemoryDescriptor& query_mem_desc,
117  const CompilationOptions& co,
118  llvm::Value* groups_buffer,
119  llvm::Value* group_expr_lv_translated,
120  llvm::Value* group_expr_lv_original,
121  const int32_t row_size_quad);
122 
123  std::tuple<llvm::Value*, llvm::Value*> codegenMultiColumnPerfectHash(
124  llvm::Value* groups_buffer,
125  llvm::Value* group_key,
126  llvm::Value* key_size_lv,
127  const QueryMemoryDescriptor& query_mem_desc,
128  const int32_t row_size_quad);
129  llvm::Function* codegenPerfectHashFunction();
130 
131  std::tuple<llvm::Value*, llvm::Value*> codegenMultiColumnBaselineHash(
132  const CompilationOptions& co,
133  llvm::Value* groups_buffer,
134  llvm::Value* group_key,
135  llvm::Value* key_size_lv,
136  const QueryMemoryDescriptor& query_mem_desc,
137  const size_t key_width,
138  const int32_t row_size_quad);
139 
// NOTE(review): listing line 140 is absent here. The cross-reference index lists a
// member `ColRangeInfo getColRangeInfo()` that appears nowhere else in this extract,
// so it is presumably the missing declaration -- confirm.
141 
142  static int64_t getBucketedCardinality(const ColRangeInfo& col_range_info);
143 
144  llvm::Value* convertNullIfAny(const SQLTypeInfo& arg_type,
145  const TargetInfo& agg_info,
146  llvm::Value* target);
147 
148  bool codegenAggCalls(const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
149  llvm::Value* varlen_output_buffer,
150  const std::vector<llvm::Value*>& agg_out_vec,
151  const QueryMemoryDescriptor& query_mem_desc,
152  const CompilationOptions& co,
153  const GpuSharedMemoryContext& gpu_smem_context,
154  DiamondCodegen& diamond_codegen);
155 
156  llvm::Value* codegenWindowRowPointer(const Analyzer::WindowFunction* window_func,
157  const QueryMemoryDescriptor& query_mem_desc,
158  const CompilationOptions& co,
159  DiamondCodegen& diamond_codegen);
160 
// Per the brief description attached to this member in the page's index: returns the
// pointer to where the aggregation should be stored.
161  llvm::Value* codegenAggColumnPtr(
162  llvm::Value* output_buffer_byte_stream,
163  llvm::Value* out_row_idx,
164  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
165  const QueryMemoryDescriptor& query_mem_desc,
166  const size_t chosen_bytes,
167  const size_t agg_out_off,
168  const size_t target_idx);
169 
170  void codegenEstimator(std::stack<llvm::BasicBlock*>& array_loops,
171  DiamondCodegen& diamond_codegen,
172  const QueryMemoryDescriptor& query_mem_desc,
173  const CompilationOptions&);
174 
175  void codegenCountDistinct(const size_t target_idx,
176  const Analyzer::Expr* target_expr,
177  std::vector<llvm::Value*>& agg_args,
178  const QueryMemoryDescriptor&,
179  const ExecutorDeviceType);
180 
181  void codegenApproxQuantile(const size_t target_idx,
182  const Analyzer::Expr* target_expr,
183  std::vector<llvm::Value*>& agg_args,
184  const QueryMemoryDescriptor& query_mem_desc,
185  const ExecutorDeviceType device_type);
186 
187  llvm::Value* getAdditionalLiteral(const int32_t off);
188 
189  std::vector<llvm::Value*> codegenAggArg(const Analyzer::Expr* target_expr,
190  const CompilationOptions& co);
191 
192  llvm::Value* emitCall(const std::string& fname, const std::vector<llvm::Value*>& args);
193 
194  void checkErrorCode(llvm::Value* retCode);
195 
196  bool needsUnnestDoublePatch(llvm::Value const* val_ptr,
197  const std::string& agg_base_name,
198  const bool threads_share_memory,
199  const CompilationOptions& co) const;
200 
201  void prependForceSync();
202 
// Data members.
// NOTE(review): listing lines 203-204 and 207-208 are absent from this extract. The
// cross-reference index lists `const RelAlgExecutionUnit& ra_exe_unit_` and
// `const ExecutorDeviceType device_type_`, which likely account for two of them; the
// other two cannot be determined from this text -- confirm against the real header.
205  const std::vector<InputTableInfo>& query_infos_;
206  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
209 
210  const std::optional<int64_t> group_cardinality_estimation_;
211 
// These types are granted access to the private members declared above.
212  friend class Executor;
213  friend class QueryMemoryDescriptor;
214  friend class CodeGenerator;
215  friend class ExecutionKernel;
216  friend struct TargetExprCodegen;
// NOTE(review): listing line 217 is absent from this extract; its content is unknown.
218 };
219 
220 inline int64_t extract_from_datum(const Datum datum, const SQLTypeInfo& ti) {
221  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
222  switch (type) {
223  case kBOOLEAN:
224  return datum.tinyintval;
225  case kTINYINT:
226  return datum.tinyintval;
227  case kSMALLINT:
228  return datum.smallintval;
229  case kCHAR:
230  case kVARCHAR:
231  case kTEXT:
233  case kINT:
234  return datum.intval;
235  case kBIGINT:
236  return datum.bigintval;
237  case kTIME:
238  case kTIMESTAMP:
239  case kDATE:
240  return datum.bigintval;
241  default:
242  abort();
243  }
244 }
245 
246 inline int64_t extract_min_stat(const ChunkStats& stats, const SQLTypeInfo& ti) {
247  return extract_from_datum(stats.min, ti);
248 }
249 
250 inline int64_t extract_max_stat(const ChunkStats& stats, const SQLTypeInfo& ti) {
251  return extract_from_datum(stats.max, ti);
252 }
253 
254 inline size_t get_count_distinct_sub_bitmap_count(const size_t bitmap_sz_bits,
256  const ExecutorDeviceType device_type) {
257  // For count distinct on a column with a very small number of distinct values
258  // contention can be very high, especially for non-grouped queries. We'll split
259  // the bitmap into multiple sub-bitmaps which are unified to get the full result.
260  // The threshold value for bitmap_sz_bits works well on Kepler.
261  return bitmap_sz_bits < 50000 && ra_exe_unit.groupby_exprs.empty() &&
262  (device_type == ExecutorDeviceType::GPU || g_cluster)
263  ? 64 // NB: must be a power of 2 to keep runtime offset computations cheap
264  : 1;
265 }
266 
267 #endif // QUERYENGINE_GROUPBYANDAGGREGATE_H
int8_t tinyintval
Definition: sqltypes.h:212
const RelAlgExecutionUnit & ra_exe_unit
#define CHECK_EQ(x, y)
Definition: Logger.h:217
bool g_enable_smem_group_by
bool gpuCanHandleOrderEntries(const std::list< Analyzer::OrderEntry > &order_entries)
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
llvm::Value * getAdditionalLiteral(const int32_t off)
llvm::Value * codegenAggColumnPtr(llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
Returns the pointer to where the aggregation should be stored.
bool codegenAggCalls(const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, llvm::Value *varlen_output_buffer, const std::vector< llvm::Value * > &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:111
Definition: sqltypes.h:49
const bool keyless
ExecutorDeviceType
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl(const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
ColRangeInfo getColRangeInfo()
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
QueryDescriptionType hash_type_
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
llvm::Value * codegenVarlenOutputBuffer(const QueryMemoryDescriptor &query_mem_desc)
void codegenApproxQuantile(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
void checkErrorCode(llvm::Value *retCode)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
bool needsUnnestDoublePatch(llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash(const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
int64_t extract_from_datum(const Datum datum, const SQLTypeInfo &ti)
int32_t intval
Definition: sqltypes.h:214
size_t get_count_distinct_sub_bitmap_count(const size_t bitmap_sz_bits, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
GroupByAndAggregate(Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
llvm::Value * convertNullIfAny(const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
int64_t bigintval
Definition: sqltypes.h:215
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash(const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy(const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
bool g_bigint_count
int16_t smallintval
Definition: sqltypes.h:213
void codegenCountDistinct(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash(llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
int64_t getShardedTopBucket(const ColRangeInfo &col_range_info, const size_t shard_count) const
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:455
const int32_t target_index
Definition: sqltypes.h:52
Definition: sqltypes.h:53
const std::vector< InputTableInfo > & query_infos_
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor(const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
const ExecutorDeviceType device_type_
void codegenEstimator(std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
std::vector< llvm::Value * > codegenAggArg(const Analyzer::Expr *target_expr, const CompilationOptions &co)
llvm::Function * codegenPerfectHashFunction()
llvm::Value * codegenWindowRowPointer(const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
Definition: sqltypes.h:41
const std::optional< int64_t > group_cardinality_estimation_
llvm::Value * codegenOutputSlot(llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
int64_t extract_min_stat(const ChunkStats &stats, const SQLTypeInfo &ti)
QueryDescriptionType
Definition: Types.h:26
bool g_cluster
Definition: sqltypes.h:45
const RelAlgExecutionUnit & ra_exe_unit_
bool is_decimal() const
Definition: sqltypes.h:512
int64_t extract_max_stat(const ChunkStats &stats, const SQLTypeInfo &ti)
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)