OmniSciDB  dfae7c3b14
GroupByAndAggregate.h
/*
 * Copyright 2017 MapD Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef QUERYENGINE_GROUPBYANDAGGREGATE_H
#define QUERYENGINE_GROUPBYANDAGGREGATE_H

#include "BufferCompaction.h"
#include "ColumnarResults.h"
#include "CompilationOptions.h"
#include "GpuMemUtils.h"
#include "GpuSharedMemoryContext.h"
#include "InputMetadata.h"
#include "QueryExecutionContext.h"
#include "Rendering/RenderInfo.h"
#include "RuntimeFunctions.h"

#include "../Shared/sqltypes.h"
#include "Logger/Logger.h"

#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Value.h>
#include <boost/algorithm/string/join.hpp>
#include <boost/make_unique.hpp>

#include <stack>
#include <vector>

extern bool g_enable_smem_group_by;
extern bool g_bigint_count;

class ReductionRanOutOfSlots : public std::runtime_error {
 public:
  ReductionRanOutOfSlots() : std::runtime_error("ReductionRanOutOfSlots") {}
};

inline std::string nullable_str_to_string(const NullableString& str) {
  auto nptr = boost::get<void*>(&str);
  if (nptr) {
    CHECK(!*nptr);
    return "NULL";
  }
  auto sptr = boost::get<std::string>(&str);
  CHECK(sptr);
  return *sptr;
}

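// Converts a single TargetValue into a printable string, dispatching on the SQL type:
// arrays are rendered element-wise inside braces, time and decimal values go through
// DatumToString, and scalar types map their inline null sentinels to "NULL".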
inline std::string datum_to_string(const TargetValue& tv,
                                   const SQLTypeInfo& ti,
                                   const std::string& delim) {
  if (ti.is_array()) {
    const auto array_tv = boost::get<ArrayTargetValue>(&tv);
    CHECK(array_tv);
    if (array_tv->is_initialized()) {
      const auto& vec = array_tv->get();
      std::vector<std::string> elem_strs;
      elem_strs.reserve(vec.size());
      const auto& elem_ti = ti.get_elem_type();
      for (const auto& elem_tv : vec) {
        elem_strs.push_back(datum_to_string(elem_tv, elem_ti, delim));
      }
      return "{" + boost::algorithm::join(elem_strs, delim) + "}";
    }
    return "NULL";
  }
  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
  if (ti.is_time() || ti.is_decimal()) {
    Datum datum;
    datum.bigintval = *boost::get<int64_t>(scalar_tv);
    if (datum.bigintval == NULL_BIGINT) {
      return "NULL";
    }
    return DatumToString(datum, ti);
  }
  if (ti.is_boolean()) {
    const auto bool_val = *boost::get<int64_t>(scalar_tv);
    return bool_val == NULL_BOOLEAN ? "NULL" : (bool_val ? "true" : "false");
  }
  auto iptr = boost::get<int64_t>(scalar_tv);
  if (iptr) {
    return *iptr == inline_int_null_val(ti) ? "NULL" : std::to_string(*iptr);
  }
  auto fptr = boost::get<float>(scalar_tv);
  if (fptr) {
    return *fptr == inline_fp_null_val(ti) ? "NULL" : std::to_string(*fptr);
  }
  auto dptr = boost::get<double>(scalar_tv);
  if (dptr) {
    return *dptr == inline_fp_null_val(ti.is_decimal() ? SQLTypeInfo(kDOUBLE, false) : ti)
               ? "NULL"
               : std::to_string(*dptr);
  }
  auto sptr = boost::get<NullableString>(scalar_tv);
  CHECK(sptr);
  return nullable_str_to_string(*sptr);
}

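// Describes the computed value range of a group-by key expression: the chosen group-by
// hash layout, the [min, max] bounds, the bucket (stride) between adjacent values and
// whether nulls were observed. An "empty" range is encoded as min == 0, max == -1.
// As a rough, non-normative illustration: a key with min = 10, max = 99 and bucket = 1
// maps onto on the order of 90 perfect-hash slots, with one extra slot reserved for
// nulls when has_nulls is set.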
struct ColRangeInfo {
  QueryDescriptionType hash_type_;
  int64_t min;
  int64_t max;
  int64_t bucket;
  bool has_nulls;
  bool isEmpty() { return min == 0 && max == -1; }
};

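// Result of the keyless-hash analysis: when `keyless` is true the group key does not
// need to be materialized in the output buffer (the slot position already identifies
// the group), and `target_index` is the aggregate target used to detect empty entries.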
struct KeylessInfo {
  const bool keyless;
  const int32_t target_index;
};

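// Drives code generation for the group-by and aggregation portion of a query: it shapes
// the output buffer (via QueryMemoryDescriptor), emits the LLVM IR that locates or
// creates the group slot for each input row, and emits the per-target aggregate updates.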
class GroupByAndAggregate {
 public:
  GroupByAndAggregate(Executor* executor,
                      const ExecutorDeviceType device_type,
                      const RelAlgExecutionUnit& ra_exe_unit,
                      const std::vector<InputTableInfo>& query_infos,
                      std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                      const std::optional<int64_t>& group_cardinality_estimation);

  // returns true iff checking the error code after every row
  // is required -- slow path group by queries for now
  bool codegen(llvm::Value* filter_result,
               llvm::BasicBlock* sc_false,
               const QueryMemoryDescriptor& query_mem_desc,
               const CompilationOptions& co,
               const GpuSharedMemoryContext& gpu_smem_context);

  static void addTransientStringLiterals(
      const RelAlgExecutionUnit& ra_exe_unit,
      Executor* executor,
      std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);

  static size_t shard_count_for_top_groups(const RelAlgExecutionUnit& ra_exe_unit,
                                           const Catalog_Namespace::Catalog& catalog);

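  // RAII helper that emits the classic if/then "diamond" control-flow shape around a
  // condition: the constructor branches on `cond` into a freshly created true block and
  // positions the IR builder there; the destructor closes the diamond by branching to
  // the false/merge block (or chains into the next diamond when chain_to_next is set).
  // A sketch of the emitted shape, with hypothetical label names for illustration only:
  //
  //       br i1 %cond, label %prefix_true, label %prefix_false
  //   prefix_true:
  //       ; ...code emitted while the DiamondCodegen is alive...
  //       br label %prefix_false
  //   prefix_false:
  //       ; ...code emitted after the destructor runs...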
  struct DiamondCodegen {
    DiamondCodegen(llvm::Value* cond,
                   Executor* executor,
                   const bool chain_to_next,
                   const std::string& label_prefix,
                   DiamondCodegen* parent,
                   const bool share_false_edge_with_parent);
    void setChainToNext();
    void setFalseTarget(llvm::BasicBlock* cond_false);
    ~DiamondCodegen();

    Executor* executor_;
    llvm::BasicBlock* cond_true_;
    llvm::BasicBlock* cond_false_;
    llvm::BasicBlock* orig_cond_false_;
    bool chain_to_next_;
    DiamondCodegen* parent_;
  };

 private:
  bool gpuCanHandleOrderEntries(const std::list<Analyzer::OrderEntry>& order_entries);

  std::unique_ptr<QueryMemoryDescriptor> initQueryMemoryDescriptor(
      const bool allow_multifrag,
      const size_t max_groups_buffer_entry_count,
      const int8_t crt_min_byte_width,
      RenderInfo* render_info,
      const bool output_columnar_hint);

  std::unique_ptr<QueryMemoryDescriptor> initQueryMemoryDescriptorImpl(
      const bool allow_multifrag,
      const size_t max_groups_buffer_entry_count,
      const int8_t crt_min_byte_width,
      const bool sort_on_gpu_hint,
      RenderInfo* render_info,
      const bool must_use_baseline_sort,
      const bool output_columnar_hint);

  int64_t getShardedTopBucket(const ColRangeInfo& col_range_info,
                              const size_t shard_count) const;

  void addTransientStringLiterals();

  CountDistinctDescriptors initCountDistinctDescriptors();

  llvm::Value* codegenOutputSlot(llvm::Value* groups_buffer,
                                 const QueryMemoryDescriptor& query_mem_desc,
                                 const CompilationOptions& co,
                                 DiamondCodegen& diamond_codegen);

  std::tuple<llvm::Value*, llvm::Value*> codegenGroupBy(
      const QueryMemoryDescriptor& query_mem_desc,
      const CompilationOptions& co,
      DiamondCodegen& codegen);

  std::tuple<llvm::Value*, llvm::Value*> codegenSingleColumnPerfectHash(
      const QueryMemoryDescriptor& query_mem_desc,
      const CompilationOptions& co,
      llvm::Value* groups_buffer,
      llvm::Value* group_expr_lv_translated,
      llvm::Value* group_expr_lv_original,
      const int32_t row_size_quad);

  std::tuple<llvm::Value*, llvm::Value*> codegenMultiColumnPerfectHash(
      llvm::Value* groups_buffer,
      llvm::Value* group_key,
      llvm::Value* key_size_lv,
      const QueryMemoryDescriptor& query_mem_desc,
      const int32_t row_size_quad);
  llvm::Function* codegenPerfectHashFunction();

  std::tuple<llvm::Value*, llvm::Value*> codegenMultiColumnBaselineHash(
      const CompilationOptions& co,
      llvm::Value* groups_buffer,
      llvm::Value* group_key,
      llvm::Value* key_size_lv,
      const QueryMemoryDescriptor& query_mem_desc,
      const size_t key_width,
      const int32_t row_size_quad);

  ColRangeInfo getColRangeInfo();

  ColRangeInfo getExprRangeInfo(const Analyzer::Expr* expr) const;

  static int64_t getBucketedCardinality(const ColRangeInfo& col_range_info);

  KeylessInfo getKeylessInfo(const std::vector<Analyzer::Expr*>& target_expr_list,
                             const bool is_group_by) const;

  llvm::Value* convertNullIfAny(const SQLTypeInfo& arg_type,
                                const TargetInfo& agg_info,
                                llvm::Value* target);

  bool codegenAggCalls(const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
                       const std::vector<llvm::Value*>& agg_out_vec,
                       const QueryMemoryDescriptor& query_mem_desc,
                       const CompilationOptions& co,
                       const GpuSharedMemoryContext& gpu_smem_context,
                       DiamondCodegen& diamond_codegen);

  llvm::Value* codegenWindowRowPointer(const Analyzer::WindowFunction* window_func,
                                       const QueryMemoryDescriptor& query_mem_desc,
                                       const CompilationOptions& co,
                                       DiamondCodegen& diamond_codegen);

  llvm::Value* codegenAggColumnPtr(
      llvm::Value* output_buffer_byte_stream,
      llvm::Value* out_row_idx,
      const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
      const QueryMemoryDescriptor& query_mem_desc,
      const size_t chosen_bytes,
      const size_t agg_out_off,
      const size_t target_idx);

  void codegenEstimator(std::stack<llvm::BasicBlock*>& array_loops,
                        GroupByAndAggregate::DiamondCodegen& diamond_codegen,
                        const QueryMemoryDescriptor& query_mem_desc,
                        const CompilationOptions&);

  void codegenCountDistinct(const size_t target_idx,
                            const Analyzer::Expr* target_expr,
                            std::vector<llvm::Value*>& agg_args,
                            const QueryMemoryDescriptor&,
                            const ExecutorDeviceType);

  llvm::Value* getAdditionalLiteral(const int32_t off);

  std::vector<llvm::Value*> codegenAggArg(const Analyzer::Expr* target_expr,
                                          const CompilationOptions& co);

  llvm::Value* emitCall(const std::string& fname, const std::vector<llvm::Value*>& args);

  void checkErrorCode(llvm::Value* retCode);

  bool needsUnnestDoublePatch(llvm::Value* val_ptr,
                              const std::string& agg_base_name,
                              const bool threads_share_memory,
                              const CompilationOptions& co) const;

  void prependForceSync();

  Executor* executor_;
  const RelAlgExecutionUnit& ra_exe_unit_;
  const std::vector<InputTableInfo>& query_infos_;
  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
  bool output_columnar_;
  const ExecutorDeviceType device_type_;

  const std::optional<int64_t> group_cardinality_estimation_;

  friend class Executor;
  friend class QueryMemoryDescriptor;
  friend class CodeGenerator;
  friend class ExecutionKernel;
  friend struct TargetExprCodegen;
  friend struct TargetExprCodegenBuilder;
};

inline int64_t extract_from_datum(const Datum datum, const SQLTypeInfo& ti) {
  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
  switch (type) {
    case kBOOLEAN:
      return datum.tinyintval;
    case kTINYINT:
      return datum.tinyintval;
    case kSMALLINT:
      return datum.smallintval;
    case kCHAR:
    case kVARCHAR:
    case kTEXT:
      CHECK_EQ(kENCODING_DICT, ti.get_compression());
    case kINT:
      return datum.intval;
    case kBIGINT:
      return datum.bigintval;
    case kTIME:
    case kTIMESTAMP:
    case kDATE:
      return datum.bigintval;
    default:
      abort();
  }
}

inline int64_t extract_min_stat(const ChunkStats& stats, const SQLTypeInfo& ti) {
  return extract_from_datum(stats.min, ti);
}

inline int64_t extract_max_stat(const ChunkStats& stats, const SQLTypeInfo& ti) {
  return extract_from_datum(stats.max, ti);
}

inline size_t get_count_distinct_sub_bitmap_count(const size_t bitmap_sz_bits,
                                                  const RelAlgExecutionUnit& ra_exe_unit,
                                                  const ExecutorDeviceType device_type) {
  // For count distinct on a column with a very small number of distinct values
  // contention can be very high, especially for non-grouped queries. We'll split
  // the bitmap into multiple sub-bitmaps which are unified to get the full result.
  // The threshold value for bitmap_sz_bits works well on Kepler.
  return bitmap_sz_bits < 50000 && ra_exe_unit.groupby_exprs.empty() &&
                 (device_type == ExecutorDeviceType::GPU || g_cluster)
             ? 64  // NB: must be a power of 2 to keep runtime offset computations cheap
             : 1;
}
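// Illustrative sketch only (not the actual runtime implementation): with N sub-bitmaps,
// each thread can be pointed at its own sub-bitmap, and the final distinct count is
// obtained by OR-ing the sub-bitmaps together. A hypothetical helper showing the idea:
//
//   void mark_value(int8_t* bitmaps,            // N consecutive sub-bitmaps
//                   const size_t bitmap_bytes,  // size of one sub-bitmap
//                   const size_t sub_bitmap_count,
//                   const size_t thread_idx,
//                   const uint64_t bitmap_idx) {
//     // pick this thread's sub-bitmap, then set the bit for the value
//     int8_t* sub_bitmap = bitmaps + (thread_idx % sub_bitmap_count) * bitmap_bytes;
//     sub_bitmap[bitmap_idx >> 3] |= static_cast<int8_t>(1 << (bitmap_idx & 7));
//   }
//
// Spreading writes across 64 sub-bitmaps trades memory for much less contention on hot
// bits when the column has only a handful of distinct values.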

template <class T>
inline std::vector<int8_t> get_col_byte_widths(const T& col_expr_list) {
  std::vector<int8_t> col_widths;
  size_t col_expr_idx = 0;
  for (const auto col_expr : col_expr_list) {
    if (!col_expr) {
      // row index
      col_widths.push_back(sizeof(int64_t));
    } else {
      const auto agg_info = get_target_info(col_expr, g_bigint_count);
      const auto chosen_type = get_compact_type(agg_info);
      if ((chosen_type.is_string() && chosen_type.get_compression() == kENCODING_NONE) ||
          chosen_type.is_array()) {
        col_widths.push_back(sizeof(int64_t));
        col_widths.push_back(sizeof(int64_t));
        ++col_expr_idx;
        continue;
      }
      if (chosen_type.is_geometry()) {
        for (auto i = 0; i < chosen_type.get_physical_coord_cols(); ++i) {
          col_widths.push_back(sizeof(int64_t));
          col_widths.push_back(sizeof(int64_t));
        }
        ++col_expr_idx;
        continue;
      }
      const auto col_expr_bitwidth = get_bit_width(chosen_type);
      CHECK_EQ(size_t(0), col_expr_bitwidth % 8);
      col_widths.push_back(static_cast<int8_t>(col_expr_bitwidth >> 3));
      // for average, we'll need to keep the count as well
      if (agg_info.agg_kind == kAVG) {
        CHECK(agg_info.is_agg);
        col_widths.push_back(sizeof(int64_t));
      }
    }
    ++col_expr_idx;
  }
  return col_widths;
}
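// Worked example of the rules above: a row-index placeholder contributes one 8-byte
// slot; a none-encoded string or an array contributes two 8-byte slots (pointer and
// length); each physical coordinate column of a geometry target contributes a pair of
// 8-byte slots; and an AVG aggregate contributes its value slot plus an extra 8-byte
// slot for the running count.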

inline int8_t get_min_byte_width() {
}

struct RelAlgExecutionUnit;

#endif // QUERYENGINE_GROUPBYANDAGGREGATE_H