OmniSciDB  c07336695a
GroupByAndAggregate.h File Reference
#include "BufferCompaction.h"
#include "ColumnarResults.h"
#include "CompilationOptions.h"
#include "GpuMemUtils.h"
#include "InputMetadata.h"
#include "QueryExecutionContext.h"
#include "Rendering/RenderInfo.h"
#include "RuntimeFunctions.h"
#include "../Planner/Planner.h"
#include "../Shared/sqltypes.h"
#include "Shared/Logger.h"
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Value.h>
#include <boost/algorithm/string/join.hpp>
#include <boost/make_unique.hpp>
#include <stack>
#include <vector>
+ Include dependency graph for GroupByAndAggregate.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class  ReductionRanOutOfSlots
 
struct  ColRangeInfo
 
struct  KeylessInfo
 
class  GroupByAndAggregate
 
struct  GroupByAndAggregate::DiamondCodegen
 

Functions

std::string nullable_str_to_string (const NullableString &str)
 
std::string datum_to_string (const TargetValue &tv, const SQLTypeInfo &ti, const std::string &delim)
 
int64_t extract_from_datum (const Datum datum, const SQLTypeInfo &ti)
 
int64_t extract_min_stat (const ChunkStats &stats, const SQLTypeInfo &ti)
 
int64_t extract_max_stat (const ChunkStats &stats, const SQLTypeInfo &ti)
 
size_t get_count_distinct_sub_bitmap_count (const size_t bitmap_sz_bits, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
template<class T >
std::vector< int8_t > get_col_byte_widths (const T &col_expr_list, const std::vector< ssize_t > &col_exprs_to_not_project)
 
int8_t get_min_byte_width ()
 

Variables

bool g_enable_smem_group_by
 
bool g_bigint_count
 

Function Documentation

◆ datum_to_string()

std::string datum_to_string ( const TargetValue &  tv,
const SQLTypeInfo &  ti,
const std::string &  delim 
)
inline

Definition at line 61 of file GroupByAndAggregate.h.

References Datum::bigintval, CHECK, DatumToString(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_elem_type(), inline_fp_null_val(), inline_int_null_val(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_array(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_boolean(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_decimal(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_time(), join(), kDOUBLE, NULL_BIGINT, NULL_BOOLEAN, nullable_str_to_string(), and to_string().

Referenced by Parser::ExportQueryStmt::execute(), and main().

63  {
64  if (ti.is_array()) {
65  const auto array_tv = boost::get<ArrayTargetValue>(&tv);
66  CHECK(array_tv);
67  if (array_tv->is_initialized()) {
68  const auto& vec = array_tv->get();
69  std::vector<std::string> elem_strs;
70  elem_strs.reserve(vec.size());
71  const auto& elem_ti = ti.get_elem_type();
72  for (const auto& elem_tv : vec) {
73  elem_strs.push_back(datum_to_string(elem_tv, elem_ti, delim));
74  }
75  return "{" + boost::algorithm::join(elem_strs, delim) + "}";
76  }
77  return "NULL";
78  }
79  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
80  if (ti.is_time()) {
81  Datum datum;
82  datum.bigintval = *boost::get<int64_t>(scalar_tv);
83  if (datum.bigintval == NULL_BIGINT) {
84  return "NULL";
85  }
86  return DatumToString(datum, ti);
87  }
88  if (ti.is_boolean()) {
89  const auto bool_val = *boost::get<int64_t>(scalar_tv);
90  return bool_val == NULL_BOOLEAN ? "NULL" : (bool_val ? "true" : "false");
91  }
92  auto iptr = boost::get<int64_t>(scalar_tv);
93  if (iptr) {
94  return *iptr == inline_int_null_val(ti) ? "NULL" : std::to_string(*iptr);
95  }
96  auto fptr = boost::get<float>(scalar_tv);
97  if (fptr) {
98  return *fptr == inline_fp_null_val(ti) ? "NULL" : std::to_string(*fptr);
99  }
100  auto dptr = boost::get<double>(scalar_tv);
101  if (dptr) {
102  return *dptr == inline_fp_null_val(ti.is_decimal() ? SQLTypeInfo(kDOUBLE, false) : ti)
103  ? "NULL"
104  : std::to_string(*dptr);
105  }
106  auto sptr = boost::get<NullableString>(scalar_tv);
107  CHECK(sptr);
108  return nullable_str_to_string(*sptr);
109 }
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:341
bool is_time() const
Definition: sqltypes.h:452
#define NULL_BIGINT
Definition: sqltypes.h:175
std::string datum_to_string(const TargetValue &tv, const SQLTypeInfo &ti, const std::string &delim)
std::string nullable_str_to_string(const NullableString &str)
std::string join(T const &container, std::string const &delim)
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
std::string to_string(char const *&&v)
bool is_array() const
Definition: sqltypes.h:454
bool is_decimal() const
Definition: sqltypes.h:449
int64_t bigintval
Definition: sqltypes.h:126
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:628
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:819
bool is_boolean() const
Definition: sqltypes.h:453
#define CHECK(condition)
Definition: Logger.h:187
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
#define NULL_BOOLEAN
Definition: sqltypes.h:171
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ extract_from_datum()

int64_t extract_from_datum ( const Datum  datum,
const SQLTypeInfo &  ti 
)
inline

Definition at line 308 of file GroupByAndAggregate.h.

References Datum::bigintval, CHECK_EQ, decimal_to_int_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), Datum::intval, SQLTypeInfoCore< TYPE_FACET_PACK >::is_decimal(), kBIGINT, kBOOLEAN, kCHAR, kDATE, kENCODING_DICT, kINT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, Datum::smallintval, Datum::tinyintval, and run-benchmark-import::type.

Referenced by extract_max_stat(), and extract_min_stat().

308  {
309  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
310  switch (type) {
311  case kBOOLEAN:
312  return datum.tinyintval;
313  case kTINYINT:
314  return datum.tinyintval;
315  case kSMALLINT:
316  return datum.smallintval;
317  case kCHAR:
318  case kVARCHAR:
319  case kTEXT:
320  CHECK_EQ(kENCODING_DICT, ti.get_compression());
321  case kINT:
322  return datum.intval;
323  case kBIGINT:
324  return datum.bigintval;
325  case kTIME:
326  case kTIMESTAMP:
327  case kDATE:
328  return datum.bigintval;
329  default:
330  abort();
331  }
332 }
int8_t tinyintval
Definition: sqltypes.h:123
#define CHECK_EQ(x, y)
Definition: Logger.h:195
Definition: sqltypes.h:51
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:327
int32_t intval
Definition: sqltypes.h:125
bool is_decimal() const
Definition: sqltypes.h:449
int64_t bigintval
Definition: sqltypes.h:126
int16_t smallintval
Definition: sqltypes.h:124
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:416
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
Definition: sqltypes.h:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ extract_max_stat()

int64_t extract_max_stat ( const ChunkStats &  stats,
const SQLTypeInfo &  ti 
)
inline

Definition at line 338 of file GroupByAndAggregate.h.

References extract_from_datum(), and ChunkStats::max.

Referenced by getLeafColumnRange(), and Executor::skipFragment().

338  {
339  return extract_from_datum(stats.max, ti);
340 }
int64_t extract_from_datum(const Datum datum, const SQLTypeInfo &ti)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ extract_min_stat()

int64_t extract_min_stat ( const ChunkStats &  stats,
const SQLTypeInfo &  ti 
)
inline

Definition at line 334 of file GroupByAndAggregate.h.

References extract_from_datum(), and ChunkStats::min.

Referenced by getLeafColumnRange(), and Executor::skipFragment().

334  {
335  return extract_from_datum(stats.min, ti);
336 }
int64_t extract_from_datum(const Datum datum, const SQLTypeInfo &ti)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_col_byte_widths()

template<class T >
std::vector<int8_t> get_col_byte_widths ( const T &  col_expr_list,
const std::vector< ssize_t > &  col_exprs_to_not_project 
)
inline

Definition at line 356 of file GroupByAndAggregate.h.

References CHECK, CHECK_EQ, g_bigint_count, get_bit_width(), get_compact_type(), get_target_info(), kAVG, and kENCODING_NONE.

Referenced by QueryMemoryDescriptor::init(), GroupByAndAggregate::initQueryMemoryDescriptorImpl(), and QueryMemoryDescriptor::pick_target_compact_width().

358  {
359  // Note that non-projected col exprs could be projected cols that we can lazy fetch or
360  // grouped cols with keyless hash
361  if (!col_exprs_to_not_project.empty()) {
362  CHECK_EQ(col_expr_list.size(), col_exprs_to_not_project.size());
363  }
364  std::vector<int8_t> col_widths;
365  size_t col_expr_idx = 0;
366  for (const auto col_expr : col_expr_list) {
367  if (!col_exprs_to_not_project.empty() &&
368  col_exprs_to_not_project[col_expr_idx] != -1) {
369  col_widths.push_back(0);
370  ++col_expr_idx;
371  continue;
372  }
373  if (!col_expr) {
374  // row index
375  col_widths.push_back(sizeof(int64_t));
376  } else {
377  const auto agg_info = get_target_info(col_expr, g_bigint_count);
378  const auto chosen_type = get_compact_type(agg_info);
379  if ((chosen_type.is_string() && chosen_type.get_compression() == kENCODING_NONE) ||
380  chosen_type.is_array()) {
381  col_widths.push_back(sizeof(int64_t));
382  col_widths.push_back(sizeof(int64_t));
383  ++col_expr_idx;
384  continue;
385  }
386  if (chosen_type.is_geometry()) {
387  for (auto i = 0; i < chosen_type.get_physical_coord_cols(); ++i) {
388  col_widths.push_back(sizeof(int64_t));
389  col_widths.push_back(sizeof(int64_t));
390  }
391  ++col_expr_idx;
392  continue;
393  }
394  const auto col_expr_bitwidth = get_bit_width(chosen_type);
395  CHECK_EQ(size_t(0), col_expr_bitwidth % 8);
396  col_widths.push_back(static_cast<int8_t>(col_expr_bitwidth >> 3));
397  // for average, we'll need to keep the count as well
398  if (agg_info.agg_kind == kAVG) {
399  CHECK(agg_info.is_agg);
400  col_widths.push_back(sizeof(int64_t));
401  }
402  }
403  ++col_expr_idx;
404  }
405  return col_widths;
406 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:65
const SQLTypeInfo get_compact_type(const TargetInfo &target)
size_t get_bit_width(const SQLTypeInfo &ti)
#define CHECK(condition)
Definition: Logger.h:187
bool g_bigint_count
Definition: sqldefs.h:71
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_count_distinct_sub_bitmap_count()

size_t get_count_distinct_sub_bitmap_count ( const size_t  bitmap_sz_bits,
const RelAlgExecutionUnit &  ra_exe_unit,
const ExecutorDeviceType  device_type 
)
inline

Definition at line 342 of file GroupByAndAggregate.h.

References g_cluster, GPU, and RelAlgExecutionUnit::groupby_exprs.

Referenced by anonymous_namespace{RelAlgExecutor.cpp}::decide_approx_count_distinct_implementation(), and GroupByAndAggregate::initCountDistinctDescriptors().

344  {
345  // For count distinct on a column with a very small number of distinct values
346  // contention can be very high, especially for non-grouped queries. We'll split
347  // the bitmap into multiple sub-bitmaps which are unified to get the full result.
348  // The threshold value for bitmap_sz_bits works well on Kepler.
349  return bitmap_sz_bits < 50000 && ra_exe_unit.groupby_exprs.empty() &&
350  (device_type == ExecutorDeviceType::GPU || g_cluster)
351  ? 64 // NB: must be a power of 2 to keep runtime offset computations cheap
352  : 1;
353 }
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool g_cluster
+ Here is the caller graph for this function:

◆ get_min_byte_width()

int8_t get_min_byte_width ( )
inline

Definition at line 408 of file GroupByAndAggregate.h.

References MAX_BYTE_WIDTH_SUPPORTED.

Referenced by Executor::executeWorkUnitImpl().

408  {
409  return MAX_BYTE_WIDTH_SUPPORTED;
410 }
constexpr int8_t MAX_BYTE_WIDTH_SUPPORTED
+ Here is the caller graph for this function:

◆ nullable_str_to_string()

std::string nullable_str_to_string ( const NullableString &  str)
inline

Definition at line 50 of file GroupByAndAggregate.h.

References CHECK.

Referenced by datum_to_string().

50  {
51  auto nptr = boost::get<void*>(&str);
52  if (nptr) {
53  CHECK(!*nptr);
54  return "NULL";
55  }
56  auto sptr = boost::get<std::string>(&str);
57  CHECK(sptr);
58  return *sptr;
59 }
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the caller graph for this function:

Variable Documentation

◆ g_bigint_count

bool g_bigint_count

Definition at line 50 of file GroupByAndAggregate.cpp.

Referenced by get_col_byte_widths().

◆ g_enable_smem_group_by

bool g_enable_smem_group_by

Definition at line 26 of file QueryMemoryDescriptor.cpp.