OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
GroupByAndAggregate.h File Reference
#include "BufferCompaction.h"
#include "ColumnarResults.h"
#include "CompilationOptions.h"
#include "GpuMemUtils.h"
#include "GpuSharedMemoryContext.h"
#include "InputMetadata.h"
#include "QueryExecutionContext.h"
#include "Rendering/RenderInfo.h"
#include "RuntimeFunctions.h"
#include "../Shared/sqltypes.h"
#include "Shared/Logger.h"
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Value.h>
#include <boost/algorithm/string/join.hpp>
#include <boost/make_unique.hpp>
#include <stack>
#include <vector>
+ Include dependency graph for GroupByAndAggregate.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class  ReductionRanOutOfSlots
 
struct  ColRangeInfo
 
struct  KeylessInfo
 
class  GroupByAndAggregate
 
struct  GroupByAndAggregate::DiamondCodegen
 

Functions

std::string nullable_str_to_string (const NullableString &str)
 
std::string datum_to_string (const TargetValue &tv, const SQLTypeInfo &ti, const std::string &delim)
 
int64_t extract_from_datum (const Datum datum, const SQLTypeInfo &ti)
 
int64_t extract_min_stat (const ChunkStats &stats, const SQLTypeInfo &ti)
 
int64_t extract_max_stat (const ChunkStats &stats, const SQLTypeInfo &ti)
 
size_t get_count_distinct_sub_bitmap_count (const size_t bitmap_sz_bits, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
template<class T >
std::vector< int8_t > get_col_byte_widths (const T &col_expr_list, const std::vector< ssize_t > &col_exprs_to_not_project)
 
int8_t get_min_byte_width ()
 

Variables

bool g_enable_smem_group_by
 
bool g_bigint_count
 

Function Documentation

std::string datum_to_string ( const TargetValue &  tv,
const SQLTypeInfo &  ti,
const std::string &  delim 
)
inline

Definition at line 61 of file GroupByAndAggregate.h.

References Datum::bigintval, CHECK(), DatumToString(), SQLTypeInfo::get_elem_type(), inline_fp_null_val(), inline_int_null_val(), SQLTypeInfo::is_array(), SQLTypeInfo::is_boolean(), SQLTypeInfo::is_decimal(), SQLTypeInfo::is_time(), join(), kDOUBLE, NULL_BIGINT, NULL_BOOLEAN, nullable_str_to_string(), and to_string().

Referenced by import_export::QueryExporterCSV::exportResults().

63  {
64  if (ti.is_array()) {
65  const auto array_tv = boost::get<ArrayTargetValue>(&tv);
66  CHECK(array_tv);
67  if (array_tv->is_initialized()) {
68  const auto& vec = array_tv->get();
69  std::vector<std::string> elem_strs;
70  elem_strs.reserve(vec.size());
71  const auto& elem_ti = ti.get_elem_type();
72  for (const auto& elem_tv : vec) {
73  elem_strs.push_back(datum_to_string(elem_tv, elem_ti, delim));
74  }
75  return "{" + boost::algorithm::join(elem_strs, delim) + "}";
76  }
77  return "NULL";
78  }
79  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
80  if (ti.is_time() || ti.is_decimal()) {
81  Datum datum;
82  datum.bigintval = *boost::get<int64_t>(scalar_tv);
83  if (datum.bigintval == NULL_BIGINT) {
84  return "NULL";
85  }
86  return DatumToString(datum, ti);
87  }
88  if (ti.is_boolean()) {
89  const auto bool_val = *boost::get<int64_t>(scalar_tv);
90  return bool_val == NULL_BOOLEAN ? "NULL" : (bool_val ? "true" : "false");
91  }
92  auto iptr = boost::get<int64_t>(scalar_tv);
93  if (iptr) {
94  return *iptr == inline_int_null_val(ti) ? "NULL" : std::to_string(*iptr);
95  }
96  auto fptr = boost::get<float>(scalar_tv);
97  if (fptr) {
98  return *fptr == inline_fp_null_val(ti) ? "NULL" : std::to_string(*fptr);
99  }
100  auto dptr = boost::get<double>(scalar_tv);
101  if (dptr) {
102  return *dptr == inline_fp_null_val(ti.is_decimal() ? SQLTypeInfo(kDOUBLE, false) : ti)
103  ? "NULL"
104  : std::to_string(*dptr);
105  }
106  auto sptr = boost::get<NullableString>(scalar_tv);
107  CHECK(sptr);
108  return nullable_str_to_string(*sptr);
109 }
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:230
#define NULL_BIGINT
Definition: sqltypes.h:183
std::string datum_to_string(const TargetValue &tv, const SQLTypeInfo &ti, const std::string &delim)
std::string nullable_str_to_string(const NullableString &str)
std::string join(T const &container, std::string const &delim)
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
bool is_time() const
Definition: sqltypes.h:421
std::string to_string(char const *&&v)
CHECK(cgen_state)
int64_t bigintval
Definition: sqltypes.h:137
bool is_boolean() const
Definition: sqltypes.h:422
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:622
bool is_decimal() const
Definition: sqltypes.h:418
#define NULL_BOOLEAN
Definition: sqltypes.h:179
bool is_array() const
Definition: sqltypes.h:423

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t extract_from_datum ( const Datum  datum,
const SQLTypeInfo &  ti 
)
inline

Definition at line 305 of file GroupByAndAggregate.h.

References Datum::bigintval, CHECK_EQ, decimal_to_int_type(), SQLTypeInfo::get_compression(), SQLTypeInfo::get_type(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kCHAR, kDATE, kENCODING_DICT, kINT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by extract_max_stat(), and extract_min_stat().

305  {
306  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
307  switch (type) {
308  case kBOOLEAN:
309  return datum.tinyintval;
310  case kTINYINT:
311  return datum.tinyintval;
312  case kSMALLINT:
313  return datum.smallintval;
314  case kCHAR:
315  case kVARCHAR:
316  case kTEXT:
317  CHECK_EQ(kENCODING_DICT, ti.get_compression());
318  case kINT:
319  return datum.intval;
320  case kBIGINT:
321  return datum.bigintval;
322  case kTIME:
323  case kTIMESTAMP:
324  case kDATE:
325  return datum.bigintval;
326  default:
327  abort();
328  }
329 }
int8_t tinyintval
Definition: sqltypes.h:134
#define CHECK_EQ(x, y)
Definition: Logger.h:205
Definition: sqltypes.h:50
int32_t intval
Definition: sqltypes.h:136
int64_t bigintval
Definition: sqltypes.h:137
int16_t smallintval
Definition: sqltypes.h:135
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:311
Definition: sqltypes.h:53
Definition: sqltypes.h:54
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:266
Definition: sqltypes.h:42
Definition: sqltypes.h:46
bool is_decimal() const
Definition: sqltypes.h:418

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t extract_max_stat ( const ChunkStats &  stats,
const SQLTypeInfo &  ti 
)
inline

Definition at line 335 of file GroupByAndAggregate.h.

References extract_from_datum(), and ChunkStats::max.

Referenced by getLeafColumnRange(), and Executor::skipFragment().

335  {
336  return extract_from_datum(stats.max, ti);
337 }
int64_t extract_from_datum(const Datum datum, const SQLTypeInfo &ti)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t extract_min_stat ( const ChunkStats &  stats,
const SQLTypeInfo &  ti 
)
inline

Definition at line 331 of file GroupByAndAggregate.h.

References extract_from_datum(), and ChunkStats::min.

Referenced by getLeafColumnRange(), and Executor::skipFragment().

331  {
332  return extract_from_datum(stats.min, ti);
333 }
int64_t extract_from_datum(const Datum datum, const SQLTypeInfo &ti)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<class T >
std::vector<int8_t> get_col_byte_widths ( const T &  col_expr_list,
const std::vector< ssize_t > &  col_exprs_to_not_project 
)
inline

Definition at line 353 of file GroupByAndAggregate.h.

References CHECK(), CHECK_EQ, g_bigint_count, get_bit_width(), get_compact_type(), get_target_info(), kAVG, and kENCODING_NONE.

Referenced by QueryMemoryDescriptor::init(), GroupByAndAggregate::initQueryMemoryDescriptorImpl(), and QueryMemoryDescriptor::pick_target_compact_width().

355  {
356  // Note that non-projected col exprs could be projected cols that we can lazy fetch or
357  // grouped cols with keyless hash
358  if (!col_exprs_to_not_project.empty()) {
359  CHECK_EQ(col_expr_list.size(), col_exprs_to_not_project.size());
360  }
361  std::vector<int8_t> col_widths;
362  size_t col_expr_idx = 0;
363  for (const auto col_expr : col_expr_list) {
364  if (!col_exprs_to_not_project.empty() &&
365  col_exprs_to_not_project[col_expr_idx] != -1) {
366  col_widths.push_back(0);
367  ++col_expr_idx;
368  continue;
369  }
370  if (!col_expr) {
371  // row index
372  col_widths.push_back(sizeof(int64_t));
373  } else {
374  const auto agg_info = get_target_info(col_expr, g_bigint_count);
375  const auto chosen_type = get_compact_type(agg_info);
376  if ((chosen_type.is_string() && chosen_type.get_compression() == kENCODING_NONE) ||
377  chosen_type.is_array()) {
378  col_widths.push_back(sizeof(int64_t));
379  col_widths.push_back(sizeof(int64_t));
380  ++col_expr_idx;
381  continue;
382  }
383  if (chosen_type.is_geometry()) {
384  for (auto i = 0; i < chosen_type.get_physical_coord_cols(); ++i) {
385  col_widths.push_back(sizeof(int64_t));
386  col_widths.push_back(sizeof(int64_t));
387  }
388  ++col_expr_idx;
389  continue;
390  }
391  const auto col_expr_bitwidth = get_bit_width(chosen_type);
392  CHECK_EQ(size_t(0), col_expr_bitwidth % 8);
393  col_widths.push_back(static_cast<int8_t>(col_expr_bitwidth >> 3));
394  // for average, we'll need to keep the count as well
395  if (agg_info.agg_kind == kAVG) {
396  CHECK(agg_info.is_agg);
397  col_widths.push_back(sizeof(int64_t));
398  }
399  }
400  ++col_expr_idx;
401  }
402  return col_widths;
403 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:78
const SQLTypeInfo get_compact_type(const TargetInfo &target)
size_t get_bit_width(const SQLTypeInfo &ti)
CHECK(cgen_state)
bool g_bigint_count
Definition: sqldefs.h:72

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t get_count_distinct_sub_bitmap_count ( const size_t  bitmap_sz_bits,
const RelAlgExecutionUnit &  ra_exe_unit,
const ExecutorDeviceType  device_type 
)
inline

Definition at line 339 of file GroupByAndAggregate.h.

References g_cluster, GPU, and RelAlgExecutionUnit::groupby_exprs.

Referenced by anonymous_namespace{RelAlgExecutor.cpp}::decide_approx_count_distinct_implementation(), and GroupByAndAggregate::initCountDistinctDescriptors().

341  {
342  // For count distinct on a column with a very small number of distinct values
343  // contention can be very high, especially for non-grouped queries. We'll split
344  // the bitmap into multiple sub-bitmaps which are unified to get the full result.
345  // The threshold value for bitmap_sz_bits works well on Kepler.
346  return bitmap_sz_bits < 50000 && ra_exe_unit.groupby_exprs.empty() &&
347  (device_type == ExecutorDeviceType::GPU || g_cluster)
348  ? 64 // NB: must be a power of 2 to keep runtime offset computations cheap
349  : 1;
350 }
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool g_cluster

+ Here is the caller graph for this function:

int8_t get_min_byte_width ( )
inline

Definition at line 405 of file GroupByAndAggregate.h.

References MAX_BYTE_WIDTH_SUPPORTED.

Referenced by Executor::executeWorkUnitImpl().

405  {
406  return MAX_BYTE_WIDTH_SUPPORTED;
407 }
constexpr int8_t MAX_BYTE_WIDTH_SUPPORTED

+ Here is the caller graph for this function:

std::string nullable_str_to_string ( const NullableString &  str)
inline

Definition at line 50 of file GroupByAndAggregate.h.

References CHECK().

Referenced by datum_to_string().

50  {
51  auto nptr = boost::get<void*>(&str);
52  if (nptr) {
53  CHECK(!*nptr);
54  return "NULL";
55  }
56  auto sptr = boost::get<std::string>(&str);
57  CHECK(sptr);
58  return *sptr;
59 }
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Variable Documentation

bool g_bigint_count

Definition at line 49 of file GroupByAndAggregate.cpp.

bool g_enable_smem_group_by