OmniSciDB  06b3bd477c
anonymous_namespace{Execute.cpp} Namespace Reference

Classes

class  OutVecOwner
 

Functions

ResultSetPtr get_merged_result (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device)
 
ReductionCode get_reduction_code (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device, int64_t *compilation_queue_time)
 
size_t compute_buffer_entry_guess (const std::vector< InputTableInfo > &query_infos)
 
std::string get_table_name (const InputDescriptor &input_desc, const Catalog_Namespace::Catalog &cat)
 
size_t getDeviceBasedScanLimit (const ExecutorDeviceType device_type, const int device_count)
 
void checkWorkUnitWatchdog (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const Catalog_Namespace::Catalog &cat, const ExecutorDeviceType device_type, const int device_count)
 
template<typename T >
std::vector< std::string > expr_container_to_string (const T &expr_container)
 
template<>
std::vector< std::string > expr_container_to_string (const std::list< Analyzer::OrderEntry > &expr_container)
 
std::string join_type_to_string (const JoinType type)
 
std::string sort_algorithm_to_string (const SortAlgorithm algorithm)
 
RelAlgExecutionUnit replace_scan_limit (const RelAlgExecutionUnit &ra_exe_unit_in, const size_t new_scan_limit)
 
int64_t inline_null_val (const SQLTypeInfo &ti, const bool float_argument_input)
 
void fill_entries_for_empty_input (std::vector< TargetInfo > &target_infos, std::vector< int64_t > &entry, const std::vector< Analyzer::Expr * > &target_exprs, const QueryMemoryDescriptor &query_mem_desc)
 
ResultSetPtr build_row_for_empty_input (const std::vector< Analyzer::Expr * > &target_exprs_in, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
size_t permute_storage_columnar (const ResultSetStorage *input_storage, const QueryMemoryDescriptor &input_query_mem_desc, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
 
size_t permute_storage_row_wise (const ResultSetStorage *input_storage, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
 
bool has_lazy_fetched_columns (const std::vector< ColumnLazyFetchInfo > &fetched_cols)
 
const ColumnDescriptor * try_get_column_descriptor (const InputColDescriptor *col_desc, const Catalog_Namespace::Catalog &cat)
 
bool check_rows_less_than_needed (const ResultSetPtr &results, const size_t scan_limit)
 
std::tuple< bool, int64_t, int64_t > get_hpt_overflow_underflow_safe_scaled_values (const int64_t chunk_min, const int64_t chunk_max, const SQLTypeInfo &lhs_type, const SQLTypeInfo &rhs_type)
 

Function Documentation

ResultSetPtr anonymous_namespace{Execute.cpp}::build_row_for_empty_input ( const std::vector< Analyzer::Expr * > &  target_exprs_in,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)

Definition at line 1718 of file Execute.cpp.

References CHECK(), fill_entries_for_empty_input(), QueryMemoryDescriptor::getExecutor(), query_mem_desc, and SQLTypeInfo::set_notnull().

Referenced by Executor::collectAllDeviceResults().

{
  std::vector<std::shared_ptr<Analyzer::Expr>> target_exprs_owned_copies;
  std::vector<Analyzer::Expr*> target_exprs;
  for (const auto target_expr : target_exprs_in) {
    const auto target_expr_copy =
        std::dynamic_pointer_cast<Analyzer::AggExpr>(target_expr->deep_copy());
    CHECK(target_expr_copy);
    auto ti = target_expr->get_type_info();
    ti.set_notnull(false);
    target_expr_copy->set_type_info(ti);
    if (target_expr_copy->get_arg()) {
      auto arg_ti = target_expr_copy->get_arg()->get_type_info();
      arg_ti.set_notnull(false);
      target_expr_copy->get_arg()->set_type_info(arg_ti);
    }
    target_exprs_owned_copies.push_back(target_expr_copy);
    target_exprs.push_back(target_expr_copy.get());
  }
  std::vector<TargetInfo> target_infos;
  std::vector<int64_t> entry;
  fill_entries_for_empty_input(target_infos, entry, target_exprs, query_mem_desc);
  const auto executor = query_mem_desc.getExecutor();
  CHECK(executor);
  auto row_set_mem_owner = executor->getRowSetMemoryOwner();
  CHECK(row_set_mem_owner);
  auto rs = std::make_shared<ResultSet>(
      target_infos, device_type, query_mem_desc, row_set_mem_owner, executor);
  rs->allocateStorage();
  rs->fillOneEntry(entry);
  return rs;
}

bool anonymous_namespace{Execute.cpp}::check_rows_less_than_needed ( const ResultSetPtr &  results,
const size_t  scan_limit 
)

Definition at line 2850 of file Execute.cpp.

References CHECK().

Referenced by Executor::executePlanWithGroupBy().

{
  CHECK(scan_limit);
  return results && results->rowCount() < scan_limit;
}

void anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  table_infos,
const Catalog_Namespace::Catalog &  cat,
const ExecutorDeviceType  device_type,
const int  device_count 
)

Definition at line 1069 of file Execute.cpp.

References SortInfo::algorithm, get_table_name(), getDeviceBasedScanLimit(), RelAlgExecutionUnit::groupby_exprs, Executor::high_scan_limit, RelAlgExecutionUnit::input_descs, join(), RelAlgExecutionUnit::scan_limit, RelAlgExecutionUnit::sort_info, StreamingTopN, RelAlgExecutionUnit::target_exprs, to_string(), and RelAlgExecutionUnit::use_bump_allocator.

Referenced by Executor::createKernels().

{
  for (const auto target_expr : ra_exe_unit.target_exprs) {
    if (dynamic_cast<const Analyzer::AggExpr*>(target_expr)) {
      return;
    }
  }
  if (!ra_exe_unit.scan_limit && table_infos.size() == 1 &&
      table_infos.front().info.getPhysicalNumTuples() < Executor::high_scan_limit) {
    // Allow a query with no scan limit to run on small tables
    return;
  }
  if (ra_exe_unit.use_bump_allocator) {
    // Bump allocator removes the scan limit (and any knowledge of the size of the
    // output relative to the size of the input), so we bypass this check for now
    return;
  }
  if (ra_exe_unit.sort_info.algorithm != SortAlgorithm::StreamingTopN &&
      ra_exe_unit.groupby_exprs.size() == 1 && !ra_exe_unit.groupby_exprs.front() &&
      (!ra_exe_unit.scan_limit ||
       ra_exe_unit.scan_limit > getDeviceBasedScanLimit(device_type, device_count))) {
    std::vector<std::string> table_names;
    const auto& input_descs = ra_exe_unit.input_descs;
    for (const auto& input_desc : input_descs) {
      table_names.push_back(get_table_name(input_desc, cat));
    }
    if (!ra_exe_unit.scan_limit) {
      throw WatchdogException(
          "Projection query would require a scan without a limit on table(s): " +
          boost::algorithm::join(table_names, ", "));
    } else {
      throw WatchdogException(
          "Projection query output result set on table(s): " +
          boost::algorithm::join(table_names, ", ") + " would contain " +
          std::to_string(ra_exe_unit.scan_limit) +
          " rows, which is more than the current system limit of " +
          std::to_string(getDeviceBasedScanLimit(device_type, device_count)));
    }
  }
}

size_t anonymous_namespace{Execute.cpp}::compute_buffer_entry_guess ( const std::vector< InputTableInfo > &  query_infos)

Definition at line 1019 of file Execute.cpp.

References CHECK().

Referenced by Executor::executeWorkUnitImpl().

{
  using Fragmenter_Namespace::FragmentInfo;
  // Check for overflows since we're multiplying potentially big table sizes.
  using checked_size_t = boost::multiprecision::number<
      boost::multiprecision::cpp_int_backend<64,
                                             64,
                                             boost::multiprecision::unsigned_magnitude,
                                             boost::multiprecision::checked,
                                             void>>;
  checked_size_t max_groups_buffer_entry_guess = 1;
  for (const auto& query_info : query_infos) {
    CHECK(!query_info.info.fragments.empty());
    auto it = std::max_element(query_info.info.fragments.begin(),
                               query_info.info.fragments.end(),
                               [](const FragmentInfo& f1, const FragmentInfo& f2) {
                                 return f1.getNumTuples() < f2.getNumTuples();
                               });
    max_groups_buffer_entry_guess *= it->getNumTuples();
  }
  // Cap the rough approximation to 100M entries, it's unlikely we can do a great
  // job for baseline group layout with that many entries anyway.
  constexpr size_t max_groups_buffer_entry_guess_cap = 100000000;
  try {
    return std::min(static_cast<size_t>(max_groups_buffer_entry_guess),
                    max_groups_buffer_entry_guess_cap);
  } catch (...) {
    return max_groups_buffer_entry_guess_cap;
  }
}
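The checked boost::multiprecision type above is what keeps the per-fragment multiplication from silently wrapping. A minimal standalone sketch of that behavior, reusing the same alias with hypothetical fragment sizes (not taken from the source):

#include <boost/multiprecision/cpp_int.hpp>
#include <iostream>

using checked_size_t = boost::multiprecision::number<
    boost::multiprecision::cpp_int_backend<64,
                                           64,
                                           boost::multiprecision::unsigned_magnitude,
                                           boost::multiprecision::checked,
                                           void>>;

int main() {
  checked_size_t guess = 1;
  try {
    // Two hypothetical fragments of 2^33 tuples each: the product needs 66 bits,
    // so the checked backend throws std::overflow_error instead of wrapping.
    guess *= (1ULL << 33);
    guess *= (1ULL << 33);
    std::cout << guess << '\n';
  } catch (const std::overflow_error& e) {
    std::cout << "overflow detected: " << e.what() << '\n';
  }
  return 0;
}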

template<typename T >
std::vector<std::string> anonymous_namespace{Execute.cpp}::expr_container_to_string ( const T &  expr_container)

Definition at line 1140 of file Execute.cpp.

Referenced by operator<<().

{
  std::vector<std::string> expr_strs;
  for (const auto& expr : expr_container) {
    if (!expr) {
      expr_strs.emplace_back("NULL");
    } else {
      expr_strs.emplace_back(expr->toString());
    }
  }
  return expr_strs;
}


template<>
std::vector<std::string> anonymous_namespace{Execute.cpp}::expr_container_to_string ( const std::list< Analyzer::OrderEntry > &  expr_container)

Definition at line 1153 of file Execute.cpp.

{
  std::vector<std::string> expr_strs;
  for (const auto& expr : expr_container) {
    expr_strs.emplace_back(expr.toString());
  }
  return expr_strs;
}
void anonymous_namespace{Execute.cpp}::fill_entries_for_empty_input ( std::vector< TargetInfo > &  target_infos,
std::vector< int64_t > &  entry,
const std::vector< Analyzer::Expr * > &  target_exprs,
const QueryMemoryDescriptor &  query_mem_desc 
)

Definition at line 1664 of file Execute.cpp.

References Bitmap, CHECK(), g_bigint_count, g_cluster, get_target_info(), QueryMemoryDescriptor::getCountDistinctDescriptor(), QueryMemoryDescriptor::getExecutor(), inline_null_val(), kAPPROX_COUNT_DISTINCT, kAVG, kCOUNT, kSAMPLE, kSINGLE_VALUE, StdSet, and takes_float_argument().

Referenced by build_row_for_empty_input().

{
  for (size_t target_idx = 0; target_idx < target_exprs.size(); ++target_idx) {
    const auto target_expr = target_exprs[target_idx];
    const auto agg_info = get_target_info(target_expr, g_bigint_count);
    CHECK(agg_info.is_agg);
    target_infos.push_back(agg_info);
    if (g_cluster) {
      const auto executor = query_mem_desc.getExecutor();
      CHECK(executor);
      auto row_set_mem_owner = executor->getRowSetMemoryOwner();
      CHECK(row_set_mem_owner);
      const auto& count_distinct_desc =
          query_mem_desc.getCountDistinctDescriptor(target_idx);
      if (count_distinct_desc.impl_type_ == CountDistinctImplType::Bitmap) {
        CHECK(row_set_mem_owner);
        auto count_distinct_buffer = row_set_mem_owner->allocateCountDistinctBuffer(
            count_distinct_desc.bitmapPaddedSizeBytes());
        entry.push_back(reinterpret_cast<int64_t>(count_distinct_buffer));
        continue;
      }
      if (count_distinct_desc.impl_type_ == CountDistinctImplType::StdSet) {
        auto count_distinct_set = new std::set<int64_t>();
        CHECK(row_set_mem_owner);
        row_set_mem_owner->addCountDistinctSet(count_distinct_set);
        entry.push_back(reinterpret_cast<int64_t>(count_distinct_set));
        continue;
      }
    }
    const bool float_argument_input = takes_float_argument(agg_info);
    if (agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
      entry.push_back(0);
    } else if (agg_info.agg_kind == kAVG) {
      entry.push_back(inline_null_val(agg_info.sql_type, float_argument_input));
      entry.push_back(0);
    } else if (agg_info.agg_kind == kSINGLE_VALUE || agg_info.agg_kind == kSAMPLE) {
      if (agg_info.sql_type.is_geometry()) {
        for (int i = 0; i < agg_info.sql_type.get_physical_coord_cols() * 2; i++) {
          entry.push_back(0);
        }
      } else if (agg_info.sql_type.is_varlen()) {
        entry.push_back(0);
        entry.push_back(0);
      } else {
        entry.push_back(inline_null_val(agg_info.sql_type, float_argument_input));
      }
    } else {
      entry.push_back(inline_null_val(agg_info.sql_type, float_argument_input));
    }
  }
}
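As a worked example of the entry layout this builds: on empty input with the target list COUNT(*), AVG(x), three slots are appended, because AVG occupies a sum slot and a count slot. A toy sketch with a hypothetical null sentinel (the real value comes from inline_null_val()):

#include <cstdint>
#include <vector>

int main() {
  constexpr int64_t null_sentinel = INT64_MIN;  // hypothetical; see inline_null_val()
  std::vector<int64_t> entry;
  entry.push_back(0);              // COUNT(*): zero rows counts as 0, never NULL
  entry.push_back(null_sentinel);  // AVG(x): the sum slot is NULL...
  entry.push_back(0);              // ...and the paired count slot is 0
  return entry.size() == 3 ? 0 : 1;
}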

std::tuple<bool, int64_t, int64_t> anonymous_namespace{Execute.cpp}::get_hpt_overflow_underflow_safe_scaled_values ( const int64_t  chunk_min,
const int64_t  chunk_max,
const SQLTypeInfo &  lhs_type,
const SQLTypeInfo &  rhs_type 
)

Definition at line 3237 of file Execute.cpp.

References CHECK(), SQLTypeInfo::get_dimension(), and DateTimeUtils::get_timestamp_precision_scale().

Referenced by Executor::skipFragment().

{
  const int32_t ldim = lhs_type.get_dimension();
  const int32_t rdim = rhs_type.get_dimension();
  CHECK(ldim != rdim);
  const auto scale = DateTimeUtils::get_timestamp_precision_scale(abs(rdim - ldim));
  if (ldim > rdim) {
    // LHS type precision is more than RHS col type. No chance of overflow/underflow.
    return {true, chunk_min / scale, chunk_max / scale};
  }

  int64_t upscaled_chunk_min;
  int64_t upscaled_chunk_max;

  if (__builtin_mul_overflow(chunk_min, scale, &upscaled_chunk_min) ||
      __builtin_mul_overflow(chunk_max, scale, &upscaled_chunk_max)) {
    return std::make_tuple(false, chunk_min, chunk_max);
  }

  return std::make_tuple(true, upscaled_chunk_min, upscaled_chunk_max);
}
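A minimal sketch of the overflow guard in isolation, scaling a hypothetical TIMESTAMP(0) chunk_max up to nanosecond precision (the 10^9 factor corresponds to DateTimeUtils::get_timestamp_precision_scale(9); the values are made up):

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t scale = 1000000000LL;      // 10^9: seconds to nanoseconds
  const int64_t chunk_max = 9223372037LL;  // just past INT64_MAX / scale
  int64_t upscaled_chunk_max;
  if (__builtin_mul_overflow(chunk_max, scale, &upscaled_chunk_max)) {
    // Mirrors the {false, chunk_min, chunk_max} branch above: the caller keeps
    // the unscaled range rather than trusting a wrapped value.
    std::puts("overflow: keep unscaled values");
  } else {
    std::printf("upscaled chunk_max = %lld\n",
                static_cast<long long>(upscaled_chunk_max));
  }
  return 0;
}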

ResultSetPtr anonymous_namespace{Execute.cpp}::get_merged_result ( std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &  results_per_device)

Definition at line 828 of file Execute.cpp.

References CHECK().

Referenced by Executor::resultsUnion().

{
  auto& first = results_per_device.front().first;
  CHECK(first);
  for (size_t dev_idx = 1; dev_idx < results_per_device.size(); ++dev_idx) {
    const auto& next = results_per_device[dev_idx].first;
    CHECK(next);
    first->append(*next);
  }
  return std::move(first);
}

ReductionCode anonymous_namespace{Execute.cpp}::get_reduction_code ( std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &  results_per_device,
int64_t *  compilation_queue_time 
)

Definition at line 895 of file Execute.cpp.

References ResultSetReductionJIT::codegen(), Executor::compilation_mutex_, timer_start(), and timer_stop().

Referenced by Executor::reduceMultiDeviceResultSets().

{
  auto clock_begin = timer_start();
  std::lock_guard<std::mutex> compilation_lock(Executor::compilation_mutex_);
  *compilation_queue_time = timer_stop(clock_begin);
  const auto& this_result_set = results_per_device[0].first;
  ResultSetReductionJIT reduction_jit(this_result_set->getQueryMemDesc(),
                                      this_result_set->getTargetInfos(),
                                      this_result_set->getTargetInitVals());
  return reduction_jit.codegen();
}
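The first three lines of the function measure how long the thread waits for the shared compilation lock and report that wait as the compilation queue time. A self-contained sketch of the same pattern with std::chrono, using a hypothetical mutex in place of Executor::compilation_mutex_:

#include <chrono>
#include <cstdint>
#include <mutex>

std::mutex compilation_mutex;  // hypothetical stand-in for Executor::compilation_mutex_

void serialized_codegen(int64_t* compilation_queue_time) {
  const auto clock_begin = std::chrono::steady_clock::now();
  std::lock_guard<std::mutex> compilation_lock(compilation_mutex);
  // Only the wait for the lock is counted; the serialized work below is not.
  *compilation_queue_time = std::chrono::duration_cast<std::chrono::milliseconds>(
                                std::chrono::steady_clock::now() - clock_begin)
                                .count();
  // ... serialized codegen work would go here ...
}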

std::string anonymous_namespace{Execute.cpp}::get_table_name ( const InputDescriptor input_desc,
const Catalog_Namespace::Catalog &  cat 
)

Definition at line 1049 of file Execute.cpp.

References CHECK(), Catalog_Namespace::Catalog::getMetadataForTable(), InputDescriptor::getSourceType(), InputDescriptor::getTableId(), TABLE, and to_string().

Referenced by checkWorkUnitWatchdog().

{
  const auto source_type = input_desc.getSourceType();
  if (source_type == InputSourceType::TABLE) {
    const auto td = cat.getMetadataForTable(input_desc.getTableId());
    CHECK(td);
    return td->tableName;
  } else {
    return "$TEMPORARY_TABLE" + std::to_string(-input_desc.getTableId());
  }
}

size_t anonymous_namespace{Execute.cpp}::getDeviceBasedScanLimit ( const ExecutorDeviceType  device_type,
const int  device_count 
)
inline

Definition at line 1061 of file Execute.cpp.

References GPU, and Executor::high_scan_limit.

Referenced by checkWorkUnitWatchdog().

{
  if (device_type == ExecutorDeviceType::GPU) {
    return device_count * Executor::high_scan_limit;
  }
  return Executor::high_scan_limit;
}

bool anonymous_namespace{Execute.cpp}::has_lazy_fetched_columns ( const std::vector< ColumnLazyFetchInfo > &  fetched_cols)

Definition at line 1933 of file Execute.cpp.

Referenced by Executor::createKernels().

{
  for (const auto& col : fetched_cols) {
    if (col.is_lazily_fetched) {
      return true;
    }
  }
  return false;
}


int64_t anonymous_namespace{Execute.cpp}::inline_null_val ( const SQLTypeInfo &  ti,
const bool  float_argument_input 
)

Definition at line 1649 of file Execute.cpp.

References CHECK(), SQLTypeInfo::get_type(), inline_fp_null_val(), inline_int_null_val(), SQLTypeInfo::is_boolean(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_number(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), and kFLOAT.

Referenced by fill_entries_for_empty_input().

{
  CHECK(ti.is_number() || ti.is_time() || ti.is_boolean() || ti.is_string());
  if (ti.is_fp()) {
    if (float_argument_input && ti.get_type() == kFLOAT) {
      int64_t float_null_val = 0;
      *reinterpret_cast<float*>(may_alias_ptr(&float_null_val)) =
          static_cast<float>(inline_fp_null_val(ti));
      return float_null_val;
    }
    const auto double_null_val = inline_fp_null_val(ti);
    return *reinterpret_cast<const int64_t*>(may_alias_ptr(&double_null_val));
  }
  return inline_int_null_val(ti);
}
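The kFLOAT branch stores a 32-bit float's bit pattern inside the 64-bit entry slot. A standalone sketch of that type-punning round trip, using memcpy in place of may_alias_ptr (FLT_MIN is a hypothetical sentinel standing in for inline_fp_null_val()):

#include <cfloat>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const float float_null = FLT_MIN;  // hypothetical sentinel
  int64_t slot = 0;
  std::memcpy(&slot, &float_null, sizeof(float));  // pack the float's bits into the slot
  float read_back;
  std::memcpy(&read_back, &slot, sizeof(float));   // unpack at read time
  std::printf("round-tripped: %g\n", read_back);
  return 0;
}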

std::string anonymous_namespace{Execute.cpp}::join_type_to_string ( const JoinType  type)

Definition at line 1162 of file Execute.cpp.

References INNER, INVALID, LEFT, and UNREACHABLE.

Referenced by operator<<(), and ra_exec_unit_desc_for_caching().

{
  switch (type) {
    case JoinType::INNER:
      return "INNER";
    case JoinType::LEFT:
      return "LEFT";
    case JoinType::INVALID:
      return "INVALID";
  }
  UNREACHABLE();
  return "";
}

size_t anonymous_namespace{Execute.cpp}::permute_storage_columnar ( const ResultSetStorage *  input_storage,
const QueryMemoryDescriptor &  input_query_mem_desc,
const ResultSetStorage *  output_storage,
size_t  output_row_index,
const QueryMemoryDescriptor &  output_query_mem_desc,
const std::vector< uint32_t > &  top_permutation 
)

This function uses the permutation indices in "top_permutation" to permute all group columns (if any) and aggregate columns into the output storage. In columnar layout, different columns are not consecutive in memory, so each column is copied into the output storage separately, through its own memcpy operation.

output_row_index is the current write position in the output storage (the permuted input rows are appended starting there); the updated output row index is returned.

Definition at line 1799 of file Execute.cpp.

References QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getKeyCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getPrependedGroupColOffInBytes(), QueryMemoryDescriptor::getSlotCount(), ResultSetStorage::getUnderlyingBuffer(), and QueryMemoryDescriptor::groupColWidth().

Referenced by Executor::collectAllDeviceShardedTopResults().

{
  const auto output_buffer = output_storage->getUnderlyingBuffer();
  const auto input_buffer = input_storage->getUnderlyingBuffer();
  for (const auto sorted_idx : top_permutation) {
    // permuting all group-columns in this result set into the final buffer:
    for (size_t group_idx = 0; group_idx < input_query_mem_desc.getKeyCount();
         group_idx++) {
      const auto input_column_ptr =
          input_buffer + input_query_mem_desc.getPrependedGroupColOffInBytes(group_idx) +
          sorted_idx * input_query_mem_desc.groupColWidth(group_idx);
      const auto output_column_ptr =
          output_buffer +
          output_query_mem_desc.getPrependedGroupColOffInBytes(group_idx) +
          output_row_index * output_query_mem_desc.groupColWidth(group_idx);
      memcpy(output_column_ptr,
             input_column_ptr,
             output_query_mem_desc.groupColWidth(group_idx));
    }
    // permuting all agg-columns in this result set into the final buffer:
    for (size_t slot_idx = 0; slot_idx < input_query_mem_desc.getSlotCount();
         slot_idx++) {
      const auto input_column_ptr =
          input_buffer + input_query_mem_desc.getColOffInBytes(slot_idx) +
          sorted_idx * input_query_mem_desc.getPaddedSlotWidthBytes(slot_idx);
      const auto output_column_ptr =
          output_buffer + output_query_mem_desc.getColOffInBytes(slot_idx) +
          output_row_index * output_query_mem_desc.getPaddedSlotWidthBytes(slot_idx);
      memcpy(output_column_ptr,
             input_column_ptr,
             output_query_mem_desc.getPaddedSlotWidthBytes(slot_idx));
    }
    ++output_row_index;
  }
  return output_row_index;
}

size_t anonymous_namespace{Execute.cpp}::permute_storage_row_wise ( const ResultSetStorage *  input_storage,
const ResultSetStorage *  output_storage,
size_t  output_row_index,
const QueryMemoryDescriptor &  output_query_mem_desc,
const std::vector< uint32_t > &  top_permutation 
)

This function uses the permutation indices in "top_permutation" to permute all group columns (if any) and aggregate columns into the output storage. In row-wise layout, all columns of a row are consecutive in memory, so a single memcpy per row suffices to copy the whole row.

output_row_index is the current write position in the output storage (the permuted input rows are appended starting there); the updated output row index is returned.

Definition at line 1849 of file Execute.cpp.

References QueryMemoryDescriptor::getRowSize(), and ResultSetStorage::getUnderlyingBuffer().

Referenced by Executor::collectAllDeviceShardedTopResults().

{
  const auto output_buffer = output_storage->getUnderlyingBuffer();
  const auto input_buffer = input_storage->getUnderlyingBuffer();
  for (const auto sorted_idx : top_permutation) {
    const auto row_ptr = input_buffer + sorted_idx * output_query_mem_desc.getRowSize();
    memcpy(output_buffer + output_row_index * output_query_mem_desc.getRowSize(),
           row_ptr,
           output_query_mem_desc.getRowSize());
    ++output_row_index;
  }
  return output_row_index;
}
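A toy illustration of the row-wise case, with one 8-byte column per row and made-up buffers (the real row size comes from QueryMemoryDescriptor::getRowSize()):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

int main() {
  constexpr size_t row_size = sizeof(int64_t);              // one 8-byte column per row
  const std::vector<int64_t> input = {30, 10, 20};          // unsorted per-device rows
  std::vector<int64_t> output(input.size());
  const std::vector<uint32_t> top_permutation = {1, 2, 0};  // row indices in sorted order
  size_t output_row_index = 0;
  for (const auto sorted_idx : top_permutation) {
    std::memcpy(reinterpret_cast<int8_t*>(output.data()) + output_row_index * row_size,
                reinterpret_cast<const int8_t*>(input.data()) + sorted_idx * row_size,
                row_size);  // a single memcpy moves the whole row
    ++output_row_index;
  }
  // output is now {10, 20, 30}
  return output.front() == 10 ? 0 : 1;
}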

RelAlgExecutionUnit anonymous_namespace{Execute.cpp}::replace_scan_limit ( const RelAlgExecutionUnit &  ra_exe_unit_in,
const size_t  new_scan_limit 
)

Definition at line 1291 of file Execute.cpp.

References RelAlgExecutionUnit::estimator, RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, RelAlgExecutionUnit::join_quals, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::query_state, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::union_all, and RelAlgExecutionUnit::use_bump_allocator.

Referenced by Executor::executeWorkUnit().

{
  return {ra_exe_unit_in.input_descs,
          ra_exe_unit_in.input_col_descs,
          ra_exe_unit_in.simple_quals,
          ra_exe_unit_in.quals,
          ra_exe_unit_in.join_quals,
          ra_exe_unit_in.groupby_exprs,
          ra_exe_unit_in.target_exprs,
          ra_exe_unit_in.estimator,
          ra_exe_unit_in.sort_info,
          new_scan_limit,
          ra_exe_unit_in.use_bump_allocator,
          ra_exe_unit_in.union_all,
          ra_exe_unit_in.query_state};
}

std::string anonymous_namespace{Execute.cpp}::sort_algorithm_to_string ( const SortAlgorithm  algorithm)

Definition at line 1175 of file Execute.cpp.

References Default, SpeculativeTopN, StreamingTopN, and UNREACHABLE.

Referenced by operator<<().

{
  switch (algorithm) {
    case SortAlgorithm::Default:
      return "ResultSet";
    case SortAlgorithm::SpeculativeTopN:
      return "Speculative Top N";
    case SortAlgorithm::StreamingTopN:
      return "Streaming Top N";
  }
  UNREACHABLE();
  return "";
}

const ColumnDescriptor * anonymous_namespace{Execute.cpp}::try_get_column_descriptor ( const InputColDescriptor *  col_desc,
const Catalog_Namespace::Catalog &  cat 
)

Definition at line 2208 of file Execute.cpp.

References get_column_descriptor_maybe(), InputColDescriptor::getColId(), InputColDescriptor::getScanDesc(), and InputDescriptor::getTableId().

Referenced by Executor::fetchChunks(), and Executor::fetchUnionChunks().

{
  const int table_id = col_desc->getScanDesc().getTableId();
  const int col_id = col_desc->getColId();
  return get_column_descriptor_maybe(col_id, table_id, cat);
}