ResultSet.cpp
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "ResultSet.h"
24 #include "DataMgr/Allocators/CudaAllocator.h"
25 #include "DataMgr/BufferMgr/BufferMgr.h"
26 #include "Execute.h"
27 #include "GpuMemUtils.h"
28 #include "InPlaceSort.h"
29 #include "OutputBufferInitialization.h"
30 #include "QueryEngine/QueryEngine.h"
31 #include "RelAlgExecutionUnit.h"
32 #include "RuntimeFunctions.h"
33 #include "Shared/Intervals.h"
34 #include "Shared/SqlTypesLayout.h"
35 #include "Shared/checked_alloc.h"
36 #include "Shared/likely.h"
37 #include "Shared/thread_count.h"
38 #include "Shared/threading.h"
39 
40 #include <algorithm>
41 #include <atomic>
42 #include <bitset>
43 #include <functional>
44 #include <future>
45 #include <numeric>
46 
47 size_t g_parallel_top_min = 100e3;
48 size_t g_parallel_top_max = 20e6; // In effect only with g_enable_watchdog.
49 size_t g_streaming_topn_max = 100e3;
50 constexpr int64_t uninitialized_cached_row_count{-1};
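// Editorial note (not part of the original file): these globals steer
// ResultSet::sort() below. parallelTop() is chosen for top-n sorts once
// entryCount() exceeds g_parallel_top_min, and g_parallel_top_max caps the
// sortable entry count when the watchdog is enabled. g_streaming_topn_max is
// not referenced in this file and presumably bounds streaming top-n elsewhere
// in the engine. uninitialized_cached_row_count (-1) is the sentinel meaning
// "row count not yet computed" for the atomic cached_row_count_.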
51 
52 void ResultSet::keepFirstN(const size_t n) {
53  invalidateCachedRowCount();
54  keep_first_ = n;
55 }
56 
57 void ResultSet::dropFirstN(const size_t n) {
58  invalidateCachedRowCount();
59  drop_first_ = n;
60 }
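// Illustrative usage (not from the original file): keep_first_ and drop_first_
// correspond to SQL LIMIT and OFFSET (see rowCountImpl() below). Assuming a
// ResultSetPtr rs:
//   rs->dropFirstN(10);  // OFFSET 10
//   rs->keepFirstN(5);   // LIMIT 5
// Both calls invalidate the cached row count so it is recomputed lazily.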
61 
62 ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
63  const ExecutorDeviceType device_type,
64  const QueryMemoryDescriptor& query_mem_desc,
65  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
66  const Catalog_Namespace::Catalog* catalog,
67  const unsigned block_size,
68  const unsigned grid_size)
69  : targets_(targets)
70  , device_type_(device_type)
71  , device_id_(-1)
72  , query_mem_desc_(query_mem_desc)
73  , crt_row_buff_idx_(0)
74  , fetched_so_far_(0)
75  , drop_first_(0)
76  , keep_first_(0)
77  , row_set_mem_owner_(row_set_mem_owner)
78  , catalog_(catalog)
79  , block_size_(block_size)
80  , grid_size_(grid_size)
81  , data_mgr_(nullptr)
82  , separate_varlen_storage_valid_(false)
83  , just_explain_(false)
84  , for_validation_only_(false)
85  , cached_row_count_(uninitialized_cached_row_count)
86  , geo_return_type_(GeoReturnType::WktString)
87  , cached_(false)
88  , query_exec_time_(0)
89  , query_plan_(EMPTY_HASHED_PLAN_DAG_KEY)
90  , can_use_speculative_top_n_sort(std::nullopt) {}
91 
92 ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
93  const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
94  const std::vector<std::vector<const int8_t*>>& col_buffers,
95  const std::vector<std::vector<int64_t>>& frag_offsets,
96  const std::vector<int64_t>& consistent_frag_sizes,
97  const ExecutorDeviceType device_type,
98  const int device_id,
99  const QueryMemoryDescriptor& query_mem_desc,
100  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
101  const Catalog_Namespace::Catalog* catalog,
102  const unsigned block_size,
103  const unsigned grid_size)
104  : targets_(targets)
105  , device_type_(device_type)
106  , device_id_(device_id)
107  , query_mem_desc_(query_mem_desc)
108  , crt_row_buff_idx_(0)
109  , fetched_so_far_(0)
110  , drop_first_(0)
111  , keep_first_(0)
112  , row_set_mem_owner_(row_set_mem_owner)
113  , catalog_(catalog)
114  , block_size_(block_size)
115  , grid_size_(grid_size)
116  , lazy_fetch_info_(lazy_fetch_info)
117  , col_buffers_{col_buffers}
118  , frag_offsets_{frag_offsets}
119  , consistent_frag_sizes_{consistent_frag_sizes}
120  , data_mgr_(nullptr)
121  , separate_varlen_storage_valid_(false)
122  , just_explain_(false)
123  , for_validation_only_(false)
124  , cached_row_count_(uninitialized_cached_row_count)
125  , geo_return_type_(GeoReturnType::WktString)
126  , cached_(false)
127  , query_exec_time_(0)
128  , query_plan_(EMPTY_HASHED_PLAN_DAG_KEY)
129  , can_use_speculative_top_n_sort(std::nullopt) {}
130 
131 ResultSet::ResultSet(const std::shared_ptr<const Analyzer::Estimator> estimator,
132  const ExecutorDeviceType device_type,
133  const int device_id,
134  Data_Namespace::DataMgr* data_mgr)
135  : device_type_(device_type)
136  , device_id_(device_id)
137  , query_mem_desc_{}
138  , crt_row_buff_idx_(0)
139  , estimator_(estimator)
140  , data_mgr_(data_mgr)
141  , separate_varlen_storage_valid_(false)
142  , just_explain_(false)
143  , for_validation_only_(false)
144  , cached_row_count_(uninitialized_cached_row_count)
145  , geo_return_type_(GeoReturnType::WktString)
146  , cached_(false)
147  , query_exec_time_(0)
148  , query_plan_(EMPTY_HASHED_PLAN_DAG_KEY)
149  , can_use_speculative_top_n_sort(std::nullopt) {
150  if (device_type == ExecutorDeviceType::GPU) {
151  device_estimator_buffer_ = CudaAllocator::allocGpuAbstractBuffer(
152  data_mgr_, estimator_->getBufferSize(), device_id_);
153  data_mgr->getCudaMgr()->zeroDeviceMem(device_estimator_buffer_->getMemoryPtr(),
154  estimator_->getBufferSize(),
155  device_id_,
156  getQueryEngineCudaStreamForDevice(device_id_));
157  } else {
158  host_estimator_buffer_ =
159  static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
160  }
161 }
162 
163 ResultSet::ResultSet(const std::string& explanation)
164  : device_type_(ExecutorDeviceType::CPU)
165  , device_id_(-1)
166  , fetched_so_far_(0)
167  , separate_varlen_storage_valid_(false)
168  , explanation_(explanation)
169  , just_explain_(true)
170  , for_validation_only_(false)
171  , cached_row_count_(uninitialized_cached_row_count)
172  , geo_return_type_(GeoReturnType::WktString)
173  , cached_(false)
174  , query_exec_time_(0)
175  , query_plan_(EMPTY_HASHED_PLAN_DAG_KEY)
176  , can_use_speculative_top_n_sort(std::nullopt) {}
177 
178 ResultSet::ResultSet(int64_t queue_time_ms,
179  int64_t render_time_ms,
180  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
181  : device_type_(ExecutorDeviceType::CPU)
182  , device_id_(-1)
183  , fetched_so_far_(0)
184  , row_set_mem_owner_(row_set_mem_owner)
185  , timings_(QueryExecutionTimings{queue_time_ms, render_time_ms, 0, 0})
186  , separate_varlen_storage_valid_(false)
187  , just_explain_(true)
188  , for_validation_only_(false)
189  , cached_row_count_(uninitialized_cached_row_count)
190  , geo_return_type_(GeoReturnType::WktString)
191  , cached_(false)
192  , query_exec_time_(0)
193  , query_plan_(EMPTY_HASHED_PLAN_DAG_KEY)
194  , can_use_speculative_top_n_sort(std::nullopt) {}
195 
196 ResultSet::~ResultSet() {
197  if (storage_) {
198  if (!storage_->buff_is_provided_) {
199  CHECK(storage_->getUnderlyingBuffer());
200  free(storage_->getUnderlyingBuffer());
201  }
202  }
203  for (auto& storage : appended_storage_) {
204  if (storage && !storage->buff_is_provided_) {
205  free(storage->getUnderlyingBuffer());
206  }
207  }
208  if (host_estimator_buffer_) {
209  CHECK(device_type_ == ExecutorDeviceType::CPU || device_estimator_buffer_);
210  free(host_estimator_buffer_);
211  }
212  if (device_estimator_buffer_) {
213  CHECK(data_mgr_);
214  data_mgr_->free(device_estimator_buffer_);
215  }
216 }
217 
218 std::string ResultSet::summaryToString() const {
219  std::ostringstream oss;
220  oss << "Result Set Info" << std::endl;
221  oss << "\tLayout: " << query_mem_desc_.queryDescTypeToString() << std::endl;
222  oss << "\tColumns: " << colCount() << std::endl;
223  oss << "\tRows: " << rowCount() << std::endl;
224  oss << "\tEntry count: " << entryCount() << std::endl;
225  const std::string is_empty = isEmpty() ? "True" : "False";
226  oss << "\tIs empty: " << is_empty << std::endl;
227  const std::string did_output_columnar = didOutputColumnar() ? "True" : "False";
228  oss << "\tColumnar: " << did_output_columnar << std::endl;
229  oss << "\tLazy-fetched columns: " << getNumColumnsLazyFetched() << std::endl;
230  const std::string is_direct_columnar_conversion_possible =
231  isDirectColumnarConversionPossible() ? "True" : "False";
232  oss << "\tDirect columnar conversion possible: "
233  << is_direct_columnar_conversion_possible << std::endl;
234 
235  size_t num_columns_zero_copy_columnarizable{0};
236  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
237  if (isZeroCopyColumnarConversionPossible(target_idx)) {
238  num_columns_zero_copy_columnarizable++;
239  }
240  }
241  oss << "\tZero-copy columnar conversion columns: "
242  << num_columns_zero_copy_columnarizable << std::endl;
243 
244  oss << "\tPermutation size: " << permutation_.size() << std::endl;
245  oss << "\tLimit: " << keep_first_ << std::endl;
246  oss << "\tOffset: " << drop_first_ << std::endl;
247  return oss.str();
248 }
249 
250 ExecutorDeviceType ResultSet::getDeviceType() const {
251  return device_type_;
252 }
253 
254 const ResultSetStorage* ResultSet::allocateStorage() const {
255  CHECK(!storage_);
256  CHECK(row_set_mem_owner_);
257  auto buff = row_set_mem_owner_->allocate(
258  query_mem_desc_.getBufferSizeBytes(device_type_), /*thread_idx=*/0);
259  storage_.reset(
260  new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
261  return storage_.get();
262 }
263 
264 const ResultSetStorage* ResultSet::allocateStorage(
265  int8_t* buff,
266  const std::vector<int64_t>& target_init_vals,
267  std::shared_ptr<VarlenOutputInfo> varlen_output_info) const {
268  CHECK(buff);
269  CHECK(!storage_);
270  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
271  // TODO: add both to the constructor
272  storage_->target_init_vals_ = target_init_vals;
273  if (varlen_output_info) {
274  storage_->varlen_output_info_ = varlen_output_info;
275  }
276  return storage_.get();
277 }
278 
279 const ResultSetStorage* ResultSet::allocateStorage(
280  const std::vector<int64_t>& target_init_vals) const {
281  CHECK(!storage_);
282  CHECK(row_set_mem_owner_);
283  auto buff = row_set_mem_owner_->allocate(
284  query_mem_desc_.getBufferSizeBytes(device_type_), /*thread_idx=*/0);
285  storage_.reset(
286  new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
287  storage_->target_init_vals_ = target_init_vals;
288  return storage_.get();
289 }
290 
291 size_t ResultSet::getCurrentRowBufferIndex() const {
292  if (crt_row_buff_idx_ == 0) {
293  throw std::runtime_error("current row buffer iteration index is undefined");
294  }
295  return crt_row_buff_idx_ - 1;
296 }
297 
298 // Note: that.appended_storage_ does not get appended to this.
299 void ResultSet::append(ResultSet& that) {
300  invalidateCachedRowCount();
301  if (!that.storage_) {
302  return;
303  }
304  appended_storage_.push_back(std::move(that.storage_));
305  query_mem_desc_.setEntryCount(
306  query_mem_desc_.getEntryCount() +
307  appended_storage_.back()->query_mem_desc_.getEntryCount());
308  chunks_.insert(chunks_.end(), that.chunks_.begin(), that.chunks_.end());
309  col_buffers_.insert(
310  col_buffers_.end(), that.col_buffers_.begin(), that.col_buffers_.end());
311  frag_offsets_.insert(
312  frag_offsets_.end(), that.frag_offsets_.begin(), that.frag_offsets_.end());
313  consistent_frag_sizes_.insert(consistent_frag_sizes_.end(),
314  that.consistent_frag_sizes_.begin(),
315  that.consistent_frag_sizes_.end());
316  chunk_iters_.insert(
317  chunk_iters_.end(), that.chunk_iters_.begin(), that.chunk_iters_.end());
318  if (separate_varlen_storage_valid_) {
319  CHECK(that.separate_varlen_storage_valid_);
320  serialized_varlen_buffer_.insert(serialized_varlen_buffer_.end(),
321  that.serialized_varlen_buffer_.begin(),
322  that.serialized_varlen_buffer_.end());
323  }
324  for (auto& buff : that.literal_buffers_) {
325  literal_buffers_.push_back(std::move(buff));
326  }
327 }
328 
329 ResultSetPtr ResultSet::copy() {
330  auto timer = DEBUG_TIMER(__func__);
331  if (!storage_) {
332  return nullptr;
333  }
334 
335  auto executor = getExecutor();
336  CHECK(executor);
337  ResultSetPtr copied_rs = std::make_shared<ResultSet>(targets_,
338  device_type_,
339  query_mem_desc_,
340  row_set_mem_owner_,
341  executor->getCatalog(),
342  executor->blockSize(),
343  executor->gridSize());
344 
345  auto allocate_and_copy_storage =
346  [&](const ResultSetStorage* prev_storage) -> std::unique_ptr<ResultSetStorage> {
347  const auto& prev_qmd = prev_storage->query_mem_desc_;
348  const auto storage_size = prev_qmd.getBufferSizeBytes(device_type_);
349  auto buff = row_set_mem_owner_->allocate(storage_size, /*thread_idx=*/0);
350  std::unique_ptr<ResultSetStorage> new_storage;
351  new_storage.reset(new ResultSetStorage(
352  prev_storage->targets_, prev_qmd, buff, /*buff_is_provided=*/true));
353  new_storage->target_init_vals_ = prev_storage->target_init_vals_;
354  if (prev_storage->varlen_output_info_) {
355  new_storage->varlen_output_info_ = prev_storage->varlen_output_info_;
356  }
357  memcpy(new_storage->buff_, prev_storage->buff_, storage_size);
358  new_storage->query_mem_desc_ = prev_qmd;
359  return new_storage;
360  };
361 
362  copied_rs->storage_ = allocate_and_copy_storage(storage_.get());
363  if (!appended_storage_.empty()) {
364  for (const auto& storage : appended_storage_) {
365  copied_rs->appended_storage_.push_back(allocate_and_copy_storage(storage.get()));
366  }
367  }
368  std::copy(chunks_.begin(), chunks_.end(), std::back_inserter(copied_rs->chunks_));
369  std::copy(chunk_iters_.begin(),
370  chunk_iters_.end(),
371  std::back_inserter(copied_rs->chunk_iters_));
372  std::copy(col_buffers_.begin(),
373  col_buffers_.end(),
374  std::back_inserter(copied_rs->col_buffers_));
375  std::copy(frag_offsets_.begin(),
376  frag_offsets_.end(),
377  std::back_inserter(copied_rs->frag_offsets_));
378  std::copy(consistent_frag_sizes_.begin(),
379  consistent_frag_sizes_.end(),
380  std::back_inserter(copied_rs->consistent_frag_sizes_));
381  if (separate_varlen_storage_valid_) {
382  std::copy(serialized_varlen_buffer_.begin(),
383  serialized_varlen_buffer_.end(),
384  std::back_inserter(copied_rs->serialized_varlen_buffer_));
385  }
386  std::copy(literal_buffers_.begin(),
387  literal_buffers_.end(),
388  std::back_inserter(copied_rs->literal_buffers_));
389  std::copy(lazy_fetch_info_.begin(),
390  lazy_fetch_info_.end(),
391  std::back_inserter(copied_rs->lazy_fetch_info_));
392 
393  copied_rs->permutation_ = permutation_;
394  copied_rs->drop_first_ = drop_first_;
395  copied_rs->keep_first_ = keep_first_;
396  copied_rs->separate_varlen_storage_valid_ = separate_varlen_storage_valid_;
397  copied_rs->query_exec_time_ = query_exec_time_;
398  copied_rs->input_table_keys_ = input_table_keys_;
399  copied_rs->target_meta_info_ = target_meta_info_;
400  copied_rs->geo_return_type_ = geo_return_type_;
401  copied_rs->query_plan_ = query_plan_;
402  if (can_use_speculative_top_n_sort) {
403  copied_rs->can_use_speculative_top_n_sort = can_use_speculative_top_n_sort;
404  }
405 
406  return copied_rs;
407 }
408 
409 const ResultSetStorage* ResultSet::getStorage() const {
410  return storage_.get();
411 }
412 
413 size_t ResultSet::colCount() const {
414  return just_explain_ ? 1 : targets_.size();
415 }
416 
417 SQLTypeInfo ResultSet::getColType(const size_t col_idx) const {
418  if (just_explain_) {
419  return SQLTypeInfo(kTEXT, false);
420  }
421  CHECK_LT(col_idx, targets_.size());
422  return targets_[col_idx].agg_kind == kAVG ? SQLTypeInfo(kDOUBLE, false)
423  : targets_[col_idx].sql_type;
424 }
425 
426 StringDictionaryProxy* ResultSet::getStringDictionaryProxy(int const dict_id) const {
427  constexpr bool with_generation = true;
428  return catalog_ ? row_set_mem_owner_->getOrAddStringDictProxy(
429  dict_id, with_generation, catalog_)
430  : row_set_mem_owner_->getStringDictProxy(dict_id);
431 }
432 
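// Editorial note (not part of the original file): CellCallback is a small
// functor used by translateDictEncodedColumns() below. For each cell it
// rewrites a dictionary-encoded string id in place through the supplied
// IdMap, leaving the null sentinel untouched.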
433 class ResultSet::CellCallback {
434  StringDictionaryProxy::IdMap const id_map_;
435  int64_t const null_int_;
436 
437  public:
438  CellCallback(StringDictionaryProxy::IdMap&& id_map, int64_t const null_int)
439  : id_map_(std::move(id_map)), null_int_(null_int) {}
440  void operator()(int8_t const* const cell_ptr) const {
441  using StringId = int32_t;
442  StringId* const string_id_ptr =
443  const_cast<StringId*>(reinterpret_cast<StringId const*>(cell_ptr));
444  if (*string_id_ptr != null_int_) {
445  *string_id_ptr = id_map_[*string_id_ptr];
446  }
447  }
448 };
449 
450 // Update any dictionary-encoded targets within storage_ with the corresponding
451 // dictionary in the given targets parameter, if their comp_param (dictionary) differs.
452 // This may modify both the storage_ values and storage_ targets.
453 // Does not iterate through appended_storage_.
454 // Iterate over targets starting at index start_idx.
455 void ResultSet::translateDictEncodedColumns(std::vector<TargetInfo> const& targets,
456  size_t const start_idx) {
457  if (storage_) {
458  CHECK_EQ(targets.size(), storage_->targets_.size());
459  RowIterationState state;
460  for (size_t target_idx = start_idx; target_idx < targets.size(); ++target_idx) {
461  auto const& type_lhs = targets[target_idx].sql_type;
462  if (type_lhs.is_dict_encoded_string()) {
463  auto& type_rhs =
464  const_cast<SQLTypeInfo&>(storage_->targets_[target_idx].sql_type);
465  CHECK(type_rhs.is_dict_encoded_string());
466  if (type_lhs.get_comp_param() != type_rhs.get_comp_param()) {
467  auto* const sdp_lhs = getStringDictionaryProxy(type_lhs.get_comp_param());
468  CHECK(sdp_lhs);
469  auto const* const sdp_rhs = getStringDictionaryProxy(type_rhs.get_comp_param());
470  CHECK(sdp_rhs);
471  state.cur_target_idx_ = target_idx;
472  CellCallback const translate_string_ids(sdp_lhs->transientUnion(*sdp_rhs),
473  inline_int_null_val(type_rhs));
474  eachCellInColumn(state, translate_string_ids);
475  type_rhs.set_comp_param(type_lhs.get_comp_param());
476  }
477  }
478  }
479  }
480 }
481 
482 // For each cell in column target_idx, callback func with pointer to datum.
483 // This currently assumes the column type is a dictionary-encoded string, but this logic
484 // can be generalized to other types.
485 void ResultSet::eachCellInColumn(RowIterationState& state, CellCallback const& func) {
486  size_t const target_idx = state.cur_target_idx_;
487  QueryMemoryDescriptor& storage_qmd = storage_->query_mem_desc_;
488  CHECK_LT(target_idx, lazy_fetch_info_.size());
489  auto& col_lazy_fetch = lazy_fetch_info_[target_idx];
490  CHECK(col_lazy_fetch.is_lazily_fetched);
491  int const target_size = storage_->targets_[target_idx].sql_type.get_size();
492  CHECK_LT(0, target_size) << storage_->targets_[target_idx].toString();
493  size_t const nrows = storage_->binSearchRowCount();
494  if (storage_qmd.didOutputColumnar()) {
495  // Logic based on ResultSet::ColumnWiseTargetAccessor::initializeOffsetsForStorage()
496  if (state.buf_ptr_ == nullptr) {
497  state.buf_ptr_ = get_cols_ptr(storage_->buff_, storage_qmd);
498  state.compact_sz1_ = storage_qmd.getPaddedSlotWidthBytes(state.agg_idx_)
499  ? storage_qmd.getPaddedSlotWidthBytes(state.agg_idx_)
500  : query_mem_desc_.getEffectiveKeyWidth();
501  }
502  for (size_t j = state.prev_target_idx_; j < state.cur_target_idx_; ++j) {
503  size_t const next_target_idx = j + 1; // Set state to reflect next target_idx j+1
504  state.buf_ptr_ = advance_to_next_columnar_target_buff(
505  state.buf_ptr_, storage_qmd, state.agg_idx_);
506  auto const& next_agg_info = storage_->targets_[next_target_idx];
507  state.agg_idx_ =
508  advance_slot(state.agg_idx_, next_agg_info, separate_varlen_storage_valid_);
509  state.compact_sz1_ = storage_qmd.getPaddedSlotWidthBytes(state.agg_idx_)
510  ? storage_qmd.getPaddedSlotWidthBytes(state.agg_idx_)
511  : query_mem_desc_.getEffectiveKeyWidth();
512  }
513  for (size_t i = 0; i < nrows; ++i) {
514  int8_t const* const pos_ptr = state.buf_ptr_ + i * state.compact_sz1_;
515  int64_t pos = read_int_from_buff(pos_ptr, target_size);
516  CHECK_GE(pos, 0);
517  auto& frag_col_buffers = getColumnFrag(0, target_idx, pos);
518  CHECK_LT(size_t(col_lazy_fetch.local_col_id), frag_col_buffers.size());
519  int8_t const* const col_frag = frag_col_buffers[col_lazy_fetch.local_col_id];
520  func(col_frag + pos * target_size);
521  }
522  } else {
523  size_t const key_bytes_with_padding =
524  align_to_int64(get_key_bytes_rowwise(storage_qmd));
525  for (size_t i = 0; i < nrows; ++i) {
526  int8_t const* const keys_ptr = row_ptr_rowwise(storage_->buff_, storage_qmd, i);
527  int8_t const* const rowwise_target_ptr = keys_ptr + key_bytes_with_padding;
528  int64_t pos = *reinterpret_cast<int64_t const*>(rowwise_target_ptr);
529  auto& frag_col_buffers = getColumnFrag(0, target_idx, pos);
530  CHECK_LT(size_t(col_lazy_fetch.local_col_id), frag_col_buffers.size());
531  int8_t const* const col_frag = frag_col_buffers[col_lazy_fetch.local_col_id];
532  func(col_frag + pos * target_size);
533  }
534  }
535 }
536 
537 namespace {
538 
539 size_t get_truncated_row_count(size_t total_row_count, size_t limit, size_t offset) {
540  if (total_row_count < offset) {
541  return 0;
542  }
543 
544  size_t total_truncated_row_count = total_row_count - offset;
545 
546  if (limit) {
547  return std::min(total_truncated_row_count, limit);
548  }
549 
550  return total_truncated_row_count;
551 }
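// Illustrative example (not from the original file): with total_row_count =
// 1000, offset = 990 and limit = 100 the result is min(1000 - 990, 100) = 10;
// if offset is at least total_row_count the result is 0, and limit = 0 means
// "no limit".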
552 
553 } // namespace
554 
555 size_t ResultSet::rowCountImpl(const bool force_parallel) const {
556  if (just_explain_) {
557  return 1;
558  }
559  if (query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::TableFunction) {
560  return entryCount();
561  }
562  if (!permutation_.empty()) {
563  // keep_first_ corresponds to SQL LIMIT
564  // drop_first_ corresponds to SQL OFFSET
565  return get_truncated_row_count(permutation_.size(), keep_first_, drop_first_);
566  }
567  if (!storage_) {
568  return 0;
569  }
570  CHECK(permutation_.empty());
571  if (query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection) {
572  return binSearchRowCount();
573  }
574 
575  constexpr size_t auto_parallel_row_count_threshold{20000UL};
576  if (force_parallel || entryCount() >= auto_parallel_row_count_threshold) {
577  return parallelRowCount();
578  }
579  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
580  moveToBegin();
581  size_t row_count{0};
582  while (true) {
583  auto crt_row = getNextRowUnlocked(false, false);
584  if (crt_row.empty()) {
585  break;
586  }
587  ++row_count;
588  }
589  moveToBegin();
590  return row_count;
591 }
592 
593 size_t ResultSet::rowCount(const bool force_parallel) const {
594  // cached_row_count_ is atomic, so fetch it into a local variable first
595  // to avoid repeat fetches
596  const int64_t cached_row_count = cached_row_count_;
597  if (cached_row_count != uninitialized_cached_row_count) {
598  CHECK_GE(cached_row_count, 0);
599  return cached_row_count;
600  }
601  setCachedRowCount(rowCountImpl(force_parallel));
602  return cached_row_count_;
603 }
604 
605 void ResultSet::invalidateCachedRowCount() const {
606  cached_row_count_ = uninitialized_cached_row_count;
607 }
608 
609 void ResultSet::setCachedRowCount(const size_t row_count) const {
610  const int64_t signed_row_count = static_cast<int64_t>(row_count);
611  const int64_t old_cached_row_count = cached_row_count_.exchange(signed_row_count);
612  CHECK(old_cached_row_count == uninitialized_cached_row_count ||
613  old_cached_row_count == signed_row_count);
614 }
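// Editorial note (not part of the original file): cached_row_count_ is an
// atomic int64_t where -1 means "not computed yet" and any non-negative value
// is the memoized row count. The exchange() above verifies that a previously
// cached value is never silently replaced by a different one.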
615 
616 size_t ResultSet::binSearchRowCount() const {
617  if (!storage_) {
618  return 0;
619  }
620 
621  size_t row_count = storage_->binSearchRowCount();
622  for (auto& s : appended_storage_) {
623  row_count += s->binSearchRowCount();
624  }
625 
626  return get_truncated_row_count(row_count, getLimit(), drop_first_);
627 }
628 
629 size_t ResultSet::parallelRowCount() const {
630  using namespace threading;
631  auto execute_parallel_row_count = [this, query_id = logger::query_id()](
632  const blocked_range<size_t>& r,
633  size_t row_count) {
634  auto qid_scope_guard = logger::set_thread_local_query_id(query_id);
635  for (size_t i = r.begin(); i < r.end(); ++i) {
636  if (!isRowAtEmpty(i)) {
637  ++row_count;
638  }
639  }
640  return row_count;
641  };
642  const auto row_count = parallel_reduce(blocked_range<size_t>(0, entryCount()),
643  size_t(0),
644  execute_parallel_row_count,
645  std::plus<int>());
646  return get_truncated_row_count(row_count, getLimit(), drop_first_);
647 }
648 
649 bool ResultSet::isEmpty() const {
650  // To simplify this function and de-dup logic with ResultSet::rowCount()
651  // (mismatches between the two were causing bugs), we modified this function
652  // to simply fetch rowCount(). The potential downside of this approach is that
653  // in some cases more work will need to be done, as we can't just stop at the first row.
654  // Mitigating that for most cases is the following:
655  // 1) rowCount() is cached, so the logic for actually computing row counts will run only
656  // once
657  // per result set.
658  // 2) If the cache is empty (cached_row_count_ == -1), rowCount() will use parallel
659  // methods if deemed appropriate, which in many cases could be faster for a sparse
660  // large result set than single-threaded iteration from the beginning
661  // 3) Often where isEmpty() is needed, rowCount() is also needed. Since the first call
662  // to rowCount()
663  // will be cached, there is no extra overhead in these cases
664 
665  return rowCount() == size_t(0);
666 }
667 
668 bool ResultSet::definitelyHasNoRows() const {
669  return (!storage_ && !estimator_ && !just_explain_) || cached_row_count_ == 0;
670 }
671 
672 const QueryMemoryDescriptor& ResultSet::getQueryMemDesc() const {
673  CHECK(storage_);
674  return storage_->query_mem_desc_;
675 }
676 
677 const std::vector<TargetInfo>& ResultSet::getTargetInfos() const {
678  return targets_;
679 }
680 
681 const std::vector<int64_t>& ResultSet::getTargetInitVals() const {
682  CHECK(storage_);
683  return storage_->target_init_vals_;
684 }
685 
686 int8_t* ResultSet::getDeviceEstimatorBuffer() const {
687  CHECK(device_type_ == ExecutorDeviceType::GPU);
688  CHECK(device_estimator_buffer_);
689  return device_estimator_buffer_->getMemoryPtr();
690 }
691 
692 int8_t* ResultSet::getHostEstimatorBuffer() const {
693  return host_estimator_buffer_;
694 }
695 
696 void ResultSet::syncEstimatorBuffer() const {
697  CHECK(device_type_ == ExecutorDeviceType::GPU);
698  CHECK(!host_estimator_buffer_);
699  CHECK_EQ(size_t(0), estimator_->getBufferSize() % sizeof(int64_t));
700  host_estimator_buffer_ =
701  static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
702  CHECK(device_estimator_buffer_);
703  auto device_buffer_ptr = device_estimator_buffer_->getMemoryPtr();
704  auto allocator = std::make_unique<CudaAllocator>(
705  data_mgr_, device_id_, getQueryEngineCudaStreamForDevice(device_id_));
706  allocator->copyFromDevice(
707  host_estimator_buffer_, device_buffer_ptr, estimator_->getBufferSize());
708 }
709 
710 void ResultSet::setQueueTime(const int64_t queue_time) {
711  timings_.executor_queue_time = queue_time;
712 }
713 
714 void ResultSet::setKernelQueueTime(const int64_t kernel_queue_time) {
715  timings_.kernel_queue_time = kernel_queue_time;
716 }
717 
718 void ResultSet::addCompilationQueueTime(const int64_t compilation_queue_time) {
719  timings_.compilation_queue_time += compilation_queue_time;
720 }
721 
722 int64_t ResultSet::getQueueTime() const {
723  return timings_.executor_queue_time + timings_.kernel_queue_time +
724  timings_.compilation_queue_time;
725 }
726 
727 int64_t ResultSet::getRenderTime() const {
728  return timings_.render_time;
729 }
730 
731 void ResultSet::moveToBegin() const {
732  crt_row_buff_idx_ = 0;
733  fetched_so_far_ = 0;
734 }
735 
736 bool ResultSet::isTruncated() const {
737  return keep_first_ + drop_first_;
738 }
739 
740 bool ResultSet::isExplain() const {
741  return just_explain_;
742 }
743 
744 void ResultSet::setValidationOnlyRes() {
745  for_validation_only_ = true;
746 }
747 
748 bool ResultSet::isValidationOnlyRes() const {
749  return for_validation_only_;
750 }
751 
752 int ResultSet::getDeviceId() const {
753  return device_id_;
754 }
755 
756 QueryMemoryDescriptor ResultSet::fixupQueryMemoryDescriptor(
757  const QueryMemoryDescriptor& query_mem_desc) {
758  auto query_mem_desc_copy = query_mem_desc;
759  query_mem_desc_copy.resetGroupColWidths(
760  std::vector<int8_t>(query_mem_desc_copy.getGroupbyColCount(), 8));
761  if (query_mem_desc.didOutputColumnar()) {
762  return query_mem_desc_copy;
763  }
764  query_mem_desc_copy.alignPaddedSlots();
765  return query_mem_desc_copy;
766 }
767 
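// Editorial note (not part of the original file), summarizing the strategy
// selection in sort() below: a CUDA build may use the fast baseline sort; a
// GPU-resident layout radix sorts on GPU and falls back to CPU on
// out-of-memory; a top-n sort over more than g_parallel_top_min entries uses
// parallelTop(); everything else builds a permutation of non-empty entries
// and (partially) sorts it with topPermutation().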
768 void ResultSet::sort(const std::list<Analyzer::OrderEntry>& order_entries,
769  size_t top_n,
770  const Executor* executor) {
771  auto timer = DEBUG_TIMER(__func__);
772 
773  if (!storage_) {
774  return;
775  }
776  invalidateCachedRowCount();
777  CHECK(!targets_.empty());
778 #ifdef HAVE_CUDA
779  if (canUseFastBaselineSort(order_entries, top_n)) {
780  baselineSort(order_entries, top_n, executor);
781  return;
782  }
783 #endif // HAVE_CUDA
784  if (query_mem_desc_.sortOnGpu()) {
785  try {
786  radixSortOnGpu(order_entries);
787  } catch (const OutOfMemory&) {
788  LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
789  radixSortOnCpu(order_entries);
790  } catch (const std::bad_alloc&) {
791  LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
792  radixSortOnCpu(order_entries);
793  }
794  return;
795  }
796  // This check isn't strictly required, but allows the index buffer to be 32-bit.
797  if (query_mem_desc_.getEntryCount() > std::numeric_limits<uint32_t>::max()) {
798  throw RowSortException("Sorting more than 4B elements not supported");
799  }
800 
801  CHECK(permutation_.empty());
802 
803  if (top_n && g_parallel_top_min < entryCount()) {
804  if (g_enable_watchdog && g_parallel_top_max < entryCount()) {
805  throw WatchdogException("Sorting the result would be too slow");
806  }
807  parallelTop(order_entries, top_n, executor);
808  } else {
809  if (g_enable_watchdog && Executor::baseline_threshold < entryCount()) {
810  throw WatchdogException("Sorting the result would be too slow");
811  }
812  permutation_.resize(query_mem_desc_.getEntryCount());
813  // PermutationView is used to share common API with parallelTop().
814  PermutationView pv(permutation_.data(), 0, permutation_.size());
815  pv = initPermutationBuffer(pv, 0, permutation_.size());
816  if (top_n == 0) {
817  top_n = pv.size(); // top_n == 0 implies a full sort
818  }
819  pv = topPermutation(pv, top_n, createComparator(order_entries, pv, executor, false));
820  if (pv.size() < permutation_.size()) {
821  permutation_.resize(pv.size());
822  permutation_.shrink_to_fit();
823  }
824  }
825 }
826 
827 #ifdef HAVE_CUDA
828 void ResultSet::baselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
829  const size_t top_n,
830  const Executor* executor) {
831  auto timer = DEBUG_TIMER(__func__);
832  // If we only have one GPU, it's usually faster to do multi-threaded radix sort on CPU
833  if (getGpuCount() > 1) {
834  try {
835  doBaselineSort(ExecutorDeviceType::GPU, order_entries, top_n, executor);
836  } catch (...) {
837  doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n, executor);
838  }
839  } else {
840  doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n, executor);
841  }
842 }
843 #endif // HAVE_CUDA
844 
845 // Append non-empty indexes i in [begin,end) from findStorage(i) to permutation.
846 PermutationView ResultSet::initPermutationBuffer(PermutationView permutation,
847  PermutationIdx const begin,
848  PermutationIdx const end) const {
849  auto timer = DEBUG_TIMER(__func__);
850  for (PermutationIdx i = begin; i < end; ++i) {
851  const auto storage_lookup_result = findStorage(i);
852  const auto lhs_storage = storage_lookup_result.storage_ptr;
853  const auto off = storage_lookup_result.fixedup_entry_idx;
854  CHECK(lhs_storage);
855  if (!lhs_storage->isEmptyEntry(off)) {
856  permutation.push_back(i);
857  }
858  }
859  return permutation;
860 }
861 
862 const Permutation& ResultSet::getPermutationBuffer() const {
863  return permutation_;
864 }
865 
866 void ResultSet::parallelTop(const std::list<Analyzer::OrderEntry>& order_entries,
867  const size_t top_n,
868  const Executor* executor) {
869  auto timer = DEBUG_TIMER(__func__);
870  const size_t nthreads = cpu_threads();
871 
872  // Split permutation_ into nthreads subranges and top-sort in-place.
873  permutation_.resize(query_mem_desc_.getEntryCount());
874  std::vector<PermutationView> permutation_views(nthreads);
875  threading::task_group top_sort_threads;
876  for (auto interval : makeIntervals<PermutationIdx>(0, permutation_.size(), nthreads)) {
877  top_sort_threads.run([this,
878  &order_entries,
879  &permutation_views,
880  top_n,
881  executor,
882  query_id = logger::query_id(),
883  interval] {
884  auto qid_scope_guard = logger::set_thread_local_query_id(query_id);
885  PermutationView pv(permutation_.data() + interval.begin, 0, interval.size());
886  pv = initPermutationBuffer(pv, interval.begin, interval.end);
887  const auto compare = createComparator(order_entries, pv, executor, true);
888  permutation_views[interval.index] = topPermutation(pv, top_n, compare);
889  });
890  }
891  top_sort_threads.wait();
892 
893  // In case you are considering implementing a parallel reduction, note that the
894  // ResultSetComparator constructor is O(N) in order to materialize some of the aggregate
895  // columns as necessary to perform a comparison. This cost is why reduction is chosen to
896  // be serial instead; only one more Comparator is needed below.
897 
898  // Left-copy disjoint top-sorted subranges into one contiguous range.
899  // ++++....+++.....+++++... -> ++++++++++++............
900  auto end = permutation_.begin() + permutation_views.front().size();
901  for (size_t i = 1; i < nthreads; ++i) {
902  std::copy(permutation_views[i].begin(), permutation_views[i].end(), end);
903  end += permutation_views[i].size();
904  }
905 
906  // Top sort final range.
907  PermutationView pv(permutation_.data(), end - permutation_.begin());
908  const auto compare = createComparator(order_entries, pv, executor, false);
909  pv = topPermutation(pv, top_n, compare);
910  permutation_.resize(pv.size());
911  permutation_.shrink_to_fit();
912 }
913 
914 std::pair<size_t, size_t> ResultSet::getStorageIndex(const size_t entry_idx) const {
915  size_t fixedup_entry_idx = entry_idx;
916  auto entry_count = storage_->query_mem_desc_.getEntryCount();
917  const bool is_rowwise_layout = !storage_->query_mem_desc_.didOutputColumnar();
918  if (fixedup_entry_idx < entry_count) {
919  return {0, fixedup_entry_idx};
920  }
921  fixedup_entry_idx -= entry_count;
922  for (size_t i = 0; i < appended_storage_.size(); ++i) {
923  const auto& desc = appended_storage_[i]->query_mem_desc_;
924  CHECK_NE(is_rowwise_layout, desc.didOutputColumnar());
925  entry_count = desc.getEntryCount();
926  if (fixedup_entry_idx < entry_count) {
927  return {i + 1, fixedup_entry_idx};
928  }
929  fixedup_entry_idx -= entry_count;
930  }
931  UNREACHABLE() << "entry_idx = " << entry_idx << ", query_mem_desc_.getEntryCount() = "
932  << query_mem_desc_.getEntryCount();
933  return {};
934 }
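// Illustrative example (not from the original file): if storage_ holds 100
// entries and two appended storages hold 50 each, entry_idx 120 falls in the
// first appended storage, so getStorageIndex(120) returns {1, 20}.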
935 
938 
939 ResultSet::StorageLookupResult ResultSet::findStorage(const size_t entry_idx) const {
940  auto [stg_idx, fixedup_entry_idx] = getStorageIndex(entry_idx);
941  return {stg_idx ? appended_storage_[stg_idx - 1].get() : storage_.get(),
942  fixedup_entry_idx,
943  stg_idx};
944 }
945 
946 template <typename BUFFER_ITERATOR_TYPE>
947 void ResultSet::ResultSetComparator<
948  BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumns() {
949  for (const auto& order_entry : order_entries_) {
950  if (is_distinct_target(result_set_->targets_[order_entry.tle_no - 1])) {
951  count_distinct_materialized_buffers_.emplace_back(
952  materializeCountDistinctColumn(order_entry));
953  }
954  }
955 }
956 
957 template <typename BUFFER_ITERATOR_TYPE>
958 ResultSet::ApproxQuantileBuffers ResultSet::ResultSetComparator<
959  BUFFER_ITERATOR_TYPE>::materializeApproxQuantileColumns() const {
960  ResultSet::ApproxQuantileBuffers approx_quantile_materialized_buffers;
961  for (const auto& order_entry : order_entries_) {
962  if (result_set_->targets_[order_entry.tle_no - 1].agg_kind == kAPPROX_QUANTILE) {
963  approx_quantile_materialized_buffers.emplace_back(
964  materializeApproxQuantileColumn(order_entry));
965  }
966  }
967  return approx_quantile_materialized_buffers;
968 }
969 
970 template <typename BUFFER_ITERATOR_TYPE>
971 std::vector<int64_t>
972 ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumn(
973  const Analyzer::OrderEntry& order_entry) const {
974  const size_t num_storage_entries = result_set_->query_mem_desc_.getEntryCount();
975  std::vector<int64_t> count_distinct_materialized_buffer(num_storage_entries);
976  const CountDistinctDescriptor count_distinct_descriptor =
977  result_set_->query_mem_desc_.getCountDistinctDescriptor(order_entry.tle_no - 1);
978  const size_t num_non_empty_entries = permutation_.size();
979 
980  const auto work = [&, query_id = logger::query_id()](const size_t start,
981  const size_t end) {
982  auto qid_scope_guard = logger::set_thread_local_query_id(query_id);
983  for (size_t i = start; i < end; ++i) {
984  const PermutationIdx permuted_idx = permutation_[i];
985  const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
986  const auto storage = storage_lookup_result.storage_ptr;
987  const auto off = storage_lookup_result.fixedup_entry_idx;
988  const auto value = buffer_itr_.getColumnInternal(
989  storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
990  count_distinct_materialized_buffer[permuted_idx] =
991  count_distinct_set_size(value.i1, count_distinct_descriptor);
992  }
993  };
994  // TODO(tlm): Allow use of tbb after we determine how to easily encapsulate the choice
995  // between thread pool types
996  if (single_threaded_) {
997  work(0, num_non_empty_entries);
998  } else {
999  threading::task_group thread_pool;
1000  for (auto interval : makeIntervals<size_t>(0, num_non_empty_entries, cpu_threads())) {
1001  thread_pool.run([=] { work(interval.begin, interval.end); });
1002  }
1003  thread_pool.wait();
1004  }
1005  return count_distinct_materialized_buffer;
1006 }
1007 
1008 double ResultSet::calculateQuantile(quantile::TDigest* const t_digest) {
1009  static_assert(sizeof(int64_t) == sizeof(quantile::TDigest*));
1010  CHECK(t_digest);
1011  t_digest->mergeBufferFinal();
1012  double const quantile = t_digest->quantile();
1013  return boost::math::isnan(quantile) ? NULL_DOUBLE : quantile;
1014 }
1015 
1016 template <typename BUFFER_ITERATOR_TYPE>
1017 ResultSet::ApproxQuantileBuffers::value_type
1018 ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeApproxQuantileColumn(
1019  const Analyzer::OrderEntry& order_entry) const {
1020  ResultSet::ApproxQuantileBuffers::value_type materialized_buffer(
1021  result_set_->query_mem_desc_.getEntryCount());
1022  const size_t size = permutation_.size();
1023  const auto work = [&, query_id = logger::query_id()](const size_t start,
1024  const size_t end) {
1025  auto qid_scope_guard = logger::set_thread_local_query_id(query_id);
1026  for (size_t i = start; i < end; ++i) {
1027  const PermutationIdx permuted_idx = permutation_[i];
1028  const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
1029  const auto storage = storage_lookup_result.storage_ptr;
1030  const auto off = storage_lookup_result.fixedup_entry_idx;
1031  const auto value = buffer_itr_.getColumnInternal(
1032  storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
1033  materialized_buffer[permuted_idx] =
1034  value.i1 ? calculateQuantile(reinterpret_cast<quantile::TDigest*>(value.i1))
1035  : NULL_DOUBLE;
1036  }
1037  };
1038  if (single_threaded_) {
1039  work(0, size);
1040  } else {
1041  threading::task_group thread_pool;
1042  for (auto interval : makeIntervals<size_t>(0, size, cpu_threads())) {
1043  thread_pool.run([=] { work(interval.begin, interval.end); });
1044  }
1045  thread_pool.wait();
1046  }
1047  return materialized_buffer;
1048 }
1049 
1050 template <typename BUFFER_ITERATOR_TYPE>
1051 bool ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::operator()(
1052  const PermutationIdx lhs,
1053  const PermutationIdx rhs) const {
1054  // NB: The compare function must define a strict weak ordering, otherwise
1055  // std::sort will trigger a segmentation fault (or corrupt memory).
1056  const auto lhs_storage_lookup_result = result_set_->findStorage(lhs);
1057  const auto rhs_storage_lookup_result = result_set_->findStorage(rhs);
1058  const auto lhs_storage = lhs_storage_lookup_result.storage_ptr;
1059  const auto rhs_storage = rhs_storage_lookup_result.storage_ptr;
1060  const auto fixedup_lhs = lhs_storage_lookup_result.fixedup_entry_idx;
1061  const auto fixedup_rhs = rhs_storage_lookup_result.fixedup_entry_idx;
1062  size_t materialized_count_distinct_buffer_idx{0};
1063  size_t materialized_approx_quantile_buffer_idx{0};
1064 
1065  for (const auto& order_entry : order_entries_) {
1066  CHECK_GE(order_entry.tle_no, 1);
1067  // lhs_entry_ti and rhs_entry_ti can differ on comp_param w/ UNION of string dicts.
1068  const auto& lhs_agg_info = lhs_storage->targets_[order_entry.tle_no - 1];
1069  const auto& rhs_agg_info = rhs_storage->targets_[order_entry.tle_no - 1];
1070  const auto lhs_entry_ti = get_compact_type(lhs_agg_info);
1071  const auto rhs_entry_ti = get_compact_type(rhs_agg_info);
1072  // When lhs vs rhs doesn't matter, the lhs is used. For example:
1073  bool float_argument_input = takes_float_argument(lhs_agg_info);
1074  // Need to determine if the float value has been stored as a float
1075  // or compacted to a different (often larger, 8-byte) slot width;
1076  // in the distributed case the floats are actually 4 bytes.
1077  // TODO: takes_float_argument() is widely used; check whether this problem
1078  // exists elsewhere.
1079  if (lhs_entry_ti.get_type() == kFLOAT) {
1080  const auto is_col_lazy =
1081  !result_set_->lazy_fetch_info_.empty() &&
1082  result_set_->lazy_fetch_info_[order_entry.tle_no - 1].is_lazily_fetched;
1083  if (result_set_->query_mem_desc_.getPaddedSlotWidthBytes(order_entry.tle_no - 1) ==
1084  sizeof(float)) {
1085  float_argument_input =
1086  result_set_->query_mem_desc_.didOutputColumnar() ? !is_col_lazy : true;
1087  }
1088  }
1089 
1090  if (UNLIKELY(is_distinct_target(lhs_agg_info))) {
1091  CHECK_LT(materialized_count_distinct_buffer_idx,
1092  count_distinct_materialized_buffers_.size());
1093 
1094  const auto& count_distinct_materialized_buffer =
1095  count_distinct_materialized_buffers_[materialized_count_distinct_buffer_idx];
1096  const auto lhs_sz = count_distinct_materialized_buffer[lhs];
1097  const auto rhs_sz = count_distinct_materialized_buffer[rhs];
1098  ++materialized_count_distinct_buffer_idx;
1099  if (lhs_sz == rhs_sz) {
1100  continue;
1101  }
1102  return (lhs_sz < rhs_sz) != order_entry.is_desc;
1103  } else if (UNLIKELY(lhs_agg_info.agg_kind == kAPPROX_QUANTILE)) {
1104  CHECK_LT(materialized_approx_quantile_buffer_idx,
1105  approx_quantile_materialized_buffers_.size());
1106  const auto& approx_quantile_materialized_buffer =
1107  approx_quantile_materialized_buffers_[materialized_approx_quantile_buffer_idx];
1108  const auto lhs_value = approx_quantile_materialized_buffer[lhs];
1109  const auto rhs_value = approx_quantile_materialized_buffer[rhs];
1110  ++materialized_approx_quantile_buffer_idx;
1111  if (lhs_value == rhs_value) {
1112  continue;
1113  } else if (!lhs_entry_ti.get_notnull()) {
1114  if (lhs_value == NULL_DOUBLE) {
1115  return order_entry.nulls_first;
1116  } else if (rhs_value == NULL_DOUBLE) {
1117  return !order_entry.nulls_first;
1118  }
1119  }
1120  return (lhs_value < rhs_value) != order_entry.is_desc;
1121  }
1122 
1123  const auto lhs_v = buffer_itr_.getColumnInternal(lhs_storage->buff_,
1124  fixedup_lhs,
1125  order_entry.tle_no - 1,
1126  lhs_storage_lookup_result);
1127  const auto rhs_v = buffer_itr_.getColumnInternal(rhs_storage->buff_,
1128  fixedup_rhs,
1129  order_entry.tle_no - 1,
1130  rhs_storage_lookup_result);
1131 
1132  if (UNLIKELY(isNull(lhs_entry_ti, lhs_v, float_argument_input) &&
1133  isNull(rhs_entry_ti, rhs_v, float_argument_input))) {
1134  continue;
1135  }
1136  if (UNLIKELY(isNull(lhs_entry_ti, lhs_v, float_argument_input) &&
1137  !isNull(rhs_entry_ti, rhs_v, float_argument_input))) {
1138  return order_entry.nulls_first;
1139  }
1140  if (UNLIKELY(isNull(rhs_entry_ti, rhs_v, float_argument_input) &&
1141  !isNull(lhs_entry_ti, lhs_v, float_argument_input))) {
1142  return !order_entry.nulls_first;
1143  }
1144 
1145  if (LIKELY(lhs_v.isInt())) {
1146  CHECK(rhs_v.isInt());
1147  if (UNLIKELY(lhs_entry_ti.is_string() &&
1148  lhs_entry_ti.get_compression() == kENCODING_DICT)) {
1149  CHECK_EQ(4, lhs_entry_ti.get_logical_size());
1150  CHECK(executor_);
1151  const auto lhs_string_dict_proxy = executor_->getStringDictionaryProxy(
1152  lhs_entry_ti.get_comp_param(), result_set_->row_set_mem_owner_, false);
1153  const auto rhs_string_dict_proxy = executor_->getStringDictionaryProxy(
1154  rhs_entry_ti.get_comp_param(), result_set_->row_set_mem_owner_, false);
1155  const auto lhs_str = lhs_string_dict_proxy->getString(lhs_v.i1);
1156  const auto rhs_str = rhs_string_dict_proxy->getString(rhs_v.i1);
1157  if (lhs_str == rhs_str) {
1158  continue;
1159  }
1160  return (lhs_str < rhs_str) != order_entry.is_desc;
1161  }
1162 
1163  if (lhs_v.i1 == rhs_v.i1) {
1164  continue;
1165  }
1166  if (lhs_entry_ti.is_fp()) {
1167  if (float_argument_input) {
1168  const auto lhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&lhs_v.i1));
1169  const auto rhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&rhs_v.i1));
1170  return (lhs_dval < rhs_dval) != order_entry.is_desc;
1171  } else {
1172  const auto lhs_dval =
1173  *reinterpret_cast<const double*>(may_alias_ptr(&lhs_v.i1));
1174  const auto rhs_dval =
1175  *reinterpret_cast<const double*>(may_alias_ptr(&rhs_v.i1));
1176  return (lhs_dval < rhs_dval) != order_entry.is_desc;
1177  }
1178  }
1179  return (lhs_v.i1 < rhs_v.i1) != order_entry.is_desc;
1180  } else {
1181  if (lhs_v.isPair()) {
1182  CHECK(rhs_v.isPair());
1183  const auto lhs =
1184  pair_to_double({lhs_v.i1, lhs_v.i2}, lhs_entry_ti, float_argument_input);
1185  const auto rhs =
1186  pair_to_double({rhs_v.i1, rhs_v.i2}, rhs_entry_ti, float_argument_input);
1187  if (lhs == rhs) {
1188  continue;
1189  }
1190  return (lhs < rhs) != order_entry.is_desc;
1191  } else {
1192  CHECK(lhs_v.isStr() && rhs_v.isStr());
1193  const auto lhs = lhs_v.strVal();
1194  const auto rhs = rhs_v.strVal();
1195  if (lhs == rhs) {
1196  continue;
1197  }
1198  return (lhs < rhs) != order_entry.is_desc;
1199  }
1200  }
1201  }
1202  return false;
1203 }
1204 
1205 // Partial sort permutation into top(least by compare) n elements.
1206 // If permutation.size() <= n then sort entire permutation by compare.
1207 // Return PermutationView with new size() = min(n, permutation.size()).
1208 PermutationView ResultSet::topPermutation(PermutationView permutation,
1209  const size_t n,
1210  const Comparator& compare) {
1211  auto timer = DEBUG_TIMER(__func__);
1212  if (n < permutation.size()) {
1213  std::partial_sort(
1214  permutation.begin(), permutation.begin() + n, permutation.end(), compare);
1215  permutation.resize(n);
1216  } else {
1217  std::sort(permutation.begin(), permutation.end(), compare);
1218  }
1219  return permutation;
1220 }
1221 
1222 void ResultSet::radixSortOnGpu(
1223  const std::list<Analyzer::OrderEntry>& order_entries) const {
1224  auto timer = DEBUG_TIMER(__func__);
1225  auto data_mgr = &catalog_->getDataMgr();
1226  const int device_id{0};
1227  auto allocator = std::make_unique<CudaAllocator>(
1228  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
1229  CHECK_GT(block_size_, 0);
1230  CHECK_GT(grid_size_, 0);
1231  std::vector<int64_t*> group_by_buffers(block_size_);
1232  group_by_buffers[0] = reinterpret_cast<int64_t*>(storage_->getUnderlyingBuffer());
1233  auto dev_group_by_buffers =
1234  create_dev_group_by_buffers(allocator.get(),
1235  group_by_buffers,
1236  query_mem_desc_,
1237  block_size_,
1238  grid_size_,
1239  device_id,
1240  ExecutorDispatchMode::KernelPerFragment,
1241  /*num_input_rows=*/-1,
1242  /*prepend_index_buffer=*/true,
1243  /*always_init_group_by_on_host=*/true,
1244  /*use_bump_allocator=*/false,
1245  /*has_varlen_output=*/false,
1246  /*insitu_allocator*=*/nullptr);
1247  inplace_sort_gpu(
1248  order_entries, query_mem_desc_, dev_group_by_buffers, data_mgr, device_id);
1249  copy_group_by_buffers_from_gpu(
1250  *allocator,
1251  group_by_buffers,
1252  query_mem_desc_.getBufferSizeBytes(ExecutorDeviceType::GPU),
1253  dev_group_by_buffers.data,
1254  query_mem_desc_,
1255  block_size_,
1256  grid_size_,
1257  device_id,
1258  /*use_bump_allocator=*/false,
1259  /*has_varlen_output=*/false);
1260 }
1261 
1262 void ResultSet::radixSortOnCpu(
1263  const std::list<Analyzer::OrderEntry>& order_entries) const {
1264  auto timer = DEBUG_TIMER(__func__);
1265  CHECK(!query_mem_desc_.hasKeylessHash());
1266  std::vector<int64_t> tmp_buff(query_mem_desc_.getEntryCount());
1267  std::vector<int32_t> idx_buff(query_mem_desc_.getEntryCount());
1268  CHECK_EQ(size_t(1), order_entries.size());
1269  auto buffer_ptr = storage_->getUnderlyingBuffer();
1270  for (const auto& order_entry : order_entries) {
1271  const auto target_idx = order_entry.tle_no - 1;
1272  const auto sortkey_val_buff = reinterpret_cast<int64_t*>(
1273  buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
1274  const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
1275  sort_groups_cpu(sortkey_val_buff,
1276  &idx_buff[0],
1277  query_mem_desc_.getEntryCount(),
1278  order_entry.is_desc,
1279  chosen_bytes);
1280  apply_permutation_cpu(reinterpret_cast<int64_t*>(buffer_ptr),
1281  &idx_buff[0],
1282  query_mem_desc_.getEntryCount(),
1283  &tmp_buff[0],
1284  sizeof(int64_t));
1285  for (size_t target_idx = 0; target_idx < query_mem_desc_.getSlotCount();
1286  ++target_idx) {
1287  if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
1288  continue;
1289  }
1290  const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
1291  const auto satellite_val_buff = reinterpret_cast<int64_t*>(
1292  buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
1293  apply_permutation_cpu(satellite_val_buff,
1294  &idx_buff[0],
1295  query_mem_desc_.getEntryCount(),
1296  &tmp_buff[0],
1297  chosen_bytes);
1298  }
1299  }
1300 }
1301 
1302 size_t ResultSet::getLimit() const {
1303  return keep_first_;
1304 }
1305 
1306 const std::vector<std::string> ResultSet::getStringDictionaryPayloadCopy(
1307  const int dict_id) const {
1308  const auto sdp = row_set_mem_owner_->getOrAddStringDictProxy(
1309  dict_id, /*with_generation=*/true, catalog_);
1310  CHECK(sdp);
1311  return sdp->getDictionary()->copyStrings();
1312 }
1313 
1314 const std::pair<std::vector<int32_t>, std::vector<std::string>>
1315 ResultSet::getUniqueStringsForDictEncodedTargetCol(const size_t col_idx) const {
1316  const auto col_type_info = getColType(col_idx);
1317  CHECK(col_type_info.is_dict_encoded_string());
1318  std::unordered_set<int32_t> unique_string_ids_set;
1319  const size_t num_entries = entryCount();
1320  std::vector<bool> targets_to_skip(colCount(), true);
1321  targets_to_skip[col_idx] = false;
1322  const auto null_val = inline_fixed_encoding_null_val(col_type_info);
1323 
1324  for (size_t row_idx = 0; row_idx < num_entries; ++row_idx) {
1325  const auto result_row = getRowAtNoTranslations(row_idx, targets_to_skip);
1326  if (!result_row.empty()) {
1327  const auto scalar_col_val = boost::get<ScalarTargetValue>(result_row[col_idx]);
1328  const int32_t string_id = static_cast<int32_t>(boost::get<int64_t>(scalar_col_val));
1329  if (string_id != null_val) {
1330  unique_string_ids_set.emplace(string_id);
1331  }
1332  }
1333  }
1334 
1335  const size_t num_unique_strings = unique_string_ids_set.size();
1336  std::vector<int32_t> unique_string_ids(num_unique_strings);
1337  size_t string_idx{0};
1338  for (const auto unique_string_id : unique_string_ids_set) {
1339  unique_string_ids[string_idx++] = unique_string_id;
1340  }
1341 
1342  const int32_t dict_id = col_type_info.get_comp_param();
1343  const auto sdp = row_set_mem_owner_->getOrAddStringDictProxy(
1344  dict_id, /*with_generation=*/true, catalog_);
1345  CHECK(sdp);
1346 
1347  return std::make_pair(unique_string_ids, sdp->getStrings(unique_string_ids));
1348 }
1349 
1357 bool ResultSet::isDirectColumnarConversionPossible() const {
1358  if (!g_enable_direct_columnarization) {
1359  return false;
1360  } else if (query_mem_desc_.didOutputColumnar()) {
1361  return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
1362  QueryDescriptionType::Projection ||
1363  query_mem_desc_.getQueryDescriptionType() ==
1364  QueryDescriptionType::TableFunction ||
1365  query_mem_desc_.getQueryDescriptionType() ==
1366  QueryDescriptionType::GroupByPerfectHash ||
1367  query_mem_desc_.getQueryDescriptionType() ==
1368  QueryDescriptionType::GroupByBaselineHash);
1369  } else {
1370  CHECK(!(query_mem_desc_.getQueryDescriptionType() ==
1371  QueryDescriptionType::TableFunction));
1372  return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
1373  QueryDescriptionType::GroupByPerfectHash ||
1374  query_mem_desc_.getQueryDescriptionType() ==
1375  QueryDescriptionType::GroupByBaselineHash);
1376  }
1377 }
1378 
1379 bool ResultSet::isZeroCopyColumnarConversionPossible(size_t column_idx) const {
1380  return query_mem_desc_.didOutputColumnar() &&
1381  (query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection ||
1382  query_mem_desc_.getQueryDescriptionType() ==
1383  QueryDescriptionType::TableFunction) &&
1384  appended_storage_.empty() && storage_ &&
1385  (lazy_fetch_info_.empty() || !lazy_fetch_info_[column_idx].is_lazily_fetched);
1386 }
1387 
1388 const int8_t* ResultSet::getColumnarBuffer(size_t column_idx) const {
1389  CHECK(isZeroCopyColumnarConversionPossible(column_idx));
1390  return storage_->getUnderlyingBuffer() + query_mem_desc_.getColOffInBytes(column_idx);
1391 }
1392 
1393 // returns a bitmap (and total number) of all single slot targets
1394 std::tuple<std::vector<bool>, size_t> ResultSet::getSingleSlotTargetBitmap() const {
1395  std::vector<bool> target_bitmap(targets_.size(), true);
1396  size_t num_single_slot_targets = 0;
1397  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
1398  const auto& sql_type = targets_[target_idx].sql_type;
1399  if (targets_[target_idx].is_agg && targets_[target_idx].agg_kind == kAVG) {
1400  target_bitmap[target_idx] = false;
1401  } else if (sql_type.is_varlen()) {
1402  target_bitmap[target_idx] = false;
1403  } else {
1404  num_single_slot_targets++;
1405  }
1406  }
1407  return std::make_tuple(std::move(target_bitmap), num_single_slot_targets);
1408 }
1409 
1418 std::tuple<std::vector<bool>, size_t> ResultSet::getSupportedSingleSlotTargetBitmap()
1419  const {
1420  CHECK(isDirectColumnarConversionPossible());
1421  auto [single_slot_targets, num_single_slot_targets] = getSingleSlotTargetBitmap();
1422 
1423  for (size_t target_idx = 0; target_idx < single_slot_targets.size(); target_idx++) {
1424  const auto& target = targets_[target_idx];
1425  if (single_slot_targets[target_idx] &&
1426  (is_distinct_target(target) || target.agg_kind == kAPPROX_QUANTILE ||
1427  (target.is_agg && target.agg_kind == kSAMPLE && target.sql_type == kFLOAT))) {
1428  single_slot_targets[target_idx] = false;
1429  num_single_slot_targets--;
1430  }
1431  }
1432  CHECK_GE(num_single_slot_targets, size_t(0));
1433  return std::make_tuple(std::move(single_slot_targets), num_single_slot_targets);
1434 }
1435 
1436 // returns the starting slot index for all targets in the result set
1437 std::vector<size_t> ResultSet::getSlotIndicesForTargetIndices() const {
1438  std::vector<size_t> slot_indices(targets_.size(), 0);
1439  size_t slot_index = 0;
1440  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
1441  slot_indices[target_idx] = slot_index;
1442  slot_index = advance_slot(slot_index, targets_[target_idx], false);
1443  }
1444  return slot_indices;
1445 }
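// Illustrative example (not from the original file): for targets
// [SUM, AVG, MAX] the returned slot indices would be [0, 1, 3], since AVG
// occupies two slots (sum and count) while the other aggregates occupy one.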
1446 
1447 // namespace result_set
1448 
1449 bool result_set::can_use_parallel_algorithms(const ResultSet& rows) {
1450  return !rows.isTruncated();
1451 }
1452 
1453 namespace {
1454 struct IsDictEncodedStr {
1455  bool operator()(TargetInfo const& target_info) const {
1456  return target_info.sql_type.is_dict_encoded_string();
1457  }
1458 };
1459 } // namespace
1460 
1461 std::optional<size_t> result_set::first_dict_encoded_idx(
1462  std::vector<TargetInfo> const& targets) {
1463  auto const itr = std::find_if(targets.begin(), targets.end(), IsDictEncodedStr{});
1464  return itr == targets.end() ? std::nullopt
1465  : std::make_optional<size_t>(itr - targets.begin());
1466 }
1467 
1468 bool result_set::use_parallel_algorithms(const ResultSet& rows) {
1469  return result_set::can_use_parallel_algorithms(rows) && rows.entryCount() >= 20000;
1470 }
Definition: ResultSet.cpp:1379
size_t g_parallel_top_min
Definition: ResultSet.cpp:47
int8_t * getHostEstimatorBuffer() const
Definition: ResultSet.cpp:692
DEVICE size_type size() const
Definition: VectorView.h:84
void invalidateCachedRowCount() const
Definition: ResultSet.cpp:605
const ResultSetStorage * allocateStorage() const
size_t advance_slot(const size_t j, const TargetInfo &target_info, const bool separate_varlen_storage)
int64_t count_distinct_set_size(const int64_t set_handle, const CountDistinctDescriptor &count_distinct_desc)
Definition: CountDistinct.h:77
bool operator()(TargetInfo const &target_info) const
Definition: ResultSet.cpp:1455
void sort(const std::list< Analyzer::OrderEntry > &order_entries, size_t top_n, const Executor *executor)
Definition: ResultSet.cpp:768
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
void setQueueTime(const int64_t queue_time)
Definition: ResultSet.cpp:710
#define CHECK_NE(x, y)
Definition: Logger.h:232
void dropFirstN(const size_t n)
Definition: ResultSet.cpp:57
DEVICE T * begin() const
Definition: VectorView.h:60
std::vector< PermutationIdx > Permutation
Definition: ResultSet.h:155
std::tuple< std::vector< bool >, size_t > getSingleSlotTargetBitmap() const
Definition: ResultSet.cpp:1394
bool g_enable_watchdog
#define LIKELY(x)
Definition: likely.h:24
void * checked_calloc(const size_t nmemb, const size_t size)
Definition: checked_alloc.h:53
StorageLookupResult findStorage(const size_t entry_idx) const
Definition: ResultSet.cpp:939
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:153
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const DictRef dest_dict_ref, const std::vector< int32_t > &source_ids, const DictRef source_dict_ref, const int32_t dest_generation)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
ResultSetPtr copy()
Definition: ResultSet.cpp:329
std::function< bool(const PermutationIdx, const PermutationIdx)> Comparator
Definition: ResultSet.h:157
bool g_enable_smem_group_by true
static double calculateQuantile(quantile::TDigest *const t_digest)
Definition: ResultSet.cpp:1008
T row_ptr_rowwise(T buff, const QueryMemoryDescriptor &query_mem_desc, const size_t entry_idx)
void radixSortOnGpu(const std::list< Analyzer::OrderEntry > &order_entries) const
Definition: ResultSet.cpp:1222
const ResultSetStorage * getStorage() const
Definition: ResultSet.cpp:409
Value parallel_reduce(const blocked_range< Int > &range, const Value &identity, const RealBody &real_body, const Reduction &reduction, const Partitioner &p=Partitioner())
Parallel iteration with reduction.
const std::pair< std::vector< int32_t >, std::vector< std::string > > getUniqueStringsForDictEncodedTargetCol(const size_t col_idx) const
Definition: ResultSet.cpp:1315
int64_t getQueueTime() const
Definition: ResultSet.cpp:722
#define UNLIKELY(x)
Definition: likely.h:25
uint32_t PermutationIdx
Definition: ResultSet.h:154
#define CHECK_LT(x, y)
Definition: Logger.h:233
Definition: sqltypes.h:52
SQLTypeInfo getColType(const size_t col_idx) const
Definition: ResultSet.cpp:417
std::tuple< std::vector< bool >, size_t > getSupportedSingleSlotTargetBitmap() const
Definition: ResultSet.cpp:1418
ExecutorDeviceType getDeviceType() const
Definition: ResultSet.cpp:250
StringDictionaryProxy * getStringDictionaryProxy(int const dict_id) const
Definition: ResultSet.cpp:426
const int8_t * getColumnarBuffer(size_t column_idx) const
Definition: ResultSet.cpp:1388
bool isExplain() const
Definition: ResultSet.cpp:740
void eachCellInColumn(RowIterationState &, CellCallback const &)
Definition: ResultSet.cpp:485
StringDictionaryProxy::IdMap const id_map_
Definition: ResultSet.cpp:434
static Data_Namespace::AbstractBuffer * allocGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, const size_t num_bytes, const int device_id)
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:756
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
size_t rowCountImpl(const bool force_parallel) const
Definition: ResultSet.cpp:555
void baselineSort(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n, const Executor *executor)
const Permutation & getPermutationBuffer() const
Definition: ResultSet.cpp:862
void append(ResultSet &that)
Definition: ResultSet.cpp:299
std::string summaryToString() const
Definition: ResultSet.cpp:218
data_mgr_(data_mgr)
static PermutationView topPermutation(PermutationView, const size_t n, const Comparator &)
Definition: ResultSet.cpp:1208
size_t getCurrentRowBufferIndex() const
Definition: ResultSet.cpp:291
bool g_enable_watchdog false
Definition: Execute.cpp:79
#define CHECK(condition)
Definition: Logger.h:223
QueryId query_id()
Definition: Logger.cpp:470
#define DEBUG_TIMER(name)
Definition: Logger.h:370
int8_t * getDeviceEstimatorBuffer() const
Definition: ResultSet.cpp:686
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
bool operator()(const PermutationIdx lhs, const PermutationIdx rhs) const
Definition: ResultSet.cpp:1051
Basic constructors and methods of the row set interface.
bool isEmpty() const
Returns a boolean signifying whether there are valid entries in the result set.
Definition: ResultSet.cpp:649
bool is_dict_encoded_string() const
Definition: sqltypes.h:548
const std::vector< int64_t > & getTargetInitVals() const
Definition: ResultSet.cpp:681
std::vector< size_t > getSlotIndicesForTargetIndices() const
Definition: ResultSet.cpp:1437
Allocate GPU memory using GpuBuffers via DataMgr.
constexpr double n
Definition: Utm.h:38
Execution unit for relational algebra. It&#39;s a low-level description of any relational algebra operati...
constexpr int64_t uninitialized_cached_row_count
Definition: ResultSet.cpp:50
Definition: Analyzer.h:2227
int cpu_threads()
Definition: thread_count.h:24
T get_cols_ptr(T buff, const QueryMemoryDescriptor &query_mem_desc)
Definition: sqldefs.h:74
void translateDictEncodedColumns(std::vector< TargetInfo > const &, size_t const start_idx)
Definition: ResultSet.cpp:455
void copy_group_by_buffers_from_gpu(DeviceAllocator &device_allocator, const std::vector< int64_t * > &group_by_buffers, const size_t groups_buffer_size, const int8_t *group_by_dev_buffers_mem, const QueryMemoryDescriptor &query_mem_desc, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const bool prepend_index_buffer, const bool has_varlen_output)
void operator()(int8_t const *const cell_ptr) const
Definition: ResultSet.cpp:440
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1449
int64_t getRenderTime() const
Definition: ResultSet.cpp:727
void setCachedRowCount(const size_t row_count) const
Definition: ResultSet.cpp:609
bool isDirectColumnarConversionPossible() const
Definition: ResultSet.cpp:1357
size_t get_key_bytes_rowwise(const QueryMemoryDescriptor &query_mem_desc)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
size_t binSearchRowCount() const
Definition: ResultSet.cpp:616
int getDeviceId() const
Definition: ResultSet.cpp:752
DEVICE T * end() const
Definition: VectorView.h:68