OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ResultSet.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #ifndef QUERYENGINE_RESULTSET_H
26 #define QUERYENGINE_RESULTSET_H
27 
28 #include "CardinalityEstimator.h"
29 #include "DataMgr/Chunk/Chunk.h"
31 #include "ResultSetStorage.h"
32 #include "TargetValue.h"
33 
34 #include <atomic>
35 #include <functional>
36 #include <list>
37 
38 /*
39  * Stores the underlying buffer and the meta-data for a result set. The buffer
40  * format reflects the main requirements for result sets. Not all queries
41  * specify a GROUP BY clause, but since it's the most important and challenging
42  * case we'll focus on it. Note that the meta-data is stored separately from
43  * the buffer and it's not transferred to GPU.
44  *
45  * 1. It has to be efficient for reduction of partial GROUP BY query results
46  * from multiple devices / cores, since the cardinalities can be high. Reduction
47  * currently happens on the host.
48  * 2. No conversions should be needed when buffers are transferred from GPU to
49  * host for reduction. This implies the buffer needs to be "flat", with no
50  * pointers to chase since they have no meaning in a different address space.
51  * 3. Must be size-efficient.
52  *
53  * There are several variations of the format of a result set buffer, but the
54  * most common is a sequence of entries which represent a row in the result or
55  * an empty slot. One entry looks as follows:
56  *
57  * +-+-+-+-+-+-+-+-+-+-+-+--?--+-+-+-+-+-+-+-+-+-+-+-+-+
58  * |key_0| ... |key_N-1| padding |value_0|...|value_M-1|
59  * +-+-+-+-+-+-+-+-+-+-+-+--?--+-+-+-+-+-+-+-+-+-+-+-+-+
60  *
61  * (key_0 ... key_N-1) is a multi-component key, unique within the buffer.
62  * It stores the tuple specified by the GROUP BY clause. All components have
63  * the same width, 4 or 8 bytes. For the 4-byte components, 4-byte padding is
64  * added if the number of components is odd. Not all entries in the buffer are
65  * valid; an empty entry contains EMPTY_KEY_{64, 32} for 8-byte / 4-byte width,
66  * respectively. An empty entry is ignored by subsequent operations on the
67  * result set (reduction, iteration, sort etc).
68  *
69  * value_0 through value_M-1 are 8-byte fields which hold the columns of the
70  * result, like aggregates and projected expressions; M need not equal N. They're
71  * reduced between multiple partial results for identical (key_0 ... key_N-1) tuples.
72  *
73  * The order of entries is decided by the type of hash used, which depends on
74  * the range of the keys. For small enough ranges, a perfect hash is used. When
75  * a perfect hash isn't feasible, open addressing (using MurmurHash) with linear
76  * probing is used instead, with a 50% fill rate.
77  */
78 
79 struct ReductionCode;
80 
81 namespace Analyzer {
82 
83 class Expr;
84 class Estimator;
85 struct OrderEntry;
86 
87 } // namespace Analyzer
88 
89 class Executor;
90 
91 class ResultSet;
92 
94  public:
95  using value_type = std::vector<TargetValue>;
96  using difference_type = std::ptrdiff_t;
97  using pointer = std::vector<TargetValue>*;
98  using reference = std::vector<TargetValue>&;
99  using iterator_category = std::input_iterator_tag;
100 
101  bool operator==(const ResultSetRowIterator& other) const {
102  return result_set_ == other.result_set_ &&
104  }
105  bool operator!=(const ResultSetRowIterator& other) const { return !(*this == other); } // defined purely as the negation of operator==
106 
107  inline value_type operator*() const;
108  inline ResultSetRowIterator& operator++(void);
110  ResultSetRowIterator iter(*this);
111  ++(*this);
112  return iter;
113  }
114 
115  size_t getCurrentRowBufferIndex() const { // returns crt_row_buff_idx_ - 1; throws std::runtime_error when crt_row_buff_idx_ is 0
116  if (crt_row_buff_idx_ == 0) { // 0 is the value the constructor initializes crt_row_buff_idx_ to
117  throw std::runtime_error("current row buffer iteration index is undefined");
118  }
119  return crt_row_buff_idx_ - 1; // guarded above, so the unsigned subtraction cannot wrap
120  }
121 
122  private:
130 
132  bool translate_strings,
133  bool decimal_to_double)
134  : result_set_(rs)
135  , crt_row_buff_idx_(0)
136  , global_entry_idx_(0)
138  , fetched_so_far_(0)
139  , translate_strings_(translate_strings)
140  , decimal_to_double_(decimal_to_double){};
141 
143 
144  friend class ResultSet;
145 };
146 
147 class TSerializedRows;
148 class ResultSetBuilder;
149 
150 using AppendedStorage = std::vector<std::unique_ptr<ResultSetStorage>>;
151 
152 class ResultSet {
153  public:
155  // Can use derivatives of the builder class to construct a ResultSet
156 
157  ResultSet(const std::vector<TargetInfo>& targets,
158  const ExecutorDeviceType device_type,
160  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
161  const Executor* executor);
162 
163  ResultSet(const std::vector<TargetInfo>& targets,
164  const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
165  const std::vector<std::vector<const int8_t*>>& col_buffers,
166  const std::vector<std::vector<int64_t>>& frag_offsets,
167  const std::vector<int64_t>& consistent_frag_sizes,
168  const ExecutorDeviceType device_type,
169  const int device_id,
171  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
172  const Executor* executor);
173 
174  ResultSet(const std::shared_ptr<const Analyzer::Estimator>,
175  const ExecutorDeviceType device_type,
176  const int device_id,
177  Data_Namespace::DataMgr* data_mgr);
178 
179  ResultSet(const std::string& explanation);
180 
181  ResultSet(int64_t queue_time_ms,
182  int64_t render_time_ms,
183  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
184 
185  ~ResultSet();
186 
187  inline ResultSetRowIterator rowIterator(size_t from_logical_index, // builds an iterator over this result set and returns it after from_logical_index + 1 increments
188  bool translate_strings, // passed through unchanged to the ResultSetRowIterator constructor
189  bool decimal_to_double) const { // passed through unchanged to the ResultSetRowIterator constructor
190  ResultSetRowIterator rowIterator(this, translate_strings, decimal_to_double); // the local has the same name as this method and shadows it inside the body
191 
192  // move to first logical position
193  ++rowIterator;
194 
195  for (size_t index = 0; index < from_logical_index; index++) { // one further increment per requested logical index
196  ++rowIterator;
197  }
198 
199  return rowIterator; // returned by value
200  }
201 
202  inline ResultSetRowIterator rowIterator(bool translate_strings, // convenience overload: delegates to the three-argument form
203  bool decimal_to_double) const {
204  return rowIterator(0, translate_strings, decimal_to_double); // from_logical_index = 0, so the iterator is incremented exactly once
205  }
206 
208 
209  const ResultSetStorage* allocateStorage() const;
210 
211  const ResultSetStorage* allocateStorage(int8_t*, const std::vector<int64_t>&) const;
212 
213  const ResultSetStorage* allocateStorage(const std::vector<int64_t>&) const;
214 
215  void updateStorageEntryCount(const size_t new_entry_count) {
217  query_mem_desc_.setEntryCount(new_entry_count);
218  CHECK(storage_);
219  storage_->updateEntryCount(new_entry_count);
220  }
221 
222  std::vector<TargetValue> getNextRow(const bool translate_strings,
223  const bool decimal_to_double) const;
224 
225  size_t getCurrentRowBufferIndex() const;
226 
227  std::vector<TargetValue> getRowAt(const size_t index) const;
228 
229  TargetValue getRowAt(const size_t row_idx,
230  const size_t col_idx,
231  const bool translate_strings,
232  const bool decimal_to_double = true) const;
233 
234  // Specialized random access getter for result sets with a single column to
235  // avoid the overhead of building a std::vector<TargetValue> result with only
236  // one element. Only used by RelAlgTranslator::getInIntegerSetExpr currently.
237  OneIntegerColumnRow getOneColRow(const size_t index) const;
238 
239  std::vector<TargetValue> getRowAtNoTranslations(
240  const size_t index,
241  const std::vector<bool>& targets_to_skip = {}) const;
242 
243  bool isRowAtEmpty(const size_t index) const;
244 
245  void sort(const std::list<Analyzer::OrderEntry>& order_entries, const size_t top_n);
246 
247  void keepFirstN(const size_t n);
248 
249  void dropFirstN(const size_t n);
250 
251  void append(ResultSet& that);
252 
253  const ResultSetStorage* getStorage() const;
254 
255  size_t colCount() const;
256 
257  SQLTypeInfo getColType(const size_t col_idx) const;
258 
259  size_t rowCount(const bool force_parallel = false) const;
260 
261  void setCachedRowCount(const size_t row_count) const;
262 
263  size_t entryCount() const;
264 
265  size_t getBufferSizeBytes(const ExecutorDeviceType device_type) const;
266 
267  bool definitelyHasNoRows() const;
268 
269  const QueryMemoryDescriptor& getQueryMemDesc() const;
270 
271  const std::vector<TargetInfo>& getTargetInfos() const;
272 
273  const std::vector<int64_t>& getTargetInitVals() const;
274 
275  int8_t* getDeviceEstimatorBuffer() const;
276 
277  int8_t* getHostEstimatorBuffer() const;
278 
279  void syncEstimatorBuffer() const;
280 
281  size_t getNDVEstimator() const;
282 
284  // all in ms
286  int64_t render_time{0};
288  int64_t kernel_queue_time{0};
289  };
290 
291  void setQueueTime(const int64_t queue_time);
292  void setKernelQueueTime(const int64_t kernel_queue_time);
293  void addCompilationQueueTime(const int64_t compilation_queue_time);
294 
295  int64_t getQueueTime() const;
296  int64_t getRenderTime() const;
297 
298  void moveToBegin() const;
299 
300  bool isTruncated() const;
301 
302  bool isExplain() const;
303 
304  bool isGeoColOnGpu(const size_t col_idx) const;
305  int getDeviceId() const;
306 
307  // Called from the executor because in the new ResultSet we assume the 'padded' field
308  // in SlotSize already contains the padding, whereas in the executor it's computed.
309  // Once the buffer initialization moves to ResultSet we can remove this method.
311 
312  void fillOneEntry(const std::vector<int64_t>& entry) { // forwards entry to storage_, choosing the column-wise or row-wise filler
313  CHECK(storage_); // storage_ must already be set before this is called
314  if (storage_->query_mem_desc_.didOutputColumnar()) { // the layout flag is read from the storage's own query_mem_desc_
315  storage_->fillOneEntryColWise(entry);
316  } else {
317  storage_->fillOneEntryRowWise(entry);
318  }
319  }
320 
321  void initializeStorage() const;
322 
323  void holdChunks(const std::list<std::shared_ptr<Chunk_NS::Chunk>>& chunks) { // stores a copy of the list of shared_ptrs in chunks_
324  chunks_ = chunks; // assignment: replaces whatever chunks_ held before rather than appending
325  }
326  void holdChunkIterators(const std::shared_ptr<std::list<ChunkIter>> chunk_iters) { // appends the shared_ptr to chunk_iters_
327  chunk_iters_.push_back(chunk_iters); // earlier entries are kept; the parameter is const, so this copies the shared_ptr
328  }
329  void holdLiterals(std::vector<int8_t>& literal_buff) { // appends the buffer to literal_buffers_ by moving it out of the caller's vector
330  literal_buffers_.push_back(std::move(literal_buff)); // NOTE(review): literal_buff is a plain lvalue reference, so the caller's vector is left moved-from after this call
331  }
332 
333  std::shared_ptr<RowSetMemoryOwner> getRowSetMemOwner() const { // returns row_set_mem_owner_ by value
334  return row_set_mem_owner_; // copies the shared_ptr, so the caller shares ownership
335  }
336 
337  const std::vector<uint32_t>& getPermutationBuffer() const;
338  const bool isPermutationBufferEmpty() const { return permutation_.empty(); }; // true when permutation_ has no elements; NOTE(review): the const on the by-value return and the trailing ';' are both redundant
339 
340  void serialize(TSerializedRows& serialized_rows) const;
341 
342  static std::unique_ptr<ResultSet> unserialize(const TSerializedRows& serialized_rows,
343  const Executor*);
344 
345  size_t getLimit() const;
346 
350  enum class GeoReturnType {
353  WktString,
358  };
361 
362  void copyColumnIntoBuffer(const size_t column_idx,
363  int8_t* output_buffer,
364  const size_t output_buffer_size) const;
365 
367 
368  bool didOutputColumnar() const { return this->query_mem_desc_.didOutputColumnar(); } // forwards to this result set's query_mem_desc_
369 
370  bool isZeroCopyColumnarConversionPossible(size_t column_idx) const;
371  const int8_t* getColumnarBuffer(size_t column_idx) const;
372 
375  }
376 
377  const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const { // forwards to query_mem_desc_; NOTE(review): const on the by-value int8_t return has no effect
378  return query_mem_desc_.getPaddedSlotWidthBytes(slot_idx); // slot_idx is passed through unchecked here
379  }
380 
381  // returns a bitmap of all single-slot targets, as well as its count
382  std::tuple<std::vector<bool>, size_t> getSingleSlotTargetBitmap() const;
383 
384  std::tuple<std::vector<bool>, size_t> getSupportedSingleSlotTargetBitmap() const;
385 
386  std::vector<size_t> getSlotIndicesForTargetIndices() const;
387 
388  const std::vector<ColumnLazyFetchInfo>& getLazyFetchInfo() const { // read-only access to lazy_fetch_info_
389  return lazy_fetch_info_; // returns a reference to the member, not a copy
390  }
391 
392  void setSeparateVarlenStorageValid(const bool val) {
394  }
395 
396  std::shared_ptr<const std::vector<std::string>> getStringDictionaryPayloadCopy(
397  const int dict_id) const;
398 
399  template <typename ENTRY_TYPE, QueryDescriptionType QUERY_TYPE, bool COLUMNAR_FORMAT>
400  ENTRY_TYPE getEntryAt(const size_t row_idx,
401  const size_t target_idx,
402  const size_t slot_idx) const;
403 
404  private:
406 
407  std::vector<TargetValue> getNextRowImpl(const bool translate_strings,
408  const bool decimal_to_double) const;
409 
410  std::vector<TargetValue> getNextRowUnlocked(const bool translate_strings,
411  const bool decimal_to_double) const;
412 
413  std::vector<TargetValue> getRowAt(const size_t index,
414  const bool translate_strings,
415  const bool decimal_to_double,
416  const bool fixup_count_distinct_pointers,
417  const std::vector<bool>& targets_to_skip = {}) const;
418 
419  // NOTE: just for direct columnarization use at the moment
420  template <typename ENTRY_TYPE>
421  ENTRY_TYPE getColumnarPerfectHashEntryAt(const size_t row_idx,
422  const size_t target_idx,
423  const size_t slot_idx) const;
424 
425  template <typename ENTRY_TYPE>
426  ENTRY_TYPE getRowWisePerfectHashEntryAt(const size_t row_idx,
427  const size_t target_idx,
428  const size_t slot_idx) const;
429 
430  template <typename ENTRY_TYPE>
431  ENTRY_TYPE getRowWiseBaselineEntryAt(const size_t row_idx,
432  const size_t target_idx,
433  const size_t slot_idx) const;
434 
435  template <typename ENTRY_TYPE>
436  ENTRY_TYPE getColumnarBaselineEntryAt(const size_t row_idx,
437  const size_t target_idx,
438  const size_t slot_idx) const;
439 
440  size_t binSearchRowCount() const;
441 
442  size_t parallelRowCount() const;
443 
444  size_t advanceCursorToNextEntry() const;
445 
446  void radixSortOnGpu(const std::list<Analyzer::OrderEntry>& order_entries) const;
447 
448  void radixSortOnCpu(const std::list<Analyzer::OrderEntry>& order_entries) const;
449 
450  static bool isNull(const SQLTypeInfo& ti,
451  const InternalTargetValue& val,
452  const bool float_argument_input);
453 
455  int8_t* rowwise_target_ptr,
456  int8_t* keys_ptr,
457  const size_t entry_buff_idx,
458  const TargetInfo& target_info,
459  const size_t target_logical_idx,
460  const size_t slot_idx,
461  const bool translate_strings,
462  const bool decimal_to_double,
463  const bool fixup_count_distinct_pointers) const;
464 
465  TargetValue getTargetValueFromBufferColwise(const int8_t* col_ptr,
466  const int8_t* keys_ptr,
468  const size_t local_entry_idx,
469  const size_t global_entry_idx,
470  const TargetInfo& target_info,
471  const size_t target_logical_idx,
472  const size_t slot_idx,
473  const bool translate_strings,
474  const bool decimal_to_double) const;
475 
476  TargetValue makeTargetValue(const int8_t* ptr,
477  const int8_t compact_sz,
478  const TargetInfo& target_info,
479  const size_t target_logical_idx,
480  const bool translate_strings,
481  const bool decimal_to_double,
482  const size_t entry_buff_idx) const;
483 
484  TargetValue makeVarlenTargetValue(const int8_t* ptr1,
485  const int8_t compact_sz1,
486  const int8_t* ptr2,
487  const int8_t compact_sz2,
488  const TargetInfo& target_info,
489  const size_t target_logical_idx,
490  const bool translate_strings,
491  const size_t entry_buff_idx) const;
492 
494  int8_t* ptr1;
495  int8_t compact_sz1;
496  int8_t* ptr2;
497  int8_t compact_sz2;
498 
500  : ptr1(nullptr), compact_sz1(0), ptr2(nullptr), compact_sz2(0) {}
501  };
502  TargetValue makeGeoTargetValue(const int8_t* geo_target_ptr,
503  const size_t slot_idx,
504  const TargetInfo& target_info,
505  const size_t target_logical_idx,
506  const size_t entry_buff_idx) const;
507 
510  const size_t fixedup_entry_idx;
511  const size_t storage_idx;
512  };
513 
515  const int8_t* buff,
516  const size_t entry_idx,
517  const size_t target_logical_idx,
518  const StorageLookupResult& storage_lookup_result) const;
519 
520  InternalTargetValue getVarlenOrderEntry(const int64_t str_ptr,
521  const size_t str_len) const;
522 
523  int64_t lazyReadInt(const int64_t ival,
524  const size_t target_logical_idx,
525  const StorageLookupResult& storage_lookup_result) const;
526 
530  std::pair<size_t, size_t> getStorageIndex(const size_t entry_idx) const;
531 
532  const std::vector<const int8_t*>& getColumnFrag(const size_t storge_idx,
533  const size_t col_logical_idx,
534  int64_t& global_idx) const;
535 
536  StorageLookupResult findStorage(const size_t entry_idx) const;
537 
538  struct TargetOffsets { // plain aggregate holding two (pointer, size) pairs
539  const int8_t* ptr1; // first pointer; the pointee is const, the pointer itself is not
540  const size_t compact_sz1; // size paired with ptr1 (presumably a byte width - TODO confirm against the users in ResultSet.cpp)
541  const int8_t* ptr2; // second pointer
542  const size_t compact_sz2; // size paired with ptr2; the const size_t members make the struct non-assignable
543  };
544 
546  RowWiseTargetAccessor(const ResultSet* result_set)
547  : result_set_(result_set)
549  , key_width_(result_set_->query_mem_desc_.getEffectiveKeyWidth())
553  }
554 
556  const int8_t* buff,
557  const size_t entry_idx,
558  const size_t target_logical_idx,
559  const StorageLookupResult& storage_lookup_result) const;
560 
562 
563  inline const int8_t* get_rowwise_ptr(const int8_t* buff, // returns the address entry_idx * row_bytes_ bytes past buff
564  const size_t entry_idx) const {
565  return buff + entry_idx * row_bytes_; // plain pointer arithmetic; no bounds check is performed here
566  }
567 
568  std::vector<std::vector<TargetOffsets>> offsets_for_storage_;
569 
571 
572  // Row-wise iteration
573  const size_t row_bytes_;
574  const size_t key_width_;
576  };
577 
579  ColumnWiseTargetAccessor(const ResultSet* result_set) : result_set_(result_set) {
581  }
582 
584 
586  const int8_t* buff,
587  const size_t entry_idx,
588  const size_t target_logical_idx,
589  const StorageLookupResult& storage_lookup_result) const;
590 
591  std::vector<std::vector<TargetOffsets>> offsets_for_storage_;
592 
594  };
595 
596  template <typename BUFFER_ITERATOR_TYPE>
598  using BufferIteratorType = BUFFER_ITERATOR_TYPE;
599 
600  ResultSetComparator(const std::list<Analyzer::OrderEntry>& order_entries,
601  const bool use_heap,
602  const ResultSet* result_set)
603  : order_entries_(order_entries)
604  , use_heap_(use_heap)
605  , result_set_(result_set)
606  , buffer_itr_(result_set) {
608  }
609 
611 
612  std::vector<int64_t> materializeCountDistinctColumn(
613  const Analyzer::OrderEntry& order_entry) const;
614 
615  bool operator()(const uint32_t lhs, const uint32_t rhs) const;
616 
617  // TODO(adb): make order_entries_ a pointer
618  const std::list<Analyzer::OrderEntry> order_entries_;
619  const bool use_heap_;
622  std::vector<std::vector<int64_t>> count_distinct_materialized_buffers_;
623  };
624 
625  std::function<bool(const uint32_t, const uint32_t)> createComparator(
626  const std::list<Analyzer::OrderEntry>& order_entries,
627  const bool use_heap) {
628  auto timer = DEBUG_TIMER(__func__);
631  std::make_unique<ResultSetComparator<ColumnWiseTargetAccessor>>(
632  order_entries, use_heap, this);
633  return [this](const uint32_t lhs, const uint32_t rhs) -> bool {
634  return (*this->column_wise_comparator_)(lhs, rhs);
635  };
636  } else {
637  row_wise_comparator_ = std::make_unique<ResultSetComparator<RowWiseTargetAccessor>>(
638  order_entries, use_heap, this);
639  return [this](const uint32_t lhs, const uint32_t rhs) -> bool {
640  return (*this->row_wise_comparator_)(lhs, rhs);
641  };
642  }
643  }
644 
645  static void topPermutation(
646  std::vector<uint32_t>& to_sort,
647  const size_t n,
648  const std::function<bool(const uint32_t, const uint32_t)> compare);
649 
650  void sortPermutation(const std::function<bool(const uint32_t, const uint32_t)> compare);
651 
652  std::vector<uint32_t> initPermutationBuffer(const size_t start, const size_t step);
653 
654  void parallelTop(const std::list<Analyzer::OrderEntry>& order_entries,
655  const size_t top_n);
656 
657  void baselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
658  const size_t top_n);
659 
660  void doBaselineSort(const ExecutorDeviceType device_type,
661  const std::list<Analyzer::OrderEntry>& order_entries,
662  const size_t top_n);
663 
664  bool canUseFastBaselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
665  const size_t top_n);
666 
668 
669  int getGpuCount() const;
670 
671  void serializeProjection(TSerializedRows& serialized_rows) const;
672  void serializeVarlenAggColumn(int8_t* buf,
673  std::vector<std::string>& varlen_bufer) const;
674 
675  void serializeCountDistinctColumns(TSerializedRows&) const;
676 
677  void unserializeCountDistinctColumns(const TSerializedRows&);
678 
680 
681  using BufferSet = std::set<int64_t>;
682  void create_active_buffer_set(BufferSet& count_distinct_active_buffer_set) const;
683 
684  int64_t getDistinctBufferRefFromBufferRowwise(int8_t* rowwise_target_ptr,
685  const TargetInfo& target_info) const;
686 
687  const std::vector<TargetInfo> targets_;
689  const int device_id_;
691  mutable std::unique_ptr<ResultSetStorage> storage_;
693  mutable size_t crt_row_buff_idx_;
694  mutable size_t fetched_so_far_;
695  size_t drop_first_;
696  size_t keep_first_;
697  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
698  std::vector<uint32_t> permutation_;
699 
701  const Executor* executor_; // TODO(alex): remove
702 
703  std::list<std::shared_ptr<Chunk_NS::Chunk>> chunks_;
704  std::vector<std::shared_ptr<std::list<ChunkIter>>> chunk_iters_;
705  // TODO(miyu): refine by using one buffer and
706  // setting offset instead of ptr in group by buffer.
707  std::vector<std::vector<int8_t>> literal_buffers_;
708  const std::vector<ColumnLazyFetchInfo> lazy_fetch_info_;
709  std::vector<std::vector<std::vector<const int8_t*>>> col_buffers_;
710  std::vector<std::vector<std::vector<int64_t>>> frag_offsets_;
711  std::vector<std::vector<int64_t>> consistent_frag_sizes_;
712 
713  const std::shared_ptr<const Analyzer::Estimator> estimator_;
715  mutable int8_t* host_estimator_buffer_{nullptr};
717 
718  // only used by serialization
719  using SerializedVarlenBufferStorage = std::vector<std::string>;
720 
721  std::vector<SerializedVarlenBufferStorage> serialized_varlen_buffer_;
723  std::string explanation_;
724  const bool just_explain_;
725  mutable std::atomic<int64_t> cached_row_count_;
726  mutable std::mutex row_iteration_mutex_;
727 
728  // only used by geo
730 
731  // comparators used for sorting (note that the actual compare function is accessed using
732  // the createComparator method)
733  std::unique_ptr<ResultSetComparator<RowWiseTargetAccessor>> row_wise_comparator_;
734  std::unique_ptr<ResultSetComparator<ColumnWiseTargetAccessor>> column_wise_comparator_;
735 
736  friend class ResultSetManager;
737  friend class ResultSetRowIterator;
738  friend class ColumnarResults;
739 };
740 
743  return {};
744  }
745 
746  if (result_set_->just_explain_) {
747  return {result_set_->explanation_};
748  }
749 
750  return result_set_->getRowAt(
752 }
753 
755  if (!result_set_->storage_ && !result_set_->just_explain_) {
756  global_entry_idx_valid_ = false;
757  } else if (result_set_->just_explain_) {
759  fetched_so_far_ = 1;
760  } else {
761  result_set_->advanceCursorToNextEntry(*this);
762  }
763  return *this;
764 }
765 
767  public:
768  ResultSet* reduce(std::vector<ResultSet*>&);
769 
770  std::shared_ptr<ResultSet> getOwnResultSet();
771 
772  void rewriteVarlenAggregates(ResultSet*);
773 
774  private:
775  std::shared_ptr<ResultSet> rs_;
776 };
777 
778 class RowSortException : public std::runtime_error { // std::runtime_error subclass that adds no members of its own
779  public:
780  RowSortException(const std::string& cause) : std::runtime_error(cause) {} // forwards cause as the exception message; NOTE(review): this single-argument constructor is not explicit
781 };
782 
783 namespace result_set {
784 
785 bool can_use_parallel_algorithms(const ResultSet& rows);
786 
787 bool use_parallel_algorithms(const ResultSet& rows);
788 
789 } // namespace result_set
790 
791 #endif // QUERYENGINE_RESULTSET_H
void setSeparateVarlenStorageValid(const bool val)
Definition: ResultSet.h:392
const std::list< Analyzer::OrderEntry > order_entries_
Definition: ResultSet.h:618
void setGeoReturnType(const GeoReturnType val)
Definition: ResultSet.h:360
void serializeVarlenAggColumn(int8_t *buf, std::vector< std::string > &varlen_bufer) const
std::mutex row_iteration_mutex_
Definition: ResultSet.h:726
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
void syncEstimatorBuffer() const
Definition: ResultSet.cpp:408
const int8_t * ptr1
Definition: ResultSet.h:539
const size_t compact_sz2
Definition: ResultSet.h:542
void baselineSort(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n)
void holdChunks(const std::list< std::shared_ptr< Chunk_NS::Chunk >> &chunks)
Definition: ResultSet.h:323
const QueryMemoryDescriptor & getQueryMemDesc() const
Definition: ResultSet.cpp:384
std::unique_ptr< ResultSetComparator< ColumnWiseTargetAccessor > > column_wise_comparator_
Definition: ResultSet.h:734
std::pair< size_t, size_t > getStorageIndex(const size_t entry_idx) const
Definition: ResultSet.cpp:611
std::shared_ptr< RowSetMemoryOwner > getRowSetMemOwner() const
Definition: ResultSet.h:333
bool didOutputColumnar() const
Definition: ResultSet.h:368
std::ptrdiff_t difference_type
Definition: ResultSet.h:96
ENTRY_TYPE getRowWisePerfectHashEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
void setEntryCount(const size_t val)
double decimal_to_double(const SQLTypeInfo &otype, int64_t oval)
AppendedStorage appended_storage_
Definition: ResultSet.h:692
ENTRY_TYPE getColumnarPerfectHashEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
GeoReturnType geo_return_type_
Definition: ResultSet.h:729
ExecutorDeviceType
const BufferIteratorType buffer_itr_
Definition: ResultSet.h:621
void moveToBegin() const
Definition: ResultSet.cpp:444
Utility functions for easy access to the result set buffers.
std::shared_ptr< ResultSet > rs_
Definition: ResultSet.h:775
std::vector< std::string > SerializedVarlenBufferStorage
Definition: ResultSet.h:719
void initializeStorage() const
QueryDescriptionType getQueryDescriptionType() const
Definition: ResultSet.h:373
ResultSetRowIterator(const ResultSet *rs, bool translate_strings, bool decimal_to_double)
Definition: ResultSet.h:131
const Executor * executor_
Definition: ResultSet.h:701
void unserializeCountDistinctColumns(const TSerializedRows &)
std::vector< TargetValue > getNextRow(const bool translate_strings, const bool decimal_to_double) const
static bool isNull(const SQLTypeInfo &ti, const InternalTargetValue &val, const bool float_argument_input)
QueryMemoryDescriptor query_mem_desc_
Definition: ResultSet.h:690
const std::vector< TargetInfo > & getTargetInfos() const
Definition: ResultSet.cpp:389
std::unique_ptr< ResultSetStorage > storage_
Definition: ResultSet.h:691
void setKernelQueueTime(const int64_t kernel_queue_time)
Definition: ResultSet.cpp:427
bool operator==(const ResultSetRowIterator &other) const
Definition: ResultSet.h:101
ENTRY_TYPE getEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
size_t rowCount(const bool force_parallel=false) const
Definition: ResultSet.cpp:287
ResultSetRowIterator(const ResultSet *rs)
Definition: ResultSet.h:142
TargetValue makeGeoTargetValue(const int8_t *geo_target_ptr, const size_t slot_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t entry_buff_idx) const
TargetValue getTargetValueFromBufferRowwise(int8_t *rowwise_target_ptr, int8_t *keys_ptr, const size_t entry_buff_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t slot_idx, const bool translate_strings, const bool decimal_to_double, const bool fixup_count_distinct_pointers) const
size_t keep_first_
Definition: ResultSet.h:696
void keepFirstN(const size_t n)
Definition: ResultSet.cpp:46
std::vector< std::shared_ptr< std::list< ChunkIter > > > chunk_iters_
Definition: ResultSet.h:704
void addCompilationQueueTime(const int64_t compilation_queue_time)
Definition: ResultSet.cpp:431
void serialize(TSerializedRows &serialized_rows) const
std::vector< SerializedVarlenBufferStorage > serialized_varlen_buffer_
Definition: ResultSet.h:721
const size_t compact_sz1
Definition: ResultSet.h:540
int64_t lazyReadInt(const int64_t ival, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
bool operator!=(const ResultSetRowIterator &other) const
Definition: ResultSet.h:105
size_t colCount() const
Definition: ResultSet.cpp:256
OneIntegerColumnRow getOneColRow(const size_t index) const
TargetValue getTargetValueFromBufferColwise(const int8_t *col_ptr, const int8_t *keys_ptr, const QueryMemoryDescriptor &query_mem_desc, const size_t local_entry_idx, const size_t global_entry_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t slot_idx, const bool translate_strings, const bool decimal_to_double) const
void rewriteVarlenAggregates(ResultSet *)
size_t getLimit() const
Definition: ResultSet.cpp:927
const bool just_explain_
Definition: ResultSet.h:724
std::vector< int64_t > materializeCountDistinctColumn(const Analyzer::OrderEntry &order_entry) const
Definition: ResultSet.cpp:656
ResultSetRowIterator rowIterator(size_t from_logical_index, bool translate_strings, bool decimal_to_double) const
Definition: ResultSet.h:187
bool isTruncated() const
Definition: ResultSet.cpp:449
std::vector< uint32_t > permutation_
Definition: ResultSet.h:698
std::atomic< int64_t > cached_row_count_
Definition: ResultSet.h:725
const bool isPermutationBufferEmpty() const
Definition: ResultSet.h:338
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const Executor *executor)
Definition: ResultSet.cpp:56
size_t parallelRowCount() const
Definition: ResultSet.cpp:345
const size_t key_bytes_with_padding_
Definition: ResultSet.h:575
const ResultSet * result_set_
Definition: ResultSet.h:570
std::vector< uint32_t > initPermutationBuffer(const size_t start, const size_t step)
Definition: ResultSet.cpp:548
std::vector< TargetValue > getRowAtNoTranslations(const size_t index, const std::vector< bool > &targets_to_skip={}) const
const ResultSet * result_set_
Definition: ResultSet.h:123
void radixSortOnCpu(const std::list< Analyzer::OrderEntry > &order_entries) const
Definition: ResultSet.cpp:887
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
std::unique_ptr< ResultSetComparator< RowWiseTargetAccessor > > row_wise_comparator_
Definition: ResultSet.h:733
bool definitelyHasNoRows() const
Definition: ResultSet.cpp:380
ColumnWiseTargetAccessor(const ResultSet *result_set)
Definition: ResultSet.h:579
bool use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1036
bool isZeroCopyColumnarConversionPossible(size_t column_idx) const
Definition: ResultSet.cpp:964
std::input_iterator_tag iterator_category
Definition: ResultSet.h:99
size_t global_entry_idx_
Definition: ResultSet.h:125
int8_t * getHostEstimatorBuffer() const
Definition: ResultSet.cpp:404
InternalTargetValue getVarlenOrderEntry(const int64_t str_ptr, const size_t str_len) const
const std::vector< TargetInfo > targets_
Definition: ResultSet.h:687
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: ResultSet.h:697
size_t drop_first_
Definition: ResultSet.h:695
const ResultSetStorage * allocateStorage() const
std::shared_ptr< const std::vector< std::string > > getStringDictionaryPayloadCopy(const int dict_id) const
Definition: ResultSet.cpp:931
const int8_t * ptr2
Definition: ResultSet.h:541
std::list< std::shared_ptr< Chunk_NS::Chunk > > chunks_
Definition: ResultSet.h:703
QueryExecutionTimings timings_
Definition: ResultSet.h:700
const ResultSet * result_set_
Definition: ResultSet.h:620
void setQueueTime(const int64_t queue_time)
Definition: ResultSet.cpp:423
void dropFirstN(const size_t n)
Definition: ResultSet.cpp:51
std::vector< std::vector< int8_t > > literal_buffers_
Definition: ResultSet.h:707
std::vector< std::vector< TargetOffsets > > offsets_for_storage_
Definition: ResultSet.h:591
void doBaselineSort(const ExecutorDeviceType device_type, const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n)
bool canUseFastBaselineSort(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n)
std::vector< TargetValue > & reference
Definition: ResultSet.h:98
ResultSet * reduce(std::vector< ResultSet * > &)
std::tuple< std::vector< bool >, size_t > getSingleSlotTargetBitmap() const
Definition: ResultSet.cpp:977
ResultSetRowIterator & operator++(void)
Definition: ResultSet.h:754
std::shared_ptr< ResultSet > getOwnResultSet()
StorageLookupResult findStorage(const size_t entry_idx) const
Definition: ResultSet.cpp:636
An AbstractBuffer is a unit of data management for a data manager.
static void topPermutation(std::vector< uint32_t > &to_sort, const size_t n, const std::function< bool(const uint32_t, const uint32_t)> compare)
Definition: ResultSet.cpp:830
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
value_type operator*() const
Definition: ResultSet.h:741
const std::vector< ColumnLazyFetchInfo > lazy_fetch_info_
Definition: ResultSet.h:708
RowWiseTargetAccessor(const ResultSet *result_set)
Definition: ResultSet.h:546
void copyColumnIntoBuffer(const size_t column_idx, int8_t *output_buffer, const size_t output_buffer_size) const
RowSortException(const std::string &cause)
Definition: ResultSet.h:780
friend ResultSetBuilder
Definition: ResultSet.h:154
void fixupCountDistinctPointers()
void radixSortOnGpu(const std::list< Analyzer::OrderEntry > &order_entries) const
Definition: ResultSet.cpp:852
const ResultSetStorage * getStorage() const
Definition: ResultSet.cpp:252
QueryDescriptionType getQueryDescriptionType() const
Data_Namespace::DataMgr * data_mgr_
Definition: ResultSet.h:716
Basic constructors and methods of the row set interface.
int64_t getQueueTime() const
Definition: ResultSet.cpp:435
std::vector< TargetValue > getRowAt(const size_t index) const
void fillOneEntry(const std::vector< int64_t > &entry)
Definition: ResultSet.h:312
void updateStorageEntryCount(const size_t new_entry_count)
Definition: ResultSet.h:215
void serializeProjection(TSerializedRows &serialized_rows) const
ResultSetRowIterator operator++(int)
Definition: ResultSet.h:109
const std::shared_ptr< const Analyzer::Estimator > estimator_
Definition: ResultSet.h:713
SQLTypeInfo getColType(const size_t col_idx) const
Definition: ResultSet.cpp:260
GeoReturnType getGeoReturnType() const
Definition: ResultSet.h:359
void holdChunkIterators(const std::shared_ptr< std::list< ChunkIter >> chunk_iters)
Definition: ResultSet.h:326
std::tuple< std::vector< bool >, size_t > getSupportedSingleSlotTargetBitmap() const
Definition: ResultSet.cpp:1001
ExecutorDeviceType getDeviceType() const
Definition: ResultSet.cpp:178
const int8_t * getColumnarBuffer(size_t column_idx) const
Definition: ResultSet.cpp:971
size_t get_row_bytes(const QueryMemoryDescriptor &query_mem_desc)
bool isExplain() const
Definition: ResultSet.cpp:453
ResultSetComparator(const std::list< Analyzer::OrderEntry > &order_entries, const bool use_heap, const ResultSet *result_set)
Definition: ResultSet.h:600
std::vector< TargetValue > value_type
Definition: ResultSet.h:95
void sortPermutation(const std::function< bool(const uint32_t, const uint32_t)> compare)
Definition: ResultSet.cpp:846
bool isGeoColOnGpu(const size_t col_idx) const
const int8_t * get_rowwise_ptr(const int8_t *buff, const size_t entry_idx) const
Definition: ResultSet.h:563
size_t getNDVEstimator() const
std::vector< std::vector< std::vector< const int8_t * > > > col_buffers_
Definition: ResultSet.h:709
bool isRowAtEmpty(const size_t index) const
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
Definition: ResultSet.h:377
size_t entryCount() const
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:461
TargetValue makeTargetValue(const int8_t *ptr, const int8_t compact_sz, const TargetInfo &target_info, const size_t target_logical_idx, const bool translate_strings, const bool decimal_to_double, const size_t entry_buff_idx) const
size_t getCurrentRowBufferIndex() const
Definition: ResultSet.h:115
void append(ResultSet &that)
Definition: ResultSet.cpp:222
std::string explanation_
Definition: ResultSet.h:723
std::vector< std::vector< int64_t > > consistent_frag_sizes_
Definition: ResultSet.h:711
bool operator()(const uint32_t lhs, const uint32_t rhs) const
Definition: ResultSet.cpp:701
int8_t * host_estimator_buffer_
Definition: ResultSet.h:715
friend class ResultSet
Definition: ResultSet.h:142
const ExecutorDeviceType device_type_
Definition: ResultSet.h:688
std::vector< TargetValue > getNextRowImpl(const bool translate_strings, const bool decimal_to_double) const
size_t getCurrentRowBufferIndex() const
Definition: ResultSet.cpp:214
void holdLiterals(std::vector< int8_t > &literal_buff)
Definition: ResultSet.h:329
bool g_enable_watchdog false
Definition: Execute.cpp:73
void parallelTop(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n)
Definition: ResultSet.cpp:571
#define CHECK(condition)
Definition: Logger.h:197
#define DEBUG_TIMER(name)
Definition: Logger.h:313
int getGpuCount() const
size_t getBufferSizeBytes(const ExecutorDeviceType device_type) const
int8_t * getDeviceEstimatorBuffer() const
Definition: ResultSet.cpp:398
size_t fetched_so_far_
Definition: ResultSet.h:694
size_t crt_row_buff_idx_
Definition: ResultSet.h:693
Estimators to be used when precise cardinality isn't useful.
QueryDescriptionType
Definition: Types.h:26
int64_t getDistinctBufferRefFromBufferRowwise(int8_t *rowwise_target_ptr, const TargetInfo &target_info) const
std::vector< std::vector< std::vector< int64_t > > > frag_offsets_
Definition: ResultSet.h:710
bool separate_varlen_storage_valid_
Definition: ResultSet.h:722
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
Executor(const ExecutorId id, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:131
std::vector< TargetValue > getNextRowUnlocked(const bool translate_strings, const bool decimal_to_double) const
std::vector< TargetValue > * pointer
Definition: ResultSet.h:97
const std::vector< int64_t > & getTargetInitVals() const
Definition: ResultSet.cpp:393
std::vector< size_t > getSlotIndicesForTargetIndices() const
Definition: ResultSet.cpp:1020
const std::vector< uint32_t > & getPermutationBuffer() const
Definition: ResultSet.cpp:567
size_t advanceCursorToNextEntry() const
void create_active_buffer_set(BufferSet &count_distinct_active_buffer_set) const
std::set< int64_t > BufferSet
Definition: ResultSet.h:681
std::function< bool(const uint32_t, const uint32_t)> createComparator(const std::list< Analyzer::OrderEntry > &order_entries, const bool use_heap)
Definition: ResultSet.h:625
void sort(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n)
Definition: ResultSet.cpp:473
ResultSetRowIterator rowIterator(bool translate_strings, bool decimal_to_double) const
Definition: ResultSet.h:202
Definition: Analyzer.h:1413
BUFFER_ITERATOR_TYPE BufferIteratorType
Definition: ResultSet.h:598
ENTRY_TYPE getColumnarBaselineEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
Data_Namespace::DataMgr * getDataManager() const
size_t crt_row_buff_idx_
Definition: ResultSet.h:124
std::vector< std::vector< int64_t > > count_distinct_materialized_buffers_
Definition: ResultSet.h:622
Data_Namespace::AbstractBuffer * device_estimator_buffer_
Definition: ResultSet.h:714
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
std::vector< std::vector< TargetOffsets > > offsets_for_storage_
Definition: ResultSet.h:568
bool global_entry_idx_valid_
Definition: ResultSet.h:126
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1032
int64_t getRenderTime() const
Definition: ResultSet.cpp:440
void setCachedRowCount(const size_t row_count) const
Definition: ResultSet.cpp:327
bool isDirectColumnarConversionPossible() const
Definition: ResultSet.cpp:946
size_t get_key_bytes_rowwise(const QueryMemoryDescriptor &query_mem_desc)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
TargetValue makeVarlenTargetValue(const int8_t *ptr1, const int8_t compact_sz1, const int8_t *ptr2, const int8_t compact_sz2, const TargetInfo &target_info, const size_t target_logical_idx, const bool translate_strings, const size_t entry_buff_idx) const
void serializeCountDistinctColumns(TSerializedRows &) const
const ResultSetStorage * storage_ptr
Definition: ResultSet.h:509
std::vector< std::unique_ptr< ResultSetStorage >> AppendedStorage
Definition: ResultSet.h:150
const std::vector< const int8_t * > & getColumnFrag(const size_t storge_idx, const size_t col_logical_idx, int64_t &global_idx) const
size_t binSearchRowCount() const
Definition: ResultSet.cpp:332
const std::vector< ColumnLazyFetchInfo > & getLazyFetchInfo() const
Definition: ResultSet.h:388
ENTRY_TYPE getRowWiseBaselineEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
int getDeviceId() const
Definition: ResultSet.cpp:457
static std::unique_ptr< ResultSet > unserialize(const TSerializedRows &serialized_rows, const Executor *)
const int device_id_
Definition: ResultSet.h:689