OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ResultSet.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #ifndef QUERYENGINE_RESULTSET_H
26 #define QUERYENGINE_RESULTSET_H
27 
28 #include "CardinalityEstimator.h"
29 #include "DataMgr/Chunk/Chunk.h"
31 #include "ResultSetStorage.h"
32 #include "Shared/quantile.h"
33 #include "TargetValue.h"
34 
35 #include <atomic>
36 #include <functional>
37 #include <list>
38 
39 /*
40  * Stores the underlying buffer and the meta-data for a result set. The buffer
41  * format reflects the main requirements for result sets. Not all queries
42  * specify a GROUP BY clause, but since it's the most important and challenging
43  * case we'll focus on it. Note that the meta-data is stored separately from
44  * the buffer and it's not transferred to GPU.
45  *
46  * 1. It has to be efficient for reduction of partial GROUP BY query results
47  * from multiple devices / cores; the cardinalities can be high. Reduction
48  * currently happens on the host.
49  * 2. No conversions should be needed when buffers are transferred from GPU to
50  * host for reduction. This implies the buffer needs to be "flat", with no
51  * pointers to chase since they have no meaning in a different address space.
52  * 3. Must be size-efficient.
53  *
54  * There are several variations of the format of a result set buffer, but the
55  * most common is a sequence of entries which represent a row in the result or
56  * an empty slot. One entry looks as follows:
57  *
58  * +-+-+-+-+-+-+-+-+-+-+-+--?--+-+-+-+-+-+-+-+-+-+-+-+-+
59  * |key_0| ... |key_N-1| padding |value_0|...|value_N-1|
60  * +-+-+-+-+-+-+-+-+-+-+-+--?--+-+-+-+-+-+-+-+-+-+-+-+-+
61  *
62  * (key_0 ... key_N-1) is a multiple component key, unique within the buffer.
63  * It stores the tuple specified by the GROUP BY clause. All components have
64  * the same width, 4 or 8 bytes. For the 4-byte components, 4-byte padding is
65  * added if the number of components is odd. Not all entries in the buffer are
66  * valid; an empty entry contains EMPTY_KEY_{64, 32} for 8-byte / 4-byte width,
67  * respectively. An empty entry is ignored by subsequent operations on the
68  * result set (reduction, iteration, sort etc).
69  *
70  * value_0 through value_N-1 are 8-byte fields which hold the columns of the
71  * result, like aggregates and projected expressions. They're reduced between
72  * multiple partial results for identical (key_0 ... key_N-1) tuples.
73  *
74  * The order of entries is decided by the type of hash used, which depends on
75  * the range of the keys. For small enough ranges, a perfect hash is used. When
76  * a perfect hash isn't feasible, open addressing (using MurmurHash) with linear
77  * probing is used instead, with a 50% fill rate.
78  */
79 
80 struct ReductionCode;
81 
82 namespace Analyzer {
83 
84 class Expr;
85 class Estimator;
86 struct OrderEntry;
87 
88 } // namespace Analyzer
89 
90 class Executor;
91 
92 class ResultSet;
93 
95  public:
96  using value_type = std::vector<TargetValue>;
97  using difference_type = std::ptrdiff_t;
98  using pointer = std::vector<TargetValue>*;
99  using reference = std::vector<TargetValue>&;
100  using iterator_category = std::input_iterator_tag;
101 
102  bool operator==(const ResultSetRowIterator& other) const {
103  return result_set_ == other.result_set_ &&
105  }
// Inequality is the exact negation of this iterator's operator==.
106  bool operator!=(const ResultSetRowIterator& other) const { return !(*this == other); }
107 
108  inline value_type operator*() const;
109  inline ResultSetRowIterator& operator++(void);
111  ResultSetRowIterator iter(*this);
112  ++(*this);
113  return iter;
114  }
115 
// Returns crt_row_buff_idx_ - 1 as the current row buffer index.
// Throws std::runtime_error while crt_row_buff_idx_ is still 0 (the value the
// constructor's initializer list assigns), because the index is then undefined.
116  size_t getCurrentRowBufferIndex() const {
117  if (crt_row_buff_idx_ == 0) {
118  throw std::runtime_error("current row buffer iteration index is undefined");
119  }
// NOTE(review): the stored counter appears to be one past the current row,
// hence the -1 — confirm against the code that advances the cursor.
120  return crt_row_buff_idx_ - 1;
121  }
122 
123  private:
131 
133  bool translate_strings,
134  bool decimal_to_double)
135  : result_set_(rs)
136  , crt_row_buff_idx_(0)
137  , global_entry_idx_(0)
139  , fetched_so_far_(0)
140  , translate_strings_(translate_strings)
141  , decimal_to_double_(decimal_to_double){};
142 
144 
145  friend class ResultSet;
146 };
147 
148 class TSerializedRows;
149 class ResultSetBuilder;
150 
151 using AppendedStorage = std::vector<std::unique_ptr<ResultSetStorage>>;
152 using PermutationIdx = uint32_t;
153 using Permutation = std::vector<PermutationIdx>;
155 using Comparator = std::function<bool(const PermutationIdx, const PermutationIdx)>;
156 
157 class ResultSet {
158  public:
160  // Can use derivatives of the builder class to construct a ResultSet
161 
162  ResultSet(const std::vector<TargetInfo>& targets,
163  const ExecutorDeviceType device_type,
165  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
166  const Catalog_Namespace::Catalog* catalog,
167  const unsigned block_size,
168  const unsigned grid_size);
169 
170  ResultSet(const std::vector<TargetInfo>& targets,
171  const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
172  const std::vector<std::vector<const int8_t*>>& col_buffers,
173  const std::vector<std::vector<int64_t>>& frag_offsets,
174  const std::vector<int64_t>& consistent_frag_sizes,
175  const ExecutorDeviceType device_type,
176  const int device_id,
178  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
179  const Catalog_Namespace::Catalog* catalog,
180  const unsigned block_size,
181  const unsigned grid_size);
182 
183  ResultSet(const std::shared_ptr<const Analyzer::Estimator>,
184  const ExecutorDeviceType device_type,
185  const int device_id,
186  Data_Namespace::DataMgr* data_mgr);
187 
188  ResultSet(const std::string& explanation);
189 
190  ResultSet(int64_t queue_time_ms,
191  int64_t render_time_ms,
192  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
193 
194  ~ResultSet();
195 
// Builds a descriptive string of the form
//   <type name>(targets=<targets_>, query_mem_desc=<query_mem_desc_>)
// using the typeName() and global ::toString() helpers for the pieces.
196  std::string toString() const {
197  return typeName(this) + "(targets=" + ::toString(targets_) +
198  ", query_mem_desc=" + ::toString(query_mem_desc_) + ")";
199  }
200 
// Creates a row iterator over this result set positioned at the given logical row.
//   from_logical_index  number of additional increments applied after the
//                       iterator has been moved to the first logical position
//   translate_strings   forwarded to the iterator constructor
//   decimal_to_double   forwarded to the iterator constructor
// The iterator is advanced one step at a time, so the cost grows linearly with
// from_logical_index. No bounds check against the row count is performed here.
201  inline ResultSetRowIterator rowIterator(size_t from_logical_index,
202  bool translate_strings,
203  bool decimal_to_double) const {
204  ResultSetRowIterator rowIterator(this, translate_strings, decimal_to_double);
205 
206  // move to first logical position
207  ++rowIterator;
208 
// then skip forward from_logical_index further positions
209  for (size_t index = 0; index < from_logical_index; index++) {
210  ++rowIterator;
211  }
212 
213  return rowIterator;
214  }
215 
// Convenience overload: same as rowIterator(0, translate_strings, decimal_to_double),
// i.e. an iterator positioned at the first logical row.
216  inline ResultSetRowIterator rowIterator(bool translate_strings,
217  bool decimal_to_double) const {
218  return rowIterator(0, translate_strings, decimal_to_double);
219  }
220 
222 
223  const ResultSetStorage* allocateStorage() const;
224 
225  const ResultSetStorage* allocateStorage(int8_t*, const std::vector<int64_t>&) const;
226 
227  const ResultSetStorage* allocateStorage(const std::vector<int64_t>&) const;
228 
229  void updateStorageEntryCount(const size_t new_entry_count) {
231  query_mem_desc_.setEntryCount(new_entry_count);
232  CHECK(storage_);
233  storage_->updateEntryCount(new_entry_count);
234  }
235 
236  std::vector<TargetValue> getNextRow(const bool translate_strings,
237  const bool decimal_to_double) const;
238 
239  size_t getCurrentRowBufferIndex() const;
240 
241  std::vector<TargetValue> getRowAt(const size_t index) const;
242 
243  TargetValue getRowAt(const size_t row_idx,
244  const size_t col_idx,
245  const bool translate_strings,
246  const bool decimal_to_double = true) const;
247 
248  // Specialized random access getter for result sets with a single column to
249  // avoid the overhead of building a std::vector<TargetValue> result with only
250  // one element. Only used by RelAlgTranslator::getInIntegerSetExpr currently.
251  OneIntegerColumnRow getOneColRow(const size_t index) const;
252 
253  std::vector<TargetValue> getRowAtNoTranslations(
254  const size_t index,
255  const std::vector<bool>& targets_to_skip = {}) const;
256 
257  bool isRowAtEmpty(const size_t index) const;
258 
259  void sort(const std::list<Analyzer::OrderEntry>& order_entries,
260  size_t top_n,
261  const Executor* executor);
262 
263  void keepFirstN(const size_t n);
264 
265  void dropFirstN(const size_t n);
266 
267  void append(ResultSet& that);
268 
269  const ResultSetStorage* getStorage() const;
270 
271  size_t colCount() const;
272 
273  SQLTypeInfo getColType(const size_t col_idx) const;
274 
275  size_t rowCount(const bool force_parallel = false) const;
276 
277  void setCachedRowCount(const size_t row_count) const;
278 
279  size_t entryCount() const;
280 
281  size_t getBufferSizeBytes(const ExecutorDeviceType device_type) const;
282 
283  bool definitelyHasNoRows() const;
284 
285  const QueryMemoryDescriptor& getQueryMemDesc() const;
286 
287  const std::vector<TargetInfo>& getTargetInfos() const;
288 
289  const std::vector<int64_t>& getTargetInitVals() const;
290 
291  int8_t* getDeviceEstimatorBuffer() const;
292 
293  int8_t* getHostEstimatorBuffer() const;
294 
295  void syncEstimatorBuffer() const;
296 
297  size_t getNDVEstimator() const;
298 
300  // all in ms
302  int64_t render_time{0};
304  int64_t kernel_queue_time{0};
305  };
306 
307  void setQueueTime(const int64_t queue_time);
308  void setKernelQueueTime(const int64_t kernel_queue_time);
309  void addCompilationQueueTime(const int64_t compilation_queue_time);
310 
311  int64_t getQueueTime() const;
312  int64_t getRenderTime() const;
313 
314  void moveToBegin() const;
315 
316  bool isTruncated() const;
317 
318  bool isExplain() const;
319 
320  void setValidationOnlyRes();
321  bool isValidationOnlyRes() const;
322 
// Returns a copy of explanation_ when just_explain_ is set; otherwise returns an
// empty string, regardless of what explanation_ holds.
323  std::string getExplanation() const {
324  if (just_explain_) {
325  return explanation_;
326  }
327  return {};
328  }
329 
330  bool isGeoColOnGpu(const size_t col_idx) const;
331  int getDeviceId() const;
332 
333  // Called from the executor because in the new ResultSet we assume the 'padded' field
334  // in SlotSize already contains the padding, whereas in the executor it's computed.
335  // Once the buffer initialization moves to ResultSet we can remove this method.
337 
// Writes a single entry into storage_. The fill routine is chosen from the
// storage's own query memory descriptor: column-wise when it reports columnar
// output, row-wise otherwise. storage_ must already be set (enforced via CHECK).
//   entry  the values to write, passed through unchanged to the storage
338  void fillOneEntry(const std::vector<int64_t>& entry) {
339  CHECK(storage_);
340  if (storage_->query_mem_desc_.didOutputColumnar()) {
341  storage_->fillOneEntryColWise(entry);
342  } else {
343  storage_->fillOneEntryRowWise(entry);
344  }
345  }
346 
347  void initializeStorage() const;
348 
// Copies the given list of shared chunk pointers into chunks_, replacing whatever
// was held before. The copied shared_ptrs share ownership of the chunks for as
// long as this ResultSet keeps them.
349  void holdChunks(const std::list<std::shared_ptr<Chunk_NS::Chunk>>& chunks) {
350  chunks_ = chunks;
351  }
// Appends the shared pointer to a list of chunk iterators to chunk_iters_.
// Earlier entries are kept; each call adds one more shared owner of its list.
352  void holdChunkIterators(const std::shared_ptr<std::list<ChunkIter>> chunk_iters) {
353  chunk_iters_.push_back(chunk_iters);
354  }
// Takes over the contents of literal_buff by moving it onto the end of
// literal_buffers_. Although the parameter is an lvalue reference, the caller's
// vector is left in a moved-from state and should not be relied on afterwards.
355  void holdLiterals(std::vector<int8_t>& literal_buff) {
356  literal_buffers_.push_back(std::move(literal_buff));
357  }
358 
// Returns a copy of the row_set_mem_owner_ shared pointer, so the caller becomes
// an additional shared owner of the RowSetMemoryOwner (may be null if unset).
359  std::shared_ptr<RowSetMemoryOwner> getRowSetMemOwner() const {
360  return row_set_mem_owner_;
361  }
362 
363  const Permutation& getPermutationBuffer() const;
// Reports whether the permutation buffer (permutation_) currently holds no entries.
// Returned as a plain bool: a top-level const on a by-value scalar return is
// ignored by the language and only produces -Wignored-qualifiers noise.
364  bool isPermutationBufferEmpty() const { return permutation_.empty(); }
365 
366  void serialize(TSerializedRows& serialized_rows) const;
367 
368  static std::unique_ptr<ResultSet> unserialize(const TSerializedRows& serialized_rows,
369  const Executor*);
370 
371  size_t getLimit() const;
372 
376  enum class GeoReturnType {
379  WktString,
384  };
387 
388  void copyColumnIntoBuffer(const size_t column_idx,
389  int8_t* output_buffer,
390  const size_t output_buffer_size) const;
391 
393 
// Forwards to query_mem_desc_.didOutputColumnar(): true when this result set's
// query memory descriptor reports columnar output.
394  bool didOutputColumnar() const { return this->query_mem_desc_.didOutputColumnar(); }
395 
396  bool isZeroCopyColumnarConversionPossible(size_t column_idx) const;
397  const int8_t* getColumnarBuffer(size_t column_idx) const;
398 
401  }
402 
// Returns the padded width, in bytes, of slot slot_idx as reported by
// query_mem_desc_. Returned as a plain int8_t: a top-level const on a by-value
// scalar return is ignored by the language and only triggers
// -Wignored-qualifiers.
403  int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const {
404  return query_mem_desc_.getPaddedSlotWidthBytes(slot_idx);
405  }
406 
407  // returns a bitmap of all single-slot targets, as well as its count
408  std::tuple<std::vector<bool>, size_t> getSingleSlotTargetBitmap() const;
409 
410  std::tuple<std::vector<bool>, size_t> getSupportedSingleSlotTargetBitmap() const;
411 
412  std::vector<size_t> getSlotIndicesForTargetIndices() const;
413 
// Read-only access to lazy_fetch_info_. The returned reference points at the
// member itself, so it must not be used after this ResultSet is destroyed.
414  const std::vector<ColumnLazyFetchInfo>& getLazyFetchInfo() const {
415  return lazy_fetch_info_;
416  }
417 
418  void setSeparateVarlenStorageValid(const bool val) {
420  }
421 
422  std::shared_ptr<const std::vector<std::string>> getStringDictionaryPayloadCopy(
423  const int dict_id) const;
424 
425  template <typename ENTRY_TYPE, QueryDescriptionType QUERY_TYPE, bool COLUMNAR_FORMAT>
426  ENTRY_TYPE getEntryAt(const size_t row_idx,
427  const size_t target_idx,
428  const size_t slot_idx) const;
429 
430  static double calculateQuantile(quantile::TDigest* const t_digest, double const q);
431 
432  private:
434 
435  std::vector<TargetValue> getNextRowImpl(const bool translate_strings,
436  const bool decimal_to_double) const;
437 
438  std::vector<TargetValue> getNextRowUnlocked(const bool translate_strings,
439  const bool decimal_to_double) const;
440 
441  std::vector<TargetValue> getRowAt(const size_t index,
442  const bool translate_strings,
443  const bool decimal_to_double,
444  const bool fixup_count_distinct_pointers,
445  const std::vector<bool>& targets_to_skip = {}) const;
446 
447  // NOTE: just for direct columnarization use at the moment
448  template <typename ENTRY_TYPE>
449  ENTRY_TYPE getColumnarPerfectHashEntryAt(const size_t row_idx,
450  const size_t target_idx,
451  const size_t slot_idx) const;
452 
453  template <typename ENTRY_TYPE>
454  ENTRY_TYPE getRowWisePerfectHashEntryAt(const size_t row_idx,
455  const size_t target_idx,
456  const size_t slot_idx) const;
457 
458  template <typename ENTRY_TYPE>
459  ENTRY_TYPE getRowWiseBaselineEntryAt(const size_t row_idx,
460  const size_t target_idx,
461  const size_t slot_idx) const;
462 
463  template <typename ENTRY_TYPE>
464  ENTRY_TYPE getColumnarBaselineEntryAt(const size_t row_idx,
465  const size_t target_idx,
466  const size_t slot_idx) const;
467 
468  size_t binSearchRowCount() const;
469 
470  size_t parallelRowCount() const;
471 
472  size_t advanceCursorToNextEntry() const;
473 
474  void radixSortOnGpu(const std::list<Analyzer::OrderEntry>& order_entries) const;
475 
476  void radixSortOnCpu(const std::list<Analyzer::OrderEntry>& order_entries) const;
477 
478  static bool isNull(const SQLTypeInfo& ti,
479  const InternalTargetValue& val,
480  const bool float_argument_input);
481 
483  int8_t* rowwise_target_ptr,
484  int8_t* keys_ptr,
485  const size_t entry_buff_idx,
486  const TargetInfo& target_info,
487  const size_t target_logical_idx,
488  const size_t slot_idx,
489  const bool translate_strings,
490  const bool decimal_to_double,
491  const bool fixup_count_distinct_pointers) const;
492 
493  TargetValue getTargetValueFromBufferColwise(const int8_t* col_ptr,
494  const int8_t* keys_ptr,
496  const size_t local_entry_idx,
497  const size_t global_entry_idx,
498  const TargetInfo& target_info,
499  const size_t target_logical_idx,
500  const size_t slot_idx,
501  const bool translate_strings,
502  const bool decimal_to_double) const;
503 
504  TargetValue makeTargetValue(const int8_t* ptr,
505  const int8_t compact_sz,
506  const TargetInfo& target_info,
507  const size_t target_logical_idx,
508  const bool translate_strings,
509  const bool decimal_to_double,
510  const size_t entry_buff_idx) const;
511 
512  TargetValue makeVarlenTargetValue(const int8_t* ptr1,
513  const int8_t compact_sz1,
514  const int8_t* ptr2,
515  const int8_t compact_sz2,
516  const TargetInfo& target_info,
517  const size_t target_logical_idx,
518  const bool translate_strings,
519  const size_t entry_buff_idx) const;
520 
522  int8_t* ptr1;
523  int8_t compact_sz1;
524  int8_t* ptr2;
525  int8_t compact_sz2;
526 
528  : ptr1(nullptr), compact_sz1(0), ptr2(nullptr), compact_sz2(0) {}
529  };
530  TargetValue makeGeoTargetValue(const int8_t* geo_target_ptr,
531  const size_t slot_idx,
532  const TargetInfo& target_info,
533  const size_t target_logical_idx,
534  const size_t entry_buff_idx) const;
535 
538  const size_t fixedup_entry_idx;
539  const size_t storage_idx;
540  };
541 
543  const int8_t* buff,
544  const size_t entry_idx,
545  const size_t target_logical_idx,
546  const StorageLookupResult& storage_lookup_result) const;
547 
548  InternalTargetValue getVarlenOrderEntry(const int64_t str_ptr,
549  const size_t str_len) const;
550 
551  int64_t lazyReadInt(const int64_t ival,
552  const size_t target_logical_idx,
553  const StorageLookupResult& storage_lookup_result) const;
554 
558  std::pair<size_t, size_t> getStorageIndex(const size_t entry_idx) const;
559 
560  const std::vector<const int8_t*>& getColumnFrag(const size_t storge_idx,
561  const size_t col_logical_idx,
562  int64_t& global_idx) const;
563 
564  StorageLookupResult findStorage(const size_t entry_idx) const;
565 
566  struct TargetOffsets {
567  const int8_t* ptr1;
568  const size_t compact_sz1;
569  const int8_t* ptr2;
570  const size_t compact_sz2;
571  };
572 
574  RowWiseTargetAccessor(const ResultSet* result_set)
575  : result_set_(result_set)
577  , key_width_(result_set_->query_mem_desc_.getEffectiveKeyWidth())
581  }
582 
584  const int8_t* buff,
585  const size_t entry_idx,
586  const size_t target_logical_idx,
587  const StorageLookupResult& storage_lookup_result) const;
588 
590 
// Computes the address of entry entry_idx inside buff by stepping row_bytes_
// bytes per entry: buff + entry_idx * row_bytes_. No bounds checking is done.
591  inline const int8_t* get_rowwise_ptr(const int8_t* buff,
592  const size_t entry_idx) const {
593  return buff + entry_idx * row_bytes_;
594  }
595 
596  std::vector<std::vector<TargetOffsets>> offsets_for_storage_;
597 
599 
600  // Row-wise iteration
601  const size_t row_bytes_;
602  const size_t key_width_;
604  };
605 
607  ColumnWiseTargetAccessor(const ResultSet* result_set) : result_set_(result_set) {
609  }
610 
612 
614  const int8_t* buff,
615  const size_t entry_idx,
616  const size_t target_logical_idx,
617  const StorageLookupResult& storage_lookup_result) const;
618 
619  std::vector<std::vector<TargetOffsets>> offsets_for_storage_;
620 
622  };
623 
624  using ApproxMedianBuffers = std::vector<std::vector<double>>;
625 
626  template <typename BUFFER_ITERATOR_TYPE>
628  using BufferIteratorType = BUFFER_ITERATOR_TYPE;
629 
630  ResultSetComparator(const std::list<Analyzer::OrderEntry>& order_entries,
631  const ResultSet* result_set,
632  const PermutationView permutation,
633  const Executor* executor,
634  const bool single_threaded)
635  : order_entries_(order_entries)
636  , result_set_(result_set)
637  , permutation_(permutation)
638  , buffer_itr_(result_set)
639  , executor_(executor)
640  , single_threaded_(single_threaded)
643  }
644 
647 
648  std::vector<int64_t> materializeCountDistinctColumn(
649  const Analyzer::OrderEntry& order_entry) const;
650  ApproxMedianBuffers::value_type materializeApproxMedianColumn(
651  const Analyzer::OrderEntry& order_entry) const;
652 
653  bool operator()(const PermutationIdx lhs, const PermutationIdx rhs) const;
654 
655  const std::list<Analyzer::OrderEntry>& order_entries_;
659  const Executor* executor_;
660  const bool single_threaded_;
661  std::vector<std::vector<int64_t>> count_distinct_materialized_buffers_;
663  };
664 
665  Comparator createComparator(const std::list<Analyzer::OrderEntry>& order_entries,
666  const PermutationView permutation,
667  const Executor* executor,
668  const bool single_threaded) {
669  auto timer = DEBUG_TIMER(__func__);
672  order_entries, this, permutation, executor, single_threaded)](
673  const PermutationIdx lhs, const PermutationIdx rhs) {
674  return rsc(lhs, rhs);
675  };
676  } else {
678  order_entries, this, permutation, executor, single_threaded)](
679  const PermutationIdx lhs, const PermutationIdx rhs) {
680  return rsc(lhs, rhs);
681  };
682  }
683  }
684 
686  const size_t n,
687  const Comparator&);
688 
690  PermutationIdx const begin,
691  PermutationIdx const end) const;
692 
693  void parallelTop(const std::list<Analyzer::OrderEntry>& order_entries,
694  const size_t top_n,
695  const Executor* executor);
696 
697  void baselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
698  const size_t top_n,
699  const Executor* executor);
700 
701  void doBaselineSort(const ExecutorDeviceType device_type,
702  const std::list<Analyzer::OrderEntry>& order_entries,
703  const size_t top_n,
704  const Executor* executor);
705 
706  bool canUseFastBaselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
707  const size_t top_n);
708 
710 
711  int getGpuCount() const;
712 
713  void serializeProjection(TSerializedRows& serialized_rows) const;
714  void serializeVarlenAggColumn(int8_t* buf,
715  std::vector<std::string>& varlen_bufer) const;
716 
717  void serializeCountDistinctColumns(TSerializedRows&) const;
718 
719  void unserializeCountDistinctColumns(const TSerializedRows&);
720 
722 
723  using BufferSet = std::set<int64_t>;
724  void create_active_buffer_set(BufferSet& count_distinct_active_buffer_set) const;
725 
726  int64_t getDistinctBufferRefFromBufferRowwise(int8_t* rowwise_target_ptr,
727  const TargetInfo& target_info) const;
728 
729  const std::vector<TargetInfo> targets_;
731  const int device_id_;
733  mutable std::unique_ptr<ResultSetStorage> storage_;
735  mutable size_t crt_row_buff_idx_;
736  mutable size_t fetched_so_far_;
737  size_t drop_first_;
738  size_t keep_first_;
739  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
741 
743  unsigned block_size_{0};
744  unsigned grid_size_{0};
746 
747  std::list<std::shared_ptr<Chunk_NS::Chunk>> chunks_;
748  std::vector<std::shared_ptr<std::list<ChunkIter>>> chunk_iters_;
749  // TODO(miyu): refine by using one buffer and
750  // setting offset instead of ptr in group by buffer.
751  std::vector<std::vector<int8_t>> literal_buffers_;
752  const std::vector<ColumnLazyFetchInfo> lazy_fetch_info_;
753  std::vector<std::vector<std::vector<const int8_t*>>> col_buffers_;
754  std::vector<std::vector<std::vector<int64_t>>> frag_offsets_;
755  std::vector<std::vector<int64_t>> consistent_frag_sizes_;
756 
757  const std::shared_ptr<const Analyzer::Estimator> estimator_;
759  mutable int8_t* host_estimator_buffer_{nullptr};
761 
762  // only used by serialization
763  using SerializedVarlenBufferStorage = std::vector<std::string>;
764 
765  std::vector<SerializedVarlenBufferStorage> serialized_varlen_buffer_;
767  std::string explanation_;
768  const bool just_explain_;
770  mutable std::atomic<int64_t> cached_row_count_;
771  mutable std::mutex row_iteration_mutex_;
772 
773  // only used by geo
775 
776  friend class ResultSetManager;
777  friend class ResultSetRowIterator;
778  friend class ColumnarResults;
779 };
780 
783  return {};
784  }
785 
786  if (result_set_->just_explain_) {
787  return {result_set_->explanation_};
788  }
789 
790  return result_set_->getRowAt(
792 }
793 
795  if (!result_set_->storage_ && !result_set_->just_explain_) {
796  global_entry_idx_valid_ = false;
797  } else if (result_set_->just_explain_) {
799  fetched_so_far_ = 1;
800  } else {
801  result_set_->advanceCursorToNextEntry(*this);
802  }
803  return *this;
804 }
805 
807  public:
808  ResultSet* reduce(std::vector<ResultSet*>&);
809 
810  std::shared_ptr<ResultSet> getOwnResultSet();
811 
812  void rewriteVarlenAggregates(ResultSet*);
813 
814  private:
815  std::shared_ptr<ResultSet> rs_;
816 };
817 
// Exception type derived from std::runtime_error; the constructor forwards the
// cause string to the base class, so it is available through what().
// NOTE(review): presumably thrown by the row sorting code paths — confirm
// against the throw sites, which are not part of this header.
818 class RowSortException : public std::runtime_error {
819  public:
820  RowSortException(const std::string& cause) : std::runtime_error(cause) {}
821 };
822 
// Free-function predicates that inspect a ResultSet passed by const reference.
// Only the declarations live in this header.
// NOTE(review): judging by the names, these decide whether parallel algorithms
// can be / should be used for `rows` — confirm against the definitions.
823 namespace result_set {
824 
825 bool can_use_parallel_algorithms(const ResultSet& rows);
826 
827 bool use_parallel_algorithms(const ResultSet& rows);
828 
829 } // namespace result_set
830 
831 #endif // QUERYENGINE_RESULTSET_H
void setSeparateVarlenStorageValid(const bool val)
Definition: ResultSet.h:418
void setGeoReturnType(const GeoReturnType val)
Definition: ResultSet.h:386
void serializeVarlenAggColumn(int8_t *buf, std::vector< std::string > &varlen_bufer) const
std::mutex row_iteration_mutex_
Definition: ResultSet.h:771
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
void syncEstimatorBuffer() const
Definition: ResultSet.cpp:425
const int8_t * ptr1
Definition: ResultSet.h:567
const size_t compact_sz2
Definition: ResultSet.h:570
void holdChunks(const std::list< std::shared_ptr< Chunk_NS::Chunk >> &chunks)
Definition: ResultSet.h:349
const QueryMemoryDescriptor & getQueryMemDesc() const
Definition: ResultSet.cpp:401
std::pair< size_t, size_t > getStorageIndex(const size_t entry_idx) const
Definition: ResultSet.cpp:638
std::shared_ptr< RowSetMemoryOwner > getRowSetMemOwner() const
Definition: ResultSet.h:359
bool isValidationOnlyRes() const
Definition: ResultSet.cpp:478
Permutation permutation_
Definition: ResultSet.h:740
bool didOutputColumnar() const
Definition: ResultSet.h:394
void setValidationOnlyRes()
Definition: ResultSet.cpp:474
PermutationView initPermutationBuffer(PermutationView permutation, PermutationIdx const begin, PermutationIdx const end) const
Definition: ResultSet.cpp:576
bool for_validation_only_
Definition: ResultSet.h:769
std::ptrdiff_t difference_type
Definition: ResultSet.h:97
ENTRY_TYPE getRowWisePerfectHashEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:102
void setEntryCount(const size_t val)
double decimal_to_double(const SQLTypeInfo &otype, int64_t oval)
AppendedStorage appended_storage_
Definition: ResultSet.h:734
ENTRY_TYPE getColumnarPerfectHashEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
GeoReturnType geo_return_type_
Definition: ResultSet.h:774
ExecutorDeviceType
const BufferIteratorType buffer_itr_
Definition: ResultSet.h:658
void moveToBegin() const
Definition: ResultSet.cpp:461
Utility functions for easy access to the result set buffers.
std::shared_ptr< ResultSet > rs_
Definition: ResultSet.h:815
const Executor * executor_
Definition: ResultSet.h:659
std::vector< std::string > SerializedVarlenBufferStorage
Definition: ResultSet.h:763
void initializeStorage() const
QueryDescriptionType getQueryDescriptionType() const
Definition: ResultSet.h:399
ResultSetRowIterator(const ResultSet *rs, bool translate_strings, bool decimal_to_double)
Definition: ResultSet.h:132
const Catalog_Namespace::Catalog * catalog_
Definition: ResultSet.h:742
void unserializeCountDistinctColumns(const TSerializedRows &)
std::vector< TargetValue > getNextRow(const bool translate_strings, const bool decimal_to_double) const
static bool isNull(const SQLTypeInfo &ti, const InternalTargetValue &val, const bool float_argument_input)
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const Catalog_Namespace::Catalog *catalog, const unsigned block_size, const unsigned grid_size)
Definition: ResultSet.cpp:60
QueryMemoryDescriptor query_mem_desc_
Definition: ResultSet.h:732
const std::vector< TargetInfo > & getTargetInfos() const
Definition: ResultSet.cpp:406
std::unique_ptr< ResultSetStorage > storage_
Definition: ResultSet.h:733
void setKernelQueueTime(const int64_t kernel_queue_time)
Definition: ResultSet.cpp:444
bool operator==(const ResultSetRowIterator &other) const
Definition: ResultSet.h:102
std::string getExplanation() const
Definition: ResultSet.h:323
ENTRY_TYPE getEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
size_t rowCount(const bool force_parallel=false) const
Definition: ResultSet.cpp:304
ResultSetRowIterator(const ResultSet *rs)
Definition: ResultSet.h:143
TargetValue makeGeoTargetValue(const int8_t *geo_target_ptr, const size_t slot_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t entry_buff_idx) const
TargetValue getTargetValueFromBufferRowwise(int8_t *rowwise_target_ptr, int8_t *keys_ptr, const size_t entry_buff_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t slot_idx, const bool translate_strings, const bool decimal_to_double, const bool fixup_count_distinct_pointers) const
size_t keep_first_
Definition: ResultSet.h:738
void keepFirstN(const size_t n)
Definition: ResultSet.cpp:50
std::vector< std::shared_ptr< std::list< ChunkIter > > > chunk_iters_
Definition: ResultSet.h:748
void addCompilationQueueTime(const int64_t compilation_queue_time)
Definition: ResultSet.cpp:448
void parallelTop(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n, const Executor *executor)
Definition: ResultSet.cpp:596
void serialize(TSerializedRows &serialized_rows) const
std::vector< SerializedVarlenBufferStorage > serialized_varlen_buffer_
Definition: ResultSet.h:765
const size_t compact_sz1
Definition: ResultSet.h:568
int64_t lazyReadInt(const int64_t ival, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
bool operator!=(const ResultSetRowIterator &other) const
Definition: ResultSet.h:106
size_t colCount() const
Definition: ResultSet.cpp:273
OneIntegerColumnRow getOneColRow(const size_t index) const
TargetValue getTargetValueFromBufferColwise(const int8_t *col_ptr, const int8_t *keys_ptr, const QueryMemoryDescriptor &query_mem_desc, const size_t local_entry_idx, const size_t global_entry_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t slot_idx, const bool translate_strings, const bool decimal_to_double) const
void rewriteVarlenAggregates(ResultSet *)
size_t getLimit() const
Definition: ResultSet.cpp:1013
const bool just_explain_
Definition: ResultSet.h:768
std::vector< int64_t > materializeCountDistinctColumn(const Analyzer::OrderEntry &order_entry) const
Definition: ResultSet.cpp:696
ResultSetRowIterator rowIterator(size_t from_logical_index, bool translate_strings, bool decimal_to_double) const
Definition: ResultSet.h:201
unsigned block_size_
Definition: ResultSet.h:743
bool isTruncated() const
Definition: ResultSet.cpp:466
std::atomic< int64_t > cached_row_count_
Definition: ResultSet.h:770
const bool isPermutationBufferEmpty() const
Definition: ResultSet.h:364
size_t parallelRowCount() const
Definition: ResultSet.cpp:362
const size_t key_bytes_with_padding_
Definition: ResultSet.h:603
const ResultSet * result_set_
Definition: ResultSet.h:598
std::vector< TargetValue > getRowAtNoTranslations(const size_t index, const std::vector< bool > &targets_to_skip={}) const
const ResultSet * result_set_
Definition: ResultSet.h:124
void radixSortOnCpu(const std::list< Analyzer::OrderEntry > &order_entries) const
Definition: ResultSet.cpp:973
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
bool definitelyHasNoRows() const
Definition: ResultSet.cpp:397
ColumnWiseTargetAccessor(const ResultSet *result_set)
Definition: ResultSet.h:607
bool use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1122
bool isZeroCopyColumnarConversionPossible(size_t column_idx) const
Definition: ResultSet.cpp:1050
std::input_iterator_tag iterator_category
Definition: ResultSet.h:100
size_t global_entry_idx_
Definition: ResultSet.h:126
int8_t * getHostEstimatorBuffer() const
Definition: ResultSet.cpp:421
InternalTargetValue getVarlenOrderEntry(const int64_t str_ptr, const size_t str_len) const
const std::vector< TargetInfo > targets_
Definition: ResultSet.h:729
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: ResultSet.h:739
size_t drop_first_
Definition: ResultSet.h:737
const ResultSetStorage * allocateStorage() const
std::shared_ptr< const std::vector< std::string > > getStringDictionaryPayloadCopy(const int dict_id) const
Definition: ResultSet.cpp:1017
std::string toString() const
Definition: ResultSet.h:196
const int8_t * ptr2
Definition: ResultSet.h:569
std::list< std::shared_ptr< Chunk_NS::Chunk > > chunks_
Definition: ResultSet.h:747
ApproxMedianBuffers materializeApproxMedianColumns() const
Definition: ResultSet.cpp:683
QueryExecutionTimings timings_
Definition: ResultSet.h:745
const ResultSet * result_set_
Definition: ResultSet.h:656
void sort(const std::list< Analyzer::OrderEntry > &order_entries, size_t top_n, const Executor *executor)
Definition: ResultSet.cpp:498
void setQueueTime(const int64_t queue_time)
Definition: ResultSet.cpp:440
void dropFirstN(const size_t n)
Definition: ResultSet.cpp:55
std::vector< std::vector< int8_t > > literal_buffers_
Definition: ResultSet.h:751
std::vector< std::vector< TargetOffsets > > offsets_for_storage_
Definition: ResultSet.h:619
bool canUseFastBaselineSort(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n)
unsigned grid_size_
Definition: ResultSet.h:744
const std::list< Analyzer::OrderEntry > & order_entries_
Definition: ResultSet.h:655
std::vector< TargetValue > & reference
Definition: ResultSet.h:99
std::vector< PermutationIdx > Permutation
Definition: ResultSet.h:153
ResultSet * reduce(std::vector< ResultSet * > &)
std::tuple< std::vector< bool >, size_t > getSingleSlotTargetBitmap() const
Definition: ResultSet.cpp:1063
ResultSetRowIterator & operator++(void)
Definition: ResultSet.h:794
std::shared_ptr< ResultSet > getOwnResultSet()
StorageLookupResult findStorage(const size_t entry_idx) const
Definition: ResultSet.cpp:663
Comparator createComparator(const std::list< Analyzer::OrderEntry > &order_entries, const PermutationView permutation, const Executor *executor, const bool single_threaded)
Definition: ResultSet.h:665
An AbstractBuffer is a unit of data management for a data manager.
const PermutationView permutation_
Definition: ResultSet.h:657
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
value_type operator*() const
Definition: ResultSet.h:781
const std::vector< ColumnLazyFetchInfo > lazy_fetch_info_
Definition: ResultSet.h:752
RowWiseTargetAccessor(const ResultSet *result_set)
Definition: ResultSet.h:574
std::vector< std::vector< double >> ApproxMedianBuffers
Definition: ResultSet.h:624
void copyColumnIntoBuffer(const size_t column_idx, int8_t *output_buffer, const size_t output_buffer_size) const
RowSortException(const std::string &cause)
Definition: ResultSet.h:820
std::function< bool(const PermutationIdx, const PermutationIdx)> Comparator
Definition: ResultSet.h:155
friend ResultSetBuilder
Definition: ResultSet.h:159
void fixupCountDistinctPointers()
void radixSortOnGpu(const std::list< Analyzer::OrderEntry > &order_entries) const
Definition: ResultSet.cpp:936
const ResultSetStorage * getStorage() const
Definition: ResultSet.cpp:269
QueryDescriptionType getQueryDescriptionType() const
Data_Namespace::DataMgr * data_mgr_
Definition: ResultSet.h:760
static double calculateQuantile(quantile::TDigest *const t_digest, double const q)
Definition: ResultSet.cpp:729
Basic constructors and methods of the row set interface.
int64_t getQueueTime() const
Definition: ResultSet.cpp:452
std::vector< TargetValue > getRowAt(const size_t index) const
void fillOneEntry(const std::vector< int64_t > &entry)
Definition: ResultSet.h:338
void updateStorageEntryCount(const size_t new_entry_count)
Definition: ResultSet.h:229
uint32_t PermutationIdx
Definition: ResultSet.h:152
void serializeProjection(TSerializedRows &serialized_rows) const
ResultSetRowIterator operator++(int)
Definition: ResultSet.h:110
const std::shared_ptr< const Analyzer::Estimator > estimator_
Definition: ResultSet.h:757
SQLTypeInfo getColType(const size_t col_idx) const
Definition: ResultSet.cpp:277
GeoReturnType getGeoReturnType() const
Definition: ResultSet.h:385
ApproxMedianBuffers::value_type materializeApproxMedianColumn(const Analyzer::OrderEntry &order_entry) const
Definition: ResultSet.cpp:739
void holdChunkIterators(const std::shared_ptr< std::list< ChunkIter >> chunk_iters)
Definition: ResultSet.h:352
std::tuple< std::vector< bool >, size_t > getSupportedSingleSlotTargetBitmap() const
Definition: ResultSet.cpp:1087
ExecutorDeviceType getDeviceType() const
Definition: ResultSet.cpp:195
const int8_t * getColumnarBuffer(size_t column_idx) const
Definition: ResultSet.cpp:1057
ResultSetComparator(const std::list< Analyzer::OrderEntry > &order_entries, const ResultSet *result_set, const PermutationView permutation, const Executor *executor, const bool single_threaded)
Definition: ResultSet.h:630
size_t get_row_bytes(const QueryMemoryDescriptor &query_mem_desc)
bool isExplain() const
Definition: ResultSet.cpp:470
std::vector< TargetValue > value_type
Definition: ResultSet.h:96
bool isGeoColOnGpu(const size_t col_idx) const
const int8_t * get_rowwise_ptr(const int8_t *buff, const size_t entry_idx) const
Definition: ResultSet.h:591
size_t getNDVEstimator() const
std::vector< std::vector< std::vector< const int8_t * > > > col_buffers_
Definition: ResultSet.h:753
bool isRowAtEmpty(const size_t index) const
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
Definition: ResultSet.h:403
size_t entryCount() const
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:486
std::string typeName(const T *v)
Definition: toString.h:82
TargetValue makeTargetValue(const int8_t *ptr, const int8_t compact_sz, const TargetInfo &target_info, const size_t target_logical_idx, const bool translate_strings, const bool decimal_to_double, const size_t entry_buff_idx) const
void baselineSort(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n, const Executor *executor)
size_t getCurrentRowBufferIndex() const
Definition: ResultSet.h:116
const Permutation & getPermutationBuffer() const
Definition: ResultSet.cpp:592
void append(ResultSet &that)
Definition: ResultSet.cpp:239
std::string explanation_
Definition: ResultSet.h:767
std::vector< std::vector< int64_t > > consistent_frag_sizes_
Definition: ResultSet.h:755
int8_t * host_estimator_buffer_
Definition: ResultSet.h:759
friend class ResultSet
Definition: ResultSet.h:143
const ExecutorDeviceType device_type_
Definition: ResultSet.h:730
std::vector< TargetValue > getNextRowImpl(const bool translate_strings, const bool decimal_to_double) const
static PermutationView topPermutation(PermutationView, const size_t n, const Comparator &)
Definition: ResultSet.cpp:922
size_t getCurrentRowBufferIndex() const
Definition: ResultSet.cpp:231
void holdLiterals(std::vector< int8_t > &literal_buff)
Definition: ResultSet.h:355
bool g_enable_watchdog false
Definition: Execute.cpp:76
#define CHECK(condition)
Definition: Logger.h:203
#define DEBUG_TIMER(name)
Definition: Logger.h:319
int getGpuCount() const
size_t getBufferSizeBytes(const ExecutorDeviceType device_type) const
int8_t * getDeviceEstimatorBuffer() const
Definition: ResultSet.cpp:415
size_t fetched_so_far_
Definition: ResultSet.h:736
size_t crt_row_buff_idx_
Definition: ResultSet.h:735
Estimators to be used when precise cardinality isn't useful.
QueryDescriptionType
Definition: Types.h:26
int64_t getDistinctBufferRefFromBufferRowwise(int8_t *rowwise_target_ptr, const TargetInfo &target_info) const
std::vector< std::vector< std::vector< int64_t > > > frag_offsets_
Definition: ResultSet.h:754
bool operator()(const PermutationIdx lhs, const PermutationIdx rhs) const
Definition: ResultSet.cpp:771
void doBaselineSort(const ExecutorDeviceType device_type, const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n, const Executor *executor)
bool separate_varlen_storage_valid_
Definition: ResultSet.h:766
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
Executor(const ExecutorId id, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:144
std::vector< TargetValue > getNextRowUnlocked(const bool translate_strings, const bool decimal_to_double) const
std::vector< TargetValue > * pointer
Definition: ResultSet.h:98
const std::vector< int64_t > & getTargetInitVals() const
Definition: ResultSet.cpp:410
std::vector< size_t > getSlotIndicesForTargetIndices() const
Definition: ResultSet.cpp:1106
size_t advanceCursorToNextEntry() const
void create_active_buffer_set(BufferSet &count_distinct_active_buffer_set) const
std::set< int64_t > BufferSet
Definition: ResultSet.h:723
ResultSetRowIterator rowIterator(bool translate_strings, bool decimal_to_double) const
Definition: ResultSet.h:216
Definition: Analyzer.h:1413
BUFFER_ITERATOR_TYPE BufferIteratorType
Definition: ResultSet.h:628
ENTRY_TYPE getColumnarBaselineEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
Data_Namespace::DataMgr * getDataManager() const
size_t crt_row_buff_idx_
Definition: ResultSet.h:125
std::vector< std::vector< int64_t > > count_distinct_materialized_buffers_
Definition: ResultSet.h:661
Data_Namespace::AbstractBuffer * device_estimator_buffer_
Definition: ResultSet.h:758
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
const ApproxMedianBuffers approx_median_materialized_buffers_
Definition: ResultSet.h:662
std::vector< std::vector< TargetOffsets > > offsets_for_storage_
Definition: ResultSet.h:596
bool global_entry_idx_valid_
Definition: ResultSet.h:127
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1118
int64_t getRenderTime() const
Definition: ResultSet.cpp:457
void setCachedRowCount(const size_t row_count) const
Definition: ResultSet.cpp:344
bool isDirectColumnarConversionPossible() const
Definition: ResultSet.cpp:1032
size_t get_key_bytes_rowwise(const QueryMemoryDescriptor &query_mem_desc)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
TargetValue makeVarlenTargetValue(const int8_t *ptr1, const int8_t compact_sz1, const int8_t *ptr2, const int8_t compact_sz2, const TargetInfo &target_info, const size_t target_logical_idx, const bool translate_strings, const size_t entry_buff_idx) const
void serializeCountDistinctColumns(TSerializedRows &) const
const ResultSetStorage * storage_ptr
Definition: ResultSet.h:537
std::vector< std::unique_ptr< ResultSetStorage >> AppendedStorage
Definition: ResultSet.h:151
const std::vector< const int8_t * > & getColumnFrag(const size_t storge_idx, const size_t col_logical_idx, int64_t &global_idx) const
size_t binSearchRowCount() const
Definition: ResultSet.cpp:349
const std::vector< ColumnLazyFetchInfo > & getLazyFetchInfo() const
Definition: ResultSet.h:414
ENTRY_TYPE getRowWiseBaselineEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
int getDeviceId() const
Definition: ResultSet.cpp:482
static std::unique_ptr< ResultSet > unserialize(const TSerializedRows &serialized_rows, const Executor *)
const int device_id_
Definition: ResultSet.h:731