OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ResultSetStorage.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #ifndef QUERYENGINE_RESULTSETSTORAGE_H
26 #define QUERYENGINE_RESULTSETSTORAGE_H
27 
28 #include "CardinalityEstimator.h"
29 #include "DataMgr/Chunk/Chunk.h"
31 #include "TargetValue.h"
32 
33 #include <atomic>
34 #include <functional>
35 #include <list>
36 
37 /*
38  * Stores the underlying buffer and the meta-data for a result set. The buffer
39  * format reflects the main requirements for result sets. Not all queries
40  * specify a GROUP BY clause, but since it's the most important and challenging
41  * case we'll focus on it. Note that the meta-data is stored separately from
42  * the buffer and it's not transferred to GPU.
43  *
44  * 1. It has to be efficient for reduction of partial GROUP BY query results
45  * from multiple devices / cores, the cardinalities can be high. Reduction
46  * currently happens on the host.
47  * 2. No conversions should be needed when buffers are transferred from GPU to
48  * host for reduction. This implies the buffer needs to be "flat", with no
49  * pointers to chase since they have no meaning in a different address space.
50  * 3. Must be size-efficient.
51  *
52  * There are several variations of the format of a result set buffer, but the
53  * most common is a sequence of entries which represent a row in the result or
54  * an empty slot. One entry looks as follows:
55  *
56  * +-+-+-+-+-+-+-+-+-+-+-+--?--+-+-+-+-+-+-+-+-+-+-+-+-+
57  * |key_0| ... |key_N-1| padding |value_0|...|value_N-1|
58  * +-+-+-+-+-+-+-+-+-+-+-+--?--+-+-+-+-+-+-+-+-+-+-+-+-+
59  *
 60  * (key_0 ... key_N-1) is a multi-component key, unique within the buffer.
61  * It stores the tuple specified by the GROUP BY clause. All components have
62  * the same width, 4 or 8 bytes. For the 4-byte components, 4-byte padding is
63  * added if the number of components is odd. Not all entries in the buffer are
64  * valid; an empty entry contains EMPTY_KEY_{64, 32} for 8-byte / 4-byte width,
65  * respectively. An empty entry is ignored by subsequent operations on the
66  * result set (reduction, iteration, sort etc).
67  *
68  * value_0 through value_N-1 are 8-byte fields which hold the columns of the
69  * result, like aggregates and projected expressions. They're reduced between
70  * multiple partial results for identical (key_0 ... key_N-1) tuples.
71  *
72  * The order of entries is decided by the type of hash used, which depends on
73  * the range of the keys. For small enough ranges, a perfect hash is used. When
74  * a perfect hash isn't feasible, open addressing (using MurmurHash) with linear
75  * probing is used instead, with a 50% fill rate.
76  */
77 
78 struct ReductionCode;
79 
81  const bool is_lazily_fetched;
82  const int local_col_id;
84 };
85 
87  const int64_t value;
88  const bool valid;
89 };
90 
93  int8_t* cpu_buffer_ptr;
94 
95  int8_t* computeCpuOffset(const int64_t gpu_offset_address) const;
96 };
97 
99  private:
100  ResultSetStorage(const std::vector<TargetInfo>& targets,
102  int8_t* buff,
103  const bool buff_is_provided);
104 
105  public:
106  void reduce(const ResultSetStorage& that,
107  const std::vector<std::string>& serialized_varlen_buffer,
108  const ReductionCode& reduction_code,
109  const size_t executor_id) const;
110 
112  const std::vector<std::string>& serialized_varlen_buffer) const;
113 
114  int8_t* getUnderlyingBuffer() const;
115 
116  size_t getEntryCount() const { return query_mem_desc_.getEntryCount(); }
117 
118  template <class KeyType>
119  void moveEntriesToBuffer(int8_t* new_buff, const size_t new_entry_count) const;
120 
121  template <class KeyType>
122  void moveOneEntryToBuffer(const size_t entry_index,
123  int64_t* new_buff_i64,
124  const size_t new_entry_count,
125  const size_t key_count,
126  const size_t row_qw_count,
127  const int64_t* src_buff,
128  const size_t key_byte_width) const;
129 
130  void updateEntryCount(const size_t new_entry_count) {
131  query_mem_desc_.setEntryCount(new_entry_count);
132  }
133 
134  void reduceOneApproxQuantileSlot(int8_t* this_ptr1,
135  const int8_t* that_ptr1,
136  const size_t target_logical_idx,
137  const ResultSetStorage& that) const;
138 
139  // Reduces results for a single row when using interleaved bin layouts
140  static bool reduceSingleRow(const int8_t* row_ptr,
141  const int8_t warp_count,
142  const bool is_columnar,
143  const bool replace_bitmap_ptr_with_bitmap_sz,
144  std::vector<int64_t>& agg_vals,
145  const QueryMemoryDescriptor& query_mem_desc,
146  const std::vector<TargetInfo>& targets,
147  const std::vector<int64_t>& agg_init_vals);
148 
149  private:
151  int8_t* this_buff,
152  const int8_t* that_buff,
153  const ResultSetStorage& that,
154  const size_t start_index,
155  const size_t end_index,
156  const std::vector<std::string>& serialized_varlen_buffer,
157  const size_t executor_id) const;
158 
159  void copyKeyColWise(const size_t entry_idx,
160  int8_t* this_buff,
161  const int8_t* that_buff) const;
162 
163  bool isEmptyEntry(const size_t entry_idx, const int8_t* buff) const;
164  bool isEmptyEntry(const size_t entry_idx) const;
165  bool isEmptyEntryColumnar(const size_t entry_idx, const int8_t* buff) const;
166 
167  void reduceOneEntryBaseline(int8_t* this_buff,
168  const int8_t* that_buff,
169  const size_t i,
170  const size_t that_entry_count,
171  const ResultSetStorage& that) const;
172 
173  void reduceOneEntrySlotsBaseline(int64_t* this_entry_slots,
174  const int64_t* that_buff,
175  const size_t that_entry_idx,
176  const size_t that_entry_count,
177  const ResultSetStorage& that) const;
178 
179  void initializeBaselineValueSlots(int64_t* this_entry_slots) const;
180 
181  void reduceOneSlotBaseline(int64_t* this_buff,
182  const size_t this_slot,
183  const int64_t* that_buff,
184  const size_t that_entry_count,
185  const size_t that_slot,
186  const TargetInfo& target_info,
187  const size_t target_logical_idx,
188  const size_t target_slot_idx,
189  const size_t init_agg_val_idx,
190  const ResultSetStorage& that) const;
191 
193  void reduceOneSlotSingleValue(int8_t* this_ptr1,
194  const TargetInfo& target_info,
195  const size_t target_slot_idx,
196  const size_t init_agg_val_idx,
197  const int8_t* that_ptr1) const;
198 
200  void reduceOneSlot(int8_t* this_ptr1,
201  int8_t* this_ptr2,
202  const int8_t* that_ptr1,
203  const int8_t* that_ptr2,
204  const TargetInfo& target_info,
205  const size_t target_logical_idx,
206  const size_t target_slot_idx,
207  const size_t init_agg_val_idx,
208  const ResultSetStorage& that,
209  const size_t first_slot_idx_for_target,
210  const std::vector<std::string>& serialized_varlen_buffer) const;
211 
212  void reduceOneCountDistinctSlot(int8_t* this_ptr1,
213  const int8_t* that_ptr1,
214  const size_t target_logical_idx,
215  const ResultSetStorage& that) const;
216 
217  void fillOneEntryRowWise(const std::vector<int64_t>& entry);
218 
219  void fillOneEntryColWise(const std::vector<int64_t>& entry);
220 
221  void initializeRowWise() const;
222 
223  void initializeColWise() const;
224 
226  return varlen_output_info_.get();
227  }
228 
229  // TODO(alex): remove the following two methods, see comment about
230  // count_distinct_sets_mapping_.
231  void addCountDistinctSetPointerMapping(const int64_t remote_ptr, const int64_t ptr);
232 
233  int64_t mappedPtr(const int64_t) const;
234 
235  size_t binSearchRowCount() const;
236 
237  const std::vector<TargetInfo> targets_;
239  int8_t* buff_;
240  const bool buff_is_provided_;
241  std::vector<int64_t> target_init_vals_;
242  // Provisional field used for multi-node until we improve the count distinct
243  // and flatten the main group by buffer and the distinct buffers in a single,
244  // contiguous buffer which we'll be able to serialize as a no-op. Used to
245  // re-route the pointers in the result set received over the wire to this
246  // machine address-space. Not efficient at all, just a placeholder!
247  std::unordered_map<int64_t, int64_t> count_distinct_sets_mapping_;
248 
249  // ptr to host varlen buffer and gpu address computation info
250  std::shared_ptr<VarlenOutputInfo> varlen_output_info_;
251 
252  friend class ResultSet;
253  friend class ResultSetManager;
254 };
255 
256 using GroupValueInfo = std::pair<int64_t*, bool>;
257 
258 namespace result_set {
259 
260 int64_t lazy_decode(const ColumnLazyFetchInfo& col_lazy_fetch,
261  const int8_t* byte_stream,
262  const int64_t pos);
263 
264 void fill_empty_key(void* key_ptr, const size_t key_count, const size_t key_width);
265 
266 int8_t get_width_for_slot(const size_t target_slot_idx,
267  const bool float_argument_input,
269 
270 size_t get_byteoff_of_slot(const size_t slot_idx,
271  const QueryMemoryDescriptor& query_mem_desc);
272 
273 GroupValueInfo get_group_value_reduction(int64_t* groups_buffer,
274  const uint32_t groups_buffer_entry_count,
275  const int64_t* key,
276  const uint32_t key_count,
277  const size_t key_width,
278  const QueryMemoryDescriptor& query_mem_desc,
279  const int64_t* that_buff_i64,
280  const size_t that_entry_idx,
281  const size_t that_entry_count,
282  const uint32_t row_size_quad);
283 
284 std::vector<int64_t> initialize_target_values_for_storage(
285  const std::vector<TargetInfo>& targets);
286 
287 } // namespace result_set
288 
289 #endif // QUERYENGINE_RESULTSETSTORAGE_H
const SQLTypeInfo type
GroupValueInfo get_group_value_reduction(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const size_t key_width, const QueryMemoryDescriptor &query_mem_desc, const int64_t *that_buff_i64, const size_t that_entry_idx, const size_t that_entry_count, const uint32_t row_size_quad)
void moveOneEntryToBuffer(const size_t entry_index, int64_t *new_buff_i64, const size_t new_entry_count, const size_t key_count, const size_t row_qw_count, const int64_t *src_buff, const size_t key_byte_width) const
bool isEmptyEntry(const size_t entry_idx, const int8_t *buff) const
const std::vector< TargetInfo > targets_
ALWAYS_INLINE void reduceOneSlot(int8_t *this_ptr1, int8_t *this_ptr2, const int8_t *that_ptr1, const int8_t *that_ptr2, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const size_t init_agg_val_idx, const ResultSetStorage &that, const size_t first_slot_idx_for_target, const std::vector< std::string > &serialized_varlen_buffer) const
void setEntryCount(const size_t val)
std::vector< int64_t > target_init_vals_
void initializeColWise() const
void reduceOneEntryBaseline(int8_t *this_buff, const int8_t *that_buff, const size_t i, const size_t that_entry_count, const ResultSetStorage &that) const
bool isEmptyEntryColumnar(const size_t entry_idx, const int8_t *buff) const
Utility functions for easy access to the result set buffers.
void addCountDistinctSetPointerMapping(const int64_t remote_ptr, const int64_t ptr)
ResultSetStorage(const std::vector< TargetInfo > &targets, const QueryMemoryDescriptor &query_mem_desc, int8_t *buff, const bool buff_is_provided)
void initializeRowWise() const
void reduceOneApproxQuantileSlot(int8_t *this_ptr1, const int8_t *that_ptr1, const size_t target_logical_idx, const ResultSetStorage &that) const
void initializeBaselineValueSlots(int64_t *this_entry_slots) const
void reduceOneSlotBaseline(int64_t *this_buff, const size_t this_slot, const int64_t *that_buff, const size_t that_entry_count, const size_t that_slot, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const size_t init_agg_val_idx, const ResultSetStorage &that) const
std::shared_ptr< VarlenOutputInfo > varlen_output_info_
friend class ResultSet
std::unordered_map< int64_t, int64_t > count_distinct_sets_mapping_
int8_t * getUnderlyingBuffer() const
const VarlenOutputInfo * getVarlenOutputInfo() const
const bool buff_is_provided_
int8_t get_width_for_slot(const size_t target_slot_idx, const bool float_argument_input, const QueryMemoryDescriptor &query_mem_desc)
void reduceOneEntrySlotsBaseline(int64_t *this_entry_slots, const int64_t *that_buff, const size_t that_entry_idx, const size_t that_entry_count, const ResultSetStorage &that) const
size_t get_byteoff_of_slot(const size_t slot_idx, const QueryMemoryDescriptor &query_mem_desc)
void copyKeyColWise(const size_t entry_idx, int8_t *this_buff, const int8_t *that_buff) const
void reduceOneCountDistinctSlot(int8_t *this_ptr1, const int8_t *that_ptr1, const size_t target_logical_idx, const ResultSetStorage &that) const
void moveEntriesToBuffer(int8_t *new_buff, const size_t new_entry_count) const
void fill_empty_key(void *key_ptr, const size_t key_count, const size_t key_width)
void updateEntryCount(const size_t new_entry_count)
size_t binSearchRowCount() const
std::pair< int64_t *, bool > GroupValueInfo
int64_t lazy_decode(const ColumnLazyFetchInfo &col_lazy_fetch, const int8_t *byte_stream, const int64_t pos)
void fillOneEntryColWise(const std::vector< int64_t > &entry)
void reduce(const ResultSetStorage &that, const std::vector< std::string > &serialized_varlen_buffer, const ReductionCode &reduction_code, const size_t executor_id) const
size_t getEntryCount() const
std::vector< int64_t > initialize_target_values_for_storage(const std::vector< TargetInfo > &targets)
void reduceEntriesNoCollisionsColWise(int8_t *this_buff, const int8_t *that_buff, const ResultSetStorage &that, const size_t start_index, const size_t end_index, const std::vector< std::string > &serialized_varlen_buffer, const size_t executor_id) const
ALWAYS_INLINE void reduceOneSlotSingleValue(int8_t *this_ptr1, const TargetInfo &target_info, const size_t target_slot_idx, const size_t init_agg_val_idx, const int8_t *that_ptr1) const
int64_t mappedPtr(const int64_t) const
int8_t * computeCpuOffset(const int64_t gpu_offset_address) const
const bool is_lazily_fetched
Estimators to be used when precise cardinality isn't useful.
void fillOneEntryRowWise(const std::vector< int64_t > &entry)
void rewriteAggregateBufferOffsets(const std::vector< std::string > &serialized_varlen_buffer) const
#define ALWAYS_INLINE
static bool reduceSingleRow(const int8_t *row_ptr, const int8_t warp_count, const bool is_columnar, const bool replace_bitmap_ptr_with_bitmap_sz, std::vector< int64_t > &agg_vals, const QueryMemoryDescriptor &query_mem_desc, const std::vector< TargetInfo > &targets, const std::vector< int64_t > &agg_init_vals)
QueryMemoryDescriptor query_mem_desc_