OmniSciDB  94e8789169
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ResultSetStorage.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #ifndef QUERYENGINE_RESULTSETSTORAGE_H
26 #define QUERYENGINE_RESULTSETSTORAGE_H
27 
28 #include "CardinalityEstimator.h"
29 #include "DataMgr/Chunk/Chunk.h"
31 #include "TargetValue.h"
32 
33 #include <atomic>
34 #include <functional>
35 #include <list>
36 
37 /*
38  * Stores the underlying buffer and the meta-data for a result set. The buffer
39  * format reflects the main requirements for result sets. Not all queries
40  * specify a GROUP BY clause, but since it's the most important and challenging
41  * case we'll focus on it. Note that the meta-data is stored separately from
42  * the buffer and it's not transferred to GPU.
43  *
44  * 1. It has to be efficient for reduction of partial GROUP BY query results
45  * from multiple devices / cores, the cardinalities can be high. Reduction
46  * currently happens on the host.
47  * 2. No conversions should be needed when buffers are transferred from GPU to
48  * host for reduction. This implies the buffer needs to be "flat", with no
49  * pointers to chase since they have no meaning in a different address space.
50  * 3. Must be size-efficient.
51  *
52  * There are several variations of the format of a result set buffer, but the
53  * most common is a sequence of entries which represent a row in the result or
54  * an empty slot. One entry looks as follows:
55  *
56  * +-+-+-+-+-+-+-+-+-+-+-+--?--+-+-+-+-+-+-+-+-+-+-+-+-+
57  * |key_0| ... |key_N-1| padding |value_0|...|value_N-1|
58  * +-+-+-+-+-+-+-+-+-+-+-+--?--+-+-+-+-+-+-+-+-+-+-+-+-+
59  *
60  * (key_0 ... key_N-1) is a multiple component key, unique within the buffer.
61  * It stores the tuple specified by the GROUP BY clause. All components have
62  * the same width, 4 or 8 bytes. For the 4-byte components, 4-byte padding is
63  * added if the number of components is odd. Not all entries in the buffer are
64  * valid; an empty entry contains EMPTY_KEY_{64, 32} for 8-byte / 4-byte width,
65  * respectively. An empty entry is ignored by subsequent operations on the
66  * result set (reduction, iteration, sort etc).
67  *
68  * value_0 through value_N-1 are 8-byte fields which hold the columns of the
69  * result, like aggregates and projected expressions. They're reduced between
70  * multiple partial results for identical (key_0 ... key_N-1) tuples.
71  *
72  * The order of entries is decided by the type of hash used, which depends on
73  * the range of the keys. For small enough ranges, a perfect hash is used. When
74  * a perfect hash isn't feasible, open addressing (using MurmurHash) with linear
75  * probing is used instead, with a 50% fill rate.
76  */
77 
78 struct ReductionCode;
79 
81  const bool is_lazily_fetched;
82  const int local_col_id;
84 };
85 
87  const int64_t value;
88  const bool valid;
89 };
90 
92  private:
93  ResultSetStorage(const std::vector<TargetInfo>& targets,
95  int8_t* buff,
96  const bool buff_is_provided);
97 
98  public:
99  void reduce(const ResultSetStorage& that,
100  const std::vector<std::string>& serialized_varlen_buffer,
101  const ReductionCode& reduction_code) const;
102 
104  const std::vector<std::string>& serialized_varlen_buffer) const;
105 
106  int8_t* getUnderlyingBuffer() const;
107 
108  size_t getEntryCount() const { return query_mem_desc_.getEntryCount(); }
109 
110  template <class KeyType>
111  void moveEntriesToBuffer(int8_t* new_buff, const size_t new_entry_count) const;
112 
113  template <class KeyType>
114  void moveOneEntryToBuffer(const size_t entry_index,
115  int64_t* new_buff_i64,
116  const size_t new_entry_count,
117  const size_t key_count,
118  const size_t row_qw_count,
119  const int64_t* src_buff,
120  const size_t key_byte_width) const;
121 
122  void updateEntryCount(const size_t new_entry_count) {
123  query_mem_desc_.setEntryCount(new_entry_count);
124  }
125 
126  void reduceOneApproxMedianSlot(int8_t* this_ptr1,
127  const int8_t* that_ptr1,
128  const size_t target_logical_idx,
129  const ResultSetStorage& that) const;
130 
131  // Reduces results for a single row when using interleaved bin layouts
132  static bool reduceSingleRow(const int8_t* row_ptr,
133  const int8_t warp_count,
134  const bool is_columnar,
135  const bool replace_bitmap_ptr_with_bitmap_sz,
136  std::vector<int64_t>& agg_vals,
137  const QueryMemoryDescriptor& query_mem_desc,
138  const std::vector<TargetInfo>& targets,
139  const std::vector<int64_t>& agg_init_vals);
140 
141  private:
143  int8_t* this_buff,
144  const int8_t* that_buff,
145  const ResultSetStorage& that,
146  const size_t start_index,
147  const size_t end_index,
148  const std::vector<std::string>& serialized_varlen_buffer) const;
149 
150  void copyKeyColWise(const size_t entry_idx,
151  int8_t* this_buff,
152  const int8_t* that_buff) const;
153 
154  bool isEmptyEntry(const size_t entry_idx, const int8_t* buff) const;
155  bool isEmptyEntry(const size_t entry_idx) const;
156  bool isEmptyEntryColumnar(const size_t entry_idx, const int8_t* buff) const;
157 
158  void reduceOneEntryBaseline(int8_t* this_buff,
159  const int8_t* that_buff,
160  const size_t i,
161  const size_t that_entry_count,
162  const ResultSetStorage& that) const;
163 
164  void reduceOneEntrySlotsBaseline(int64_t* this_entry_slots,
165  const int64_t* that_buff,
166  const size_t that_entry_idx,
167  const size_t that_entry_count,
168  const ResultSetStorage& that) const;
169 
170  void initializeBaselineValueSlots(int64_t* this_entry_slots) const;
171 
172  void reduceOneSlotBaseline(int64_t* this_buff,
173  const size_t this_slot,
174  const int64_t* that_buff,
175  const size_t that_entry_count,
176  const size_t that_slot,
177  const TargetInfo& target_info,
178  const size_t target_logical_idx,
179  const size_t target_slot_idx,
180  const size_t init_agg_val_idx,
181  const ResultSetStorage& that) const;
182 
184  void reduceOneSlotSingleValue(int8_t* this_ptr1,
185  const TargetInfo& target_info,
186  const size_t target_slot_idx,
187  const size_t init_agg_val_idx,
188  const int8_t* that_ptr1) const;
189 
191  void reduceOneSlot(int8_t* this_ptr1,
192  int8_t* this_ptr2,
193  const int8_t* that_ptr1,
194  const int8_t* that_ptr2,
195  const TargetInfo& target_info,
196  const size_t target_logical_idx,
197  const size_t target_slot_idx,
198  const size_t init_agg_val_idx,
199  const ResultSetStorage& that,
200  const size_t first_slot_idx_for_target,
201  const std::vector<std::string>& serialized_varlen_buffer) const;
202 
203  void reduceOneCountDistinctSlot(int8_t* this_ptr1,
204  const int8_t* that_ptr1,
205  const size_t target_logical_idx,
206  const ResultSetStorage& that) const;
207 
208  void fillOneEntryRowWise(const std::vector<int64_t>& entry);
209 
210  void fillOneEntryColWise(const std::vector<int64_t>& entry);
211 
212  void initializeRowWise() const;
213 
214  void initializeColWise() const;
215 
216  // TODO(alex): remove the following two methods, see comment about
217  // count_distinct_sets_mapping_.
218  void addCountDistinctSetPointerMapping(const int64_t remote_ptr, const int64_t ptr);
219 
220  int64_t mappedPtr(const int64_t) const;
221 
222  size_t binSearchRowCount() const;
223 
224  const std::vector<TargetInfo> targets_;
226  int8_t* buff_;
227  const bool buff_is_provided_;
228  std::vector<int64_t> target_init_vals_;
229  // Provisional field used for multi-node until we improve the count distinct
230  // and flatten the main group by buffer and the distinct buffers in a single,
231  // contiguous buffer which we'll be able to serialize as a no-op. Used to
232  // re-route the pointers in the result set received over the wire to this
233  // machine address-space. Not efficient at all, just a placeholder!
234  std::unordered_map<int64_t, int64_t> count_distinct_sets_mapping_;
235 
236  friend class ResultSet;
237  friend class ResultSetManager;
238 };
239 
240 using GroupValueInfo = std::pair<int64_t*, bool>;
241 
242 namespace result_set {
243 
244 int64_t lazy_decode(const ColumnLazyFetchInfo& col_lazy_fetch,
245  const int8_t* byte_stream,
246  const int64_t pos);
247 
248 void fill_empty_key(void* key_ptr, const size_t key_count, const size_t key_width);
249 
250 int8_t get_width_for_slot(const size_t target_slot_idx,
251  const bool float_argument_input,
253 
254 size_t get_byteoff_of_slot(const size_t slot_idx,
255  const QueryMemoryDescriptor& query_mem_desc);
256 
257 GroupValueInfo get_group_value_reduction(int64_t* groups_buffer,
258  const uint32_t groups_buffer_entry_count,
259  const int64_t* key,
260  const uint32_t key_count,
261  const size_t key_width,
262  const QueryMemoryDescriptor& query_mem_desc,
263  const int64_t* that_buff_i64,
264  const size_t that_entry_idx,
265  const size_t that_entry_count,
266  const uint32_t row_size_quad);
267 
268 std::vector<int64_t> initialize_target_values_for_storage(
269  const std::vector<TargetInfo>& targets);
270 
271 } // namespace result_set
272 
273 #endif // QUERYENGINE_RESULTSETSTORAGE_H
const SQLTypeInfo type
GroupValueInfo get_group_value_reduction(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const size_t key_width, const QueryMemoryDescriptor &query_mem_desc, const int64_t *that_buff_i64, const size_t that_entry_idx, const size_t that_entry_count, const uint32_t row_size_quad)
void moveOneEntryToBuffer(const size_t entry_index, int64_t *new_buff_i64, const size_t new_entry_count, const size_t key_count, const size_t row_qw_count, const int64_t *src_buff, const size_t key_byte_width) const
bool isEmptyEntry(const size_t entry_idx, const int8_t *buff) const
const std::vector< TargetInfo > targets_
ALWAYS_INLINE void reduceOneSlot(int8_t *this_ptr1, int8_t *this_ptr2, const int8_t *that_ptr1, const int8_t *that_ptr2, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const size_t init_agg_val_idx, const ResultSetStorage &that, const size_t first_slot_idx_for_target, const std::vector< std::string > &serialized_varlen_buffer) const
void setEntryCount(const size_t val)
void reduceEntriesNoCollisionsColWise(int8_t *this_buff, const int8_t *that_buff, const ResultSetStorage &that, const size_t start_index, const size_t end_index, const std::vector< std::string > &serialized_varlen_buffer) const
std::vector< int64_t > target_init_vals_
void initializeColWise() const
void reduceOneEntryBaseline(int8_t *this_buff, const int8_t *that_buff, const size_t i, const size_t that_entry_count, const ResultSetStorage &that) const
bool isEmptyEntryColumnar(const size_t entry_idx, const int8_t *buff) const
Utility functions for easy access to the result set buffers.
void addCountDistinctSetPointerMapping(const int64_t remote_ptr, const int64_t ptr)
ResultSetStorage(const std::vector< TargetInfo > &targets, const QueryMemoryDescriptor &query_mem_desc, int8_t *buff, const bool buff_is_provided)
void initializeRowWise() const
void initializeBaselineValueSlots(int64_t *this_entry_slots) const
void reduceOneSlotBaseline(int64_t *this_buff, const size_t this_slot, const int64_t *that_buff, const size_t that_entry_count, const size_t that_slot, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const size_t init_agg_val_idx, const ResultSetStorage &that) const
friend class ResultSet
std::unordered_map< int64_t, int64_t > count_distinct_sets_mapping_
void reduce(const ResultSetStorage &that, const std::vector< std::string > &serialized_varlen_buffer, const ReductionCode &reduction_code) const
int8_t * getUnderlyingBuffer() const
const bool buff_is_provided_
int8_t get_width_for_slot(const size_t target_slot_idx, const bool float_argument_input, const QueryMemoryDescriptor &query_mem_desc)
void reduceOneEntrySlotsBaseline(int64_t *this_entry_slots, const int64_t *that_buff, const size_t that_entry_idx, const size_t that_entry_count, const ResultSetStorage &that) const
size_t get_byteoff_of_slot(const size_t slot_idx, const QueryMemoryDescriptor &query_mem_desc)
void copyKeyColWise(const size_t entry_idx, int8_t *this_buff, const int8_t *that_buff) const
void reduceOneCountDistinctSlot(int8_t *this_ptr1, const int8_t *that_ptr1, const size_t target_logical_idx, const ResultSetStorage &that) const
void moveEntriesToBuffer(int8_t *new_buff, const size_t new_entry_count) const
void fill_empty_key(void *key_ptr, const size_t key_count, const size_t key_width)
void updateEntryCount(const size_t new_entry_count)
size_t binSearchRowCount() const
std::pair< int64_t *, bool > GroupValueInfo
int64_t lazy_decode(const ColumnLazyFetchInfo &col_lazy_fetch, const int8_t *byte_stream, const int64_t pos)
void fillOneEntryColWise(const std::vector< int64_t > &entry)
size_t getEntryCount() const
std::vector< int64_t > initialize_target_values_for_storage(const std::vector< TargetInfo > &targets)
void reduceOneApproxMedianSlot(int8_t *this_ptr1, const int8_t *that_ptr1, const size_t target_logical_idx, const ResultSetStorage &that) const
ALWAYS_INLINE void reduceOneSlotSingleValue(int8_t *this_ptr1, const TargetInfo &target_info, const size_t target_slot_idx, const size_t init_agg_val_idx, const int8_t *that_ptr1) const
int64_t mappedPtr(const int64_t) const
const bool is_lazily_fetched
Estimators to be used when precise cardinality isn&#39;t useful.
void fillOneEntryRowWise(const std::vector< int64_t > &entry)
void rewriteAggregateBufferOffsets(const std::vector< std::string > &serialized_varlen_buffer) const
#define ALWAYS_INLINE
static bool reduceSingleRow(const int8_t *row_ptr, const int8_t warp_count, const bool is_columnar, const bool replace_bitmap_ptr_with_bitmap_sz, std::vector< int64_t > &agg_vals, const QueryMemoryDescriptor &query_mem_desc, const std::vector< TargetInfo > &targets, const std::vector< int64_t > &agg_init_vals)
QueryMemoryDescriptor query_mem_desc_