OmniSciDB a7179b2938
ResultSetStorage.h
/*
 * Copyright 2020 OmniSci, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef QUERYENGINE_RESULTSETSTORAGE_H
#define QUERYENGINE_RESULTSETSTORAGE_H

#include "CardinalityEstimator.h"
#include "DataMgr/Chunk/Chunk.h"
#include "ResultSetBufferAccessors.h"
#include "TargetValue.h"

#include <atomic>
#include <functional>
#include <list>

/*
 * Stores the underlying buffer and the meta-data for a result set. The buffer
 * format reflects the main requirements for result sets. Not all queries
 * specify a GROUP BY clause, but since it's the most important and challenging
 * case we'll focus on it. Note that the meta-data is stored separately from
 * the buffer and is not transferred to the GPU.
 *
 * 1. It has to be efficient for reduction of partial GROUP BY query results
 *    from multiple devices / cores, since the cardinalities can be high.
 *    Reduction currently happens on the host.
 * 2. No conversions should be needed when buffers are transferred from GPU to
 *    host for reduction. This implies the buffer needs to be "flat", with no
 *    pointers to chase, since they have no meaning in a different address space.
 * 3. Must be size-efficient.
 *
 * There are several variations of the format of a result set buffer, but the
 * most common is a sequence of entries which represent a row in the result or
 * an empty slot. One entry looks as follows:
 *
 * +-+-+-+-+-+-+-+-+-+-+-+--?--+-+-+-+-+-+-+-+-+-+-+-+-+
 * |key_0| ... |key_N-1| padding |value_0|...|value_N-1|
 * +-+-+-+-+-+-+-+-+-+-+-+--?--+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * (key_0 ... key_N-1) is a multi-component key, unique within the buffer.
 * It stores the tuple specified by the GROUP BY clause. All components have
 * the same width, 4 or 8 bytes. For 4-byte components, 4 bytes of padding are
 * added if the number of components is odd. Not all entries in the buffer are
 * valid; an empty entry contains EMPTY_KEY_{64, 32} for 8-byte / 4-byte width,
 * respectively. An empty entry is ignored by subsequent operations on the
 * result set (reduction, iteration, sort, etc.).
 *
 * value_0 through value_N-1 are 8-byte fields which hold the columns of the
 * result, like aggregates and projected expressions. They're reduced between
 * multiple partial results for identical (key_0 ... key_N-1) tuples.
 *
 * The order of entries is decided by the type of hash used, which depends on
 * the range of the keys. For small enough ranges, a perfect hash is used. When
 * a perfect hash isn't feasible, open addressing (using MurmurHash) with linear
 * probing is used instead, with a 50% fill rate.
 */
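
/*
 * Illustrative sketch, not part of the original header: for a hypothetical
 * query SELECT x, y, COUNT(*), SUM(z) ... GROUP BY x, y with 8-byte key
 * components, a row-wise entry in this format would occupy four 8-byte slots:
 *
 * | x | y | count | sum_z |
 *
 * Assuming a row-wise layout, 8-byte keys and a hypothetical entry_ptr to the
 * start of an entry, an empty slot would be recognized by the sentinel in its
 * first key component:
 *
 *   const bool is_empty =
 *       reinterpret_cast<const int64_t*>(entry_ptr)[0] == EMPTY_KEY_64;
 */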

struct ReductionCode;

struct ColumnLazyFetchInfo {
  const bool is_lazily_fetched;
  const int local_col_id;
  const SQLTypeInfo type;
};

struct OneIntegerColumnRow {
  const int64_t value;
  const bool valid;
};

class ResultSetStorage {
 private:
  ResultSetStorage(const std::vector<TargetInfo>& targets,
                   const QueryMemoryDescriptor& query_mem_desc,
                   int8_t* buff,
                   const bool buff_is_provided);

 public:
  void reduce(const ResultSetStorage& that,
              const std::vector<std::string>& serialized_varlen_buffer,
              const ReductionCode& reduction_code) const;

  void rewriteAggregateBufferOffsets(
      const std::vector<std::string>& serialized_varlen_buffer) const;

  int8_t* getUnderlyingBuffer() const;

  size_t getEntryCount() const { return query_mem_desc_.getEntryCount(); }

  template <class KeyType>
  void moveEntriesToBuffer(int8_t* new_buff, const size_t new_entry_count) const;

  template <class KeyType>
  void moveOneEntryToBuffer(const size_t entry_index,
                            int64_t* new_buff_i64,
                            const size_t new_entry_count,
                            const size_t key_count,
                            const size_t row_qw_count,
                            const int64_t* src_buff,
                            const size_t key_byte_width) const;

  void updateEntryCount(const size_t new_entry_count) {
    query_mem_desc_.setEntryCount(new_entry_count);
  }

  // Reduces results for a single row when using interleaved bin layouts
  static bool reduceSingleRow(const int8_t* row_ptr,
                              const int8_t warp_count,
                              const bool is_columnar,
                              const bool replace_bitmap_ptr_with_bitmap_sz,
                              std::vector<int64_t>& agg_vals,
                              const QueryMemoryDescriptor& query_mem_desc,
                              const std::vector<TargetInfo>& targets,
                              const std::vector<int64_t>& agg_init_vals);

 private:
  void reduceEntriesNoCollisionsColWise(
      int8_t* this_buff,
      const int8_t* that_buff,
      const ResultSetStorage& that,
      const size_t start_index,
      const size_t end_index,
      const std::vector<std::string>& serialized_varlen_buffer) const;

  void copyKeyColWise(const size_t entry_idx,
                      int8_t* this_buff,
                      const int8_t* that_buff) const;

  bool isEmptyEntry(const size_t entry_idx, const int8_t* buff) const;
  bool isEmptyEntry(const size_t entry_idx) const;
  bool isEmptyEntryColumnar(const size_t entry_idx, const int8_t* buff) const;

  void reduceOneEntryBaseline(int8_t* this_buff,
                              const int8_t* that_buff,
                              const size_t i,
                              const size_t that_entry_count,
                              const ResultSetStorage& that) const;

  void reduceOneEntrySlotsBaseline(int64_t* this_entry_slots,
                                   const int64_t* that_buff,
                                   const size_t that_entry_idx,
                                   const size_t that_entry_count,
                                   const ResultSetStorage& that) const;

  void initializeBaselineValueSlots(int64_t* this_entry_slots) const;

  void reduceOneSlotBaseline(int64_t* this_buff,
                             const size_t this_slot,
                             const int64_t* that_buff,
                             const size_t that_entry_count,
                             const size_t that_slot,
                             const TargetInfo& target_info,
                             const size_t target_logical_idx,
                             const size_t target_slot_idx,
                             const size_t init_agg_val_idx,
                             const ResultSetStorage& that) const;

  ALWAYS_INLINE
  void reduceOneSlotSingleValue(int8_t* this_ptr1,
                                const TargetInfo& target_info,
                                const size_t target_slot_idx,
                                const size_t init_agg_val_idx,
                                const int8_t* that_ptr1) const;

  ALWAYS_INLINE
  void reduceOneSlot(int8_t* this_ptr1,
                     int8_t* this_ptr2,
                     const int8_t* that_ptr1,
                     const int8_t* that_ptr2,
                     const TargetInfo& target_info,
                     const size_t target_logical_idx,
                     const size_t target_slot_idx,
                     const size_t init_agg_val_idx,
                     const ResultSetStorage& that,
                     const size_t first_slot_idx_for_target,
                     const std::vector<std::string>& serialized_varlen_buffer) const;

  void reduceOneCountDistinctSlot(int8_t* this_ptr1,
                                  const int8_t* that_ptr1,
                                  const size_t target_logical_idx,
                                  const ResultSetStorage& that) const;

  void fillOneEntryRowWise(const std::vector<int64_t>& entry);

  void fillOneEntryColWise(const std::vector<int64_t>& entry);

  void initializeRowWise() const;

  void initializeColWise() const;

  // TODO(alex): remove the following two methods, see comment about
  // count_distinct_sets_mapping_.
  void addCountDistinctSetPointerMapping(const int64_t remote_ptr, const int64_t ptr);

  int64_t mappedPtr(const int64_t) const;

  size_t binSearchRowCount() const;

  const std::vector<TargetInfo> targets_;
  QueryMemoryDescriptor query_mem_desc_;
  int8_t* buff_;
  const bool buff_is_provided_;
  std::vector<int64_t> target_init_vals_;
  // Provisional field used for multi-node until we improve the count distinct
  // and flatten the main group by buffer and the distinct buffers in a single,
  // contiguous buffer which we'll be able to serialize as a no-op. Used to
  // re-route the pointers in the result set received over the wire to this
  // machine's address space. Not efficient at all, just a placeholder!
  std::unordered_map<int64_t, int64_t> count_distinct_sets_mapping_;
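
  // Illustrative sketch, not from the original header: during a hypothetical
  // multi-node reduction the receiving side would record a
  // (remote address -> local address) pair for each transferred count distinct
  // buffer and later translate slots before merging, e.g.
  //
  //   storage->addCountDistinctSetPointerMapping(remote_ptr, local_ptr);
  //   const int64_t translated = storage->mappedPtr(remote_ptr);
  //
  // where storage, remote_ptr and local_ptr are placeholder names.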

  friend class ResultSet;
  friend class ResultSetManager;
};

using GroupValueInfo = std::pair<int64_t*, bool>;

namespace result_set {

int64_t lazy_decode(const ColumnLazyFetchInfo& col_lazy_fetch,
                    const int8_t* byte_stream,
                    const int64_t pos);
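
// Illustrative sketch, not from the original header: lazy_decode reads the
// value at row `pos` of a lazily fetched column directly from its fetch
// buffer rather than from the result set buffer. The names lazy_fetch_info
// and col_buffers below are placeholders:
//
//   const int64_t raw_val =
//       result_set::lazy_decode(lazy_fetch_info[col_idx], col_buffers[col_idx], pos);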

void fill_empty_key(void* key_ptr, const size_t key_count, const size_t key_width);
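
// Illustrative sketch, not from the original header: presumably writes the
// EMPTY_KEY_{64, 32} sentinel into each of the key_count components of
// key_width bytes, e.g. to mark a 3-component, 8-byte-wide key as empty:
//
//   int64_t key[3];
//   result_set::fill_empty_key(key, 3, sizeof(int64_t));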

int8_t get_width_for_slot(const size_t target_slot_idx,
                          const bool float_argument_input,
                          const QueryMemoryDescriptor& query_mem_desc);

size_t get_byteoff_of_slot(const size_t slot_idx,
                           const QueryMemoryDescriptor& query_mem_desc);

GroupValueInfo get_group_value_reduction(int64_t* groups_buffer,
                                         const uint32_t groups_buffer_entry_count,
                                         const int64_t* key,
                                         const uint32_t key_count,
                                         const size_t key_width,
                                         const QueryMemoryDescriptor& query_mem_desc,
                                         const int64_t* that_buff_i64,
                                         const size_t that_entry_idx,
                                         const size_t that_entry_count,
                                         const uint32_t row_size_quad);

std::vector<int64_t> initialize_target_values_for_storage(
    const std::vector<TargetInfo>& targets);

}  // namespace result_set

#endif  // QUERYENGINE_RESULTSETSTORAGE_H