OmniSciDB  c07336695a
QueryMemoryDescriptor.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #ifndef QUERYENGINE_QUERYMEMORYDESCRIPTOR_H
26 #define QUERYENGINE_QUERYMEMORYDESCRIPTOR_H
27 
28 #include "../CompilationOptions.h"
29 #include "../CountDistinct.h"
30 #include "ColSlotContext.h"
31 #include "Types.h"
32 
33 #include <boost/optional.hpp>
34 #include "Shared/Logger.h"
35 
36 #include <algorithm>
37 #include <cstddef>
38 #include <cstdint>
39 #include <memory>
40 #include <numeric>
41 #include <unordered_map>
42 #include <vector>
43 
44 #include <Shared/SqlTypesLayout.h>
45 #include <Shared/TargetInfo.h>
46 
47 extern bool g_cluster;
48 
49 class Executor;
51 class RenderInfo;
52 class RowSetMemoryOwner;
53 struct InputTableInfo;
54 
55 // Shared: threads in the same block share memory, atomic operations required
56 // SharedForKeylessOneColumnKnownRange: special case of "Shared", but for keyless
57 // aggregates with single column group by
59 
60 struct RelAlgExecutionUnit;
61 class TResultSetBufferDescriptor;
63 struct ColRangeInfo;
64 struct KeylessInfo;
65 
67  public:
69 
70  // Fully specified constructor (original note: "constructor for init call").
// Every property is supplied explicitly by the caller; nothing in this
// declaration shows values being derived. count_distinct_descriptors is taken
// by value, all other non-scalar arguments by const reference.
// NOTE(review): presumably only reached through the static init() factory —
// confirm against the out-of-line definition.
71  QueryMemoryDescriptor(const Executor* executor,
72  const RelAlgExecutionUnit& ra_exe_unit,
73  const std::vector<InputTableInfo>& query_infos,
74  const bool allow_multifrag,
75  const bool keyless_hash,
76  const bool interleaved_bins_on_gpu,
77  const int32_t idx_target_as_key,
78  const ColRangeInfo& col_range_info,
79  const ColSlotContext& col_slot_context,
80  const std::vector<int8_t>& group_col_widths,
81  const int8_t group_col_compact_width,
82  const std::vector<ssize_t>& target_groupby_indices,
83  const size_t entry_count,
84  const GroupByMemSharing sharing,
85  const bool shared_mem_for_group_by,
86  const CountDistinctDescriptors count_distinct_descriptors,
87  const bool sort_on_gpu_hint,
88  const bool output_columnar,
89  const bool render_output,
90  const bool must_use_baseline_sort);
91 
92  QueryMemoryDescriptor(const Executor* executor,
93  const size_t entry_count,
94  const QueryDescriptionType query_desc_type);
95 
96  QueryMemoryDescriptor(const QueryDescriptionType query_desc_type,
97  const int64_t min_val,
98  const int64_t max_val,
99  const bool has_nulls,
100  const std::vector<int8_t>& group_col_widths);
101 
102  // Serialization: build a descriptor from its TResultSetBufferDescriptor
// form, and convert a descriptor back into that form (toThrift is static and
// takes the descriptor to convert as its argument).
// NOTE(review): the single-argument constructor is not `explicit`, so a
// TResultSetBufferDescriptor converts implicitly — confirm this is intended.
103  QueryMemoryDescriptor(const TResultSetBufferDescriptor& thrift_query_memory_descriptor);
104  static TResultSetBufferDescriptor toThrift(const QueryMemoryDescriptor&);
105 
106  bool operator==(const QueryMemoryDescriptor& other) const;
107 
// Static factory. Returns a QueryMemoryDescriptor owned by the caller through
// std::unique_ptr. Only the declaration is visible here; the definition lives
// out of line.
// render_info is the only mutable argument (non-const pointer);
// count_distinct_descriptors is taken by value.
// NOTE(review): whether render_info may be null, and whether the result can be
// null, cannot be determined from this declaration — check the definition.
108  static std::unique_ptr<QueryMemoryDescriptor> init(
109  const Executor* executor,
110  const RelAlgExecutionUnit& ra_exe_unit,
111  const std::vector<InputTableInfo>& query_infos,
112  const ColRangeInfo& col_range_info,
113  const KeylessInfo& keyless_info,
114  const bool allow_multifrag,
115  const ExecutorDeviceType device_type,
116  const int8_t crt_min_byte_width,
117  const bool sort_on_gpu_hint,
118  const size_t shard_count,
119  const size_t max_groups_buffer_entry_count,
120  RenderInfo* render_info,
121  const CountDistinctDescriptors count_distinct_descriptors,
122  const bool must_use_baseline_sort,
123  const bool output_columnar_hint);
124 
125  std::unique_ptr<QueryExecutionContext> getQueryExecutionContext(
126  const RelAlgExecutionUnit&,
127  const Executor* executor,
128  const ExecutorDeviceType device_type,
129  const ExecutorDispatchMode dispatch_mode,
130  const int device_id,
131  const int64_t num_rows,
132  const std::vector<std::vector<const int8_t*>>& col_buffers,
133  const std::vector<std::vector<uint64_t>>& frag_offsets,
134  std::shared_ptr<RowSetMemoryOwner>,
135  const bool output_columnar,
136  const bool sort_on_gpu,
137  RenderInfo*) const;
138 
// Returns true when the range [min_val, max_val] is wider than 10000 buckets,
// i.e. when (max_val - min_val) > 10000 * max(bucket, 1).
//
// The test is evaluated with unsigned arithmetic and a division instead of the
// literal formula, because both `max_val - min_val` (e.g. INT64_MIN..INT64_MAX)
// and `10000 * bucket` (large bucket) can overflow int64_t, which is undefined
// behaviour. Results are unchanged for every input that did not overflow.
//
// @param max_val  upper end of the value range
// @param min_val  lower end of the value range
// @param bucket   bucket size; values below 1 are treated as 1
// @return false for an empty or inverted range (max_val <= min_val)
static bool many_entries(const int64_t max_val,
                         const int64_t min_val,
                         const int64_t bucket) {
  if (max_val <= min_val) {
    // The difference is <= 0 and can never exceed a positive threshold.
    return false;
  }
  constexpr uint64_t kMaxBuckets = 10000;
  // Modulo-2^64 subtraction is well defined and exact since max_val > min_val.
  const uint64_t range =
      static_cast<uint64_t>(max_val) - static_cast<uint64_t>(min_val);
  const uint64_t step = static_cast<uint64_t>(std::max(bucket, int64_t(1)));
  // range > kMaxBuckets * step, expressed without the multiplication:
  // with range = q * step + r, that holds iff q > K, or q == K and r > 0.
  const uint64_t whole_buckets = range / step;
  return whole_buckets > kMaxBuckets ||
         (whole_buckets == kMaxBuckets && range % step != 0);
}
144 
146  const CountDistinctDescriptors& count_distinct_descriptors) {
147  return std::all_of(count_distinct_descriptors.begin(),
148  count_distinct_descriptors.end(),
149  [](const CountDistinctDescriptor& desc) {
150  return desc.impl_type_ == CountDistinctImplType::Invalid;
151  });
152  }
153 
155  return countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
156  }
157 
158  static int8_t pick_target_compact_width(const RelAlgExecutionUnit& ra_exe_unit,
159  const std::vector<InputTableInfo>& query_infos,
160  const int8_t crt_min_byte_width);
161 
162  // Getters and Setters
163  const Executor* getExecutor() const { return executor_; }
164 
165  QueryDescriptionType getQueryDescriptionType() const { return query_desc_type_; }
166  void setQueryDescriptionType(const QueryDescriptionType val) { query_desc_type_ = val; }
168  return getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash &&
169  getGroupbyColCount() == 1;
170  }
171 
172  bool hasKeylessHash() const { return keyless_hash_; }
173  void setHasKeylessHash(const bool val) { keyless_hash_ = val; }
174 
175  bool hasInterleavedBinsOnGpu() const { return interleaved_bins_on_gpu_; }
176  void setHasInterleavedBinsOnGpu(const bool val) { interleaved_bins_on_gpu_ = val; }
177 
178  int32_t getTargetIdxForKey() const { return idx_target_as_key_; }
179  void setTargetIdxForKey(const int32_t val) { idx_target_as_key_ = val; }
180 
// Number of entries in group_col_widths_ (one per group-by key column).
181  size_t groupColWidthsSize() const { return group_col_widths_.size(); }
// Width stored for the group-by key column at position key_idx.
// CHECK_LT guards the index against group_col_widths_.size() before the
// unchecked operator[] access.
182  int8_t groupColWidth(const size_t key_idx) const {
183  CHECK_LT(key_idx, group_col_widths_.size());
184  return group_col_widths_[key_idx];
185  }
186  size_t getPrependedGroupColOffInBytes(const size_t group_idx) const;
187  size_t getPrependedGroupBufferSizeInBytes() const;
188 
189  const auto groupColWidthsBegin() const { return group_col_widths_.begin(); }
190  const auto groupColWidthsEnd() const { return group_col_widths_.end(); }
191  void clearGroupColWidths() { group_col_widths_.clear(); }
192 
193  bool isGroupBy() const { return !group_col_widths_.empty(); }
194 
195  void setGroupColCompactWidth(const int8_t val) { group_col_compact_width_ = val; }
196 
197  size_t getColCount() const;
198  size_t getSlotCount() const;
199 
200  const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const;
201  const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const;
202 
203  const int8_t getSlotIndexForSingleSlotCol(const size_t col_idx) const;
204 
205  size_t getPaddedColWidthForRange(const size_t offset, const size_t range) const {
206  size_t ret = 0;
207  for (size_t i = offset; i < offset + range; i++) {
208  ret += static_cast<size_t>(getPaddedSlotWidthBytes(i));
209  }
210  return ret;
211  }
212 
213  void useConsistentSlotWidthSize(const int8_t slot_width_size);
214  size_t getRowWidth() const;
215 
216  int8_t updateActualMinByteWidth(const int8_t actual_min_byte_width) const;
217 
218  void addColSlotInfo(const std::vector<std::tuple<int8_t, int8_t>>& slots_for_col);
219 
220  void clearSlotInfo();
221 
222  void alignPaddedSlots();
223 
// Returns the entry of target_groupby_indices_ at position target_idx.
// CHECK_LT guards the index against the vector's size before the unchecked
// operator[] access.
// NOTE(review): the meaning of the signed value (e.g. a negative entry for
// "not a group-by key") is not visible here — confirm against the writers.
224  ssize_t getTargetGroupbyIndex(const size_t target_idx) const {
225  CHECK_LT(target_idx, target_groupby_indices_.size());
226  return target_groupby_indices_[target_idx];
227  }
// Number of entries currently held in target_groupby_indices_.
228  size_t targetGroupbyIndicesSize() const { return target_groupby_indices_.size(); }
// Empties target_groupby_indices_; no other member is touched.
229  void clearTargetGroupbyIndices() { target_groupby_indices_.clear(); }
230 
231  size_t getEntryCount() const { return entry_count_; }
232  void setEntryCount(const size_t val) { entry_count_ = val; }
233 
234  int64_t getMinVal() const { return min_val_; }
235  int64_t getMaxVal() const { return max_val_; }
236  int64_t getBucket() const { return bucket_; }
237 
238  bool hasNulls() const { return has_nulls_; }
239  GroupByMemSharing getGpuMemSharing() const { return sharing_; }
240 
// Returns a const reference (no copy) to the element of
// count_distinct_descriptors_ at position idx.
// CHECK_LT guards idx against the container's size before the unchecked
// operator[] access.
241  const CountDistinctDescriptor& getCountDistinctDescriptor(const size_t idx) const {
242  CHECK_LT(idx, count_distinct_descriptors_.size());
243  return count_distinct_descriptors_[idx];
244  }
246  return count_distinct_descriptors_.size();
247  }
248 
249  bool sortOnGpu() const { return sort_on_gpu_; }
250 
251  bool canOutputColumnar() const;
252  bool didOutputColumnar() const { return output_columnar_; }
253  void setOutputColumnar(const bool val);
254 
255  bool isLogicalSizedColumnsAllowed() const;
256 
257  bool mustUseBaselineSort() const { return must_use_baseline_sort_; }
258 
259  // TODO(adb): remove and store this info more naturally in another
260  // member
261  bool forceFourByteFloat() const { return force_4byte_float_; }
262  void setForceFourByteFloat(const bool val) { force_4byte_float_ = val; }
263 
264  // Getters derived from state
// Number of group-by columns, taken as the size of group_col_widths_.
265  size_t getGroupbyColCount() const { return group_col_widths_.size(); }
// Number of keys: 0 when keyless_hash_ is set, otherwise the group-by column
// count.
266  size_t getKeyCount() const { return keyless_hash_ ? 0 : getGroupbyColCount(); }
267  size_t getBufferColSlotCount() const;
268 
269  size_t getBufferSizeBytes(const RelAlgExecutionUnit& ra_exe_unit,
270  const unsigned thread_count,
271  const ExecutorDeviceType device_type) const;
272  size_t getBufferSizeBytes(const ExecutorDeviceType device_type) const;
273  size_t getBufferSizeBytes(const ExecutorDeviceType device_type,
274  const size_t override_entry_count) const;
275 
276  const ColSlotContext& getColSlotContext() const { return col_slot_context_; }
277 
278  // TODO(alex): remove
279  bool usesGetGroupValueFast() const;
280 
281  bool blocksShareMemory() const;
282  bool threadsShareMemory() const;
283 
284  bool lazyInitGroups(const ExecutorDeviceType) const;
285 
286  bool interleavedBins(const ExecutorDeviceType) const;
287 
288  size_t sharedMemBytes(const ExecutorDeviceType) const;
289 
290  size_t getColOffInBytes(const size_t col_idx) const;
291  size_t getColOffInBytesInNextBin(const size_t col_idx) const;
292  size_t getNextColOffInBytes(const int8_t* col_ptr,
293  const size_t bin,
294  const size_t col_idx) const;
295  size_t getColOnlyOffInBytes(const size_t col_idx) const;
296  size_t getRowSize() const;
297  size_t getColsSize() const;
298  size_t getWarpCount() const;
299 
300  size_t getCompactByteWidth() const;
301 
302  inline size_t getEffectiveKeyWidth() const {
303  return group_col_compact_width_ ? group_col_compact_width_ : sizeof(int64_t);
304  }
305 
306  bool isWarpSyncRequired(const ExecutorDeviceType) const;
307 
308  std::string toString() const;
309 
310  protected:
// Overwrites group_col_widths_ with a copy of new_group_col_widths.
// Only that one member is assigned; group_col_compact_width_ and every other
// field are left as they were.
311  void resetGroupColWidths(const std::vector<int8_t>& new_group_col_widths) {
312  group_col_widths_ = new_group_col_widths;
313  }
314 
315  private:
322  std::vector<int8_t> group_col_widths_;
323  int8_t group_col_compact_width_; // compact width for all group
324  // cols if able to be consistent
325  // otherwise 0
326  std::vector<ssize_t> target_groupby_indices_;
327  size_t entry_count_; // the number of entries in the main buffer
328  int64_t min_val_; // meaningful for OneColKnownRange,
329  // MultiColPerfectHash only
330  int64_t max_val_;
331  int64_t bucket_;
333  GroupByMemSharing sharing_; // meaningful for GPU only
339 
341 
343 
344  size_t getTotalBytesOfColumnarBuffers() const;
345  size_t getTotalBytesOfColumnarBuffers(const size_t num_entries_per_column) const;
346  size_t getTotalBytesOfColumnarProjections(const size_t projection_count) const;
347 
348  friend class ResultSet;
349  friend class QueryExecutionContext;
350 
351  template <typename META_CLASS_TYPE>
353 };
354 
355 inline void set_notnull(TargetInfo& target, const bool not_null) {
356  target.skip_null_val = !not_null;
357  auto new_type = get_compact_type(target);
358  new_type.set_notnull(not_null);
359  set_compact_type(target, new_type);
360 }
361 
362 std::vector<TargetInfo> target_exprs_to_infos(
363  const std::vector<Analyzer::Expr*>& targets,
364  const QueryMemoryDescriptor& query_mem_desc);
365 
366 #endif // QUERYENGINE_QUERYMEMORYDESCRIPTOR_H
void set_compact_type(TargetInfo &target, const SQLTypeInfo &new_type)
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
const int8_t const int64_t * num_rows
ssize_t getTargetGroupbyIndex(const size_t target_idx) const
void setEntryCount(const size_t val)
ExecutorDeviceType
void setHasKeylessHash(const bool val)
void setGroupColCompactWidth(const int8_t val)
GroupByMemSharing getGpuMemSharing() const
size_t getCountDistinctDescriptorsSize() const
bool skip_null_val
Definition: TargetInfo.h:44
void setQueryDescriptionType(const QueryDescriptionType val)
const auto groupColWidthsEnd() const
const Executor * getExecutor() const
ExecutorDispatchMode
const SQLTypeInfo get_compact_type(const TargetInfo &target)
void setTargetIdxForKey(const int32_t val)
std::vector< CountDistinctDescriptor > CountDistinctDescriptors
Definition: CountDistinct.h:35
Provides column info and slot info for the output buffer and some metadata helpers.
void init(LogOptions const &log_opts)
Definition: Logger.cpp:260
bool countDistinctDescriptorsLogicallyEmpty() const
CountDistinctDescriptors count_distinct_descriptors_
size_t getPaddedColWidthForRange(const size_t offset, const size_t range) const
size_t targetGroupbyIndicesSize() const
int8_t groupColWidth(const size_t key_idx) const
int32_t getTargetIdxForKey() const
#define CHECK_LT(x, y)
Definition: Logger.h:197
const auto groupColWidthsBegin() const
QueryDescriptionType query_desc_type_
bool operator==(const SlotSize &lhs, const SlotSize &rhs)
void setForceFourByteFloat(const bool val)
const ColSlotContext & getColSlotContext() const
GroupByMemSharing
std::vector< int8_t > group_col_widths_
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr *> &targets, const QueryMemoryDescriptor &query_mem_desc)
QueryDescriptionType
Definition: Types.h:26
bool g_cluster
void resetGroupColWidths(const std::vector< int8_t > &new_group_col_widths)
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
void setHasInterleavedBinsOnGpu(const bool val)
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
QueryDescriptionType getQueryDescriptionType() const
bool isSingleColumnGroupByWithPerfectHash() const
std::vector< ssize_t > target_groupby_indices_
void set_notnull(TargetInfo &target, const bool not_null)
size_t getEffectiveKeyWidth() const
void sort_on_gpu(int64_t *val_buff, int32_t *key_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)