OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ColumnarResults.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef COLUMNAR_RESULTS_H
18 #define COLUMNAR_RESULTS_H
19 #include "ResultSet.h"
20 #include "Shared/SqlTypesLayout.h"
21 
22 #include "../Shared/checked_alloc.h"
23 
24 #include <memory>
25 #include <unordered_map>
26 
/**
 * Exception type carrying a fixed message stating that columnar conversion is not
 * supported for variable length types.
 *
 * Derives from std::runtime_error, so it can be caught either as itself or through
 * the standard exception hierarchy.
 */
class ColumnarConversionNotSupported : public std::runtime_error {
 public:
  // Takes no arguments: the message is fixed and forwarded to std::runtime_error.
  ColumnarConversionNotSupported()
      : std::runtime_error(
            "Columnar conversion not supported for variable length types") {}
};
33 
40 class ColumnBitmap {
41  public:
42  ColumnBitmap(const size_t num_elements_per_bank, size_t num_banks)
43  : bitmaps_(num_banks, std::vector<bool>(num_elements_per_bank, false)) {}
44 
45  inline bool get(const size_t index, const size_t bank_index) const {
46  CHECK_LT(bank_index, bitmaps_.size());
47  CHECK_LT(index, bitmaps_[bank_index].size());
48  return bitmaps_[bank_index][index];
49  }
50 
51  inline void set(const size_t index, const size_t bank_index, const bool val) {
52  CHECK_LT(bank_index, bitmaps_.size());
53  CHECK_LT(index, bitmaps_[bank_index].size());
54  bitmaps_[bank_index][index] = val;
55  }
56 
57  private:
58  std::vector<std::vector<bool>> bitmaps_;
59 };
60 
62  public:
63  ColumnarResults(const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
64  const ResultSet& rows,
65  const size_t num_columns,
66  const std::vector<SQLTypeInfo>& target_types,
67  const size_t executor_id,
68  const size_t thread_idx,
69  const bool is_parallel_execution_enforced = false);
70 
71  ColumnarResults(const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
72  const int8_t* one_col_buffer,
73  const size_t num_rows,
74  const SQLTypeInfo& target_type,
75  const size_t executor_id,
76  const size_t thread_idx);
77 
78  static std::unique_ptr<ColumnarResults> mergeResults(
79  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
80  const std::vector<std::unique_ptr<ColumnarResults>>& sub_results);
81 
82  const std::vector<int8_t*>& getColumnBuffers() const { return column_buffers_; }
83 
84  const size_t size() const { return num_rows_; }
85 
86  const SQLTypeInfo& getColumnType(const int col_id) const {
87  CHECK_GE(col_id, 0);
88  CHECK_LT(static_cast<size_t>(col_id), target_types_.size());
89  return target_types_[col_id];
90  }
91 
92  bool isParallelConversion() const { return parallel_conversion_; }
94 
95  // functions used to read content from the result set (direct columnarization, group by
96  // queries)
97  using ReadFunction =
98  std::function<int64_t(const ResultSet&, const size_t, const size_t, const size_t)>;
99 
100  // functions used to write back contents into output column buffers (direct
101  // columnarization, group by queries)
102  using WriteFunction = std::function<void(const ResultSet&,
103  const size_t,
104  const size_t,
105  const size_t,
106  const size_t,
107  const ReadFunction&)>;
108 
109  protected:
110  std::vector<int8_t*> column_buffers_;
111  size_t num_rows_;
112 
113  private:
114  ColumnarResults(const size_t num_rows,
115  const std::vector<SQLTypeInfo>& target_types,
116  const std::vector<size_t>& padded_target_sizes)
117  : num_rows_(num_rows)
118  , target_types_(target_types)
119  , padded_target_sizes_(padded_target_sizes) {}
120  inline void writeBackCell(const TargetValue& col_val,
121  const size_t row_idx,
122  const size_t column_idx,
123  std::mutex* write_mutex = nullptr);
124  void materializeAllColumnsDirectly(const ResultSet& rows, const size_t num_columns);
125  void materializeAllColumnsThroughIteration(const ResultSet& rows,
126  const size_t num_columns);
127 
128  // Direct columnarization for group by queries (perfect hash or baseline hash)
129  void materializeAllColumnsGroupBy(const ResultSet& rows, const size_t num_columns);
130 
131  // Direct columnarization for Projections (only output is columnar)
132  void materializeAllColumnsProjection(const ResultSet& rows, const size_t num_columns);
133 
134  void materializeAllColumnsTableFunction(const ResultSet& rows,
135  const size_t num_columns);
136 
137  void copyAllNonLazyColumns(const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
138  const ResultSet& rows,
139  const size_t num_columns);
140  void materializeAllLazyColumns(const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
141  const ResultSet& rows,
142  const size_t num_columns);
143 
144  void locateAndCountEntries(const ResultSet& rows,
145  ColumnBitmap& bitmap,
146  std::vector<size_t>& non_empty_per_thread,
147  const size_t entry_count,
148  const size_t num_threads,
149  const size_t size_per_thread) const;
150  void compactAndCopyEntries(const ResultSet& rows,
151  const ColumnBitmap& bitmap,
152  const std::vector<size_t>& non_empty_per_thread,
153  const size_t num_columns,
154  const size_t entry_count,
155  const size_t num_threads,
156  const size_t size_per_thread);
158  const ResultSet& rows,
159  const ColumnBitmap& bitmap,
160  const std::vector<size_t>& non_empty_per_thread,
161  const std::vector<size_t>& global_offsets,
162  const std::vector<bool>& targets_to_skip,
163  const std::vector<size_t>& slot_idx_per_target_idx,
164  const size_t num_columns,
165  const size_t entry_count,
166  const size_t num_threads,
167  const size_t size_per_thread);
169  const ResultSet& rows,
170  const ColumnBitmap& bitmap,
171  const std::vector<size_t>& non_empty_per_thread,
172  const std::vector<size_t>& global_offsets,
173  const std::vector<size_t>& slot_idx_per_target_idx,
174  const size_t num_columns,
175  const size_t entry_count,
176  const size_t num_threads,
177  const size_t size_per_thread);
178 
179  template <typename DATA_TYPE>
180  void writeBackCellDirect(const ResultSet& rows,
181  const size_t input_buffer_entry_idx,
182  const size_t output_buffer_entry_idx,
183  const size_t target_idx,
184  const size_t slot_idx,
185  const ReadFunction& read_function);
186 
187  std::vector<WriteFunction> initWriteFunctions(
188  const ResultSet& rows,
189  const std::vector<bool>& targets_to_skip = {});
190 
191  template <QueryDescriptionType QUERY_TYPE, bool COLUMNAR_OUTPUT>
192  std::vector<ReadFunction> initReadFunctions(
193  const ResultSet& rows,
194  const std::vector<size_t>& slot_idx_per_target_idx,
195  const std::vector<bool>& targets_to_skip = {});
196 
197  std::tuple<std::vector<WriteFunction>, std::vector<ReadFunction>>
198  initAllConversionFunctions(const ResultSet& rows,
199  const std::vector<size_t>& slot_idx_per_target_idx,
200  const std::vector<bool>& targets_to_skip = {});
201 
202  const std::vector<SQLTypeInfo> target_types_;
203  bool parallel_conversion_; // multi-threaded execution of columnar conversion
204  bool direct_columnar_conversion_; // whether columnar conversion might happen directly
205  // with minimal ussage of result set's iterator access
206  size_t thread_idx_;
207  std::shared_ptr<Executor> executor_;
208  std::vector<size_t> padded_target_sizes_;
209 };
210 
// Two-level lookup table: shared::TableKey -> (int -> shared pointer to an immutable
// ColumnarResults).
// NOTE(review): the inner int key is presumably a column id -- confirm against callers.
211 using ColumnCacheMap =
212  std::unordered_map<shared::TableKey,
213  std::unordered_map<int, std::shared_ptr<const ColumnarResults>>>;
214 
215 #endif // COLUMNAR_RESULTS_H
void writeBackCell(const TargetValue &col_val, const size_t row_idx, const size_t column_idx, std::mutex *write_mutex=nullptr)
bool isParallelConversion() const
std::vector< int8_t * > column_buffers_
void materializeAllColumnsTableFunction(const ResultSet &rows, const size_t num_columns)
static std::unique_ptr< ColumnarResults > mergeResults(const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::vector< std::unique_ptr< ColumnarResults >> &sub_results)
std::vector< ReadFunction > initReadFunctions(const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
ColumnarResults(const size_t num_rows, const std::vector< SQLTypeInfo > &target_types, const std::vector< size_t > &padded_target_sizes)
void locateAndCountEntries(const ResultSet &rows, ColumnBitmap &bitmap, std::vector< size_t > &non_empty_per_thread, const size_t entry_count, const size_t num_threads, const size_t size_per_thread) const
#define CHECK_GE(x, y)
Definition: Logger.h:306
void set(const size_t index, const size_t bank_index, const bool val)
bool direct_columnar_conversion_
void compactAndCopyEntries(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
std::function< int64_t(const ResultSet &, const size_t, const size_t, const size_t)> ReadFunction
std::function< void(const ResultSet &, const size_t, const size_t, const size_t, const size_t, const ReadFunction &)> WriteFunction
void materializeAllColumnsThroughIteration(const ResultSet &rows, const size_t num_columns)
const size_t size() const
std::vector< WriteFunction > initWriteFunctions(const ResultSet &rows, const std::vector< bool > &targets_to_skip={})
void materializeAllColumnsGroupBy(const ResultSet &rows, const size_t num_columns)
std::tuple< std::vector< WriteFunction >, std::vector< ReadFunction > > initAllConversionFunctions(const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
bool isDirectColumnarConversionPossible() const
#define CHECK_LT(x, y)
Definition: Logger.h:303
void materializeAllColumnsDirectly(const ResultSet &rows, const size_t num_columns)
void writeBackCellDirect(const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_function)
ColumnBitmap(const size_t num_elements_per_bank, size_t num_banks)
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
std::shared_ptr< Executor > executor_
std::vector< size_t > padded_target_sizes_
void copyAllNonLazyColumns(const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
std::vector< std::vector< bool > > bitmaps_
bool g_enable_watchdog false
Definition: Execute.cpp:79
void materializeAllColumnsProjection(const ResultSet &rows, const size_t num_columns)
Basic constructors and methods of the row set interface.
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
void compactAndCopyEntriesWithTargetSkipping(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< bool > &targets_to_skip, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
void compactAndCopyEntriesWithoutTargetSkipping(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
const std::vector< int8_t * > & getColumnBuffers() const
const std::vector< SQLTypeInfo > target_types_
void materializeAllLazyColumns(const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
ColumnarResults(const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const ResultSet &rows, const size_t num_columns, const std::vector< SQLTypeInfo > &target_types, const size_t executor_id, const size_t thread_idx, const bool is_parallel_execution_enforced=false)
const SQLTypeInfo & getColumnType(const int col_id) const