OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ColumnarResults.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef COLUMNAR_RESULTS_H
18 #define COLUMNAR_RESULTS_H
19 #include "ResultSet.h"
20 #include "Shared/SqlTypesLayout.h"
21 
22 #include "../Shared/checked_alloc.h"
23 
24 #include <memory>
25 #include <unordered_map>
26 
// Exception type signalling that a columnar conversion cannot be performed.
// The message is fixed: it states that variable length types are not supported.
class ColumnarConversionNotSupported : public std::runtime_error {
 public:
  // Takes no arguments because the error text is constant.
  ColumnarConversionNotSupported()
      : std::runtime_error(
            "Columnar conversion not supported for variable length types") {}
};
33 
40 class ColumnBitmap {
41  public:
42  ColumnBitmap(const size_t num_elements_per_bank, size_t num_banks)
43  : bitmaps_(num_banks, std::vector<bool>(num_elements_per_bank, false)) {}
44 
45  inline bool get(const size_t index, const size_t bank_index) const {
46  CHECK_LT(bank_index, bitmaps_.size());
47  CHECK_LT(index, bitmaps_[bank_index].size());
48  return bitmaps_[bank_index][index];
49  }
50 
51  inline void set(const size_t index, const size_t bank_index, const bool val) {
52  CHECK_LT(bank_index, bitmaps_.size());
53  CHECK_LT(index, bitmaps_[bank_index].size());
54  bitmaps_[bank_index][index] = val;
55  }
56 
57  private:
58  std::vector<std::vector<bool>> bitmaps_;
59 };
60 
62  public:
63  ColumnarResults(const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
64  const ResultSet& rows,
65  const size_t num_columns,
66  const std::vector<SQLTypeInfo>& target_types,
67  const size_t executor_id,
68  const size_t thread_idx,
69  const bool is_parallel_execution_enforced = false);
70 
71  ColumnarResults(const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
72  const int8_t* one_col_buffer,
73  const size_t num_rows,
74  const SQLTypeInfo& target_type,
75  const size_t executor_id,
76  const size_t thread_idx);
77 
78  static std::unique_ptr<ColumnarResults> mergeResults(
79  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
80  const std::vector<std::unique_ptr<ColumnarResults>>& sub_results);
81 
82  const std::vector<int8_t*>& getColumnBuffers() const { return column_buffers_; }
83 
84  const size_t size() const { return num_rows_; }
85 
86  const SQLTypeInfo& getColumnType(const int col_id) const {
87  CHECK_GE(col_id, 0);
88  CHECK_LT(static_cast<size_t>(col_id), target_types_.size());
89  return target_types_[col_id];
90  }
91 
92  bool isParallelConversion() const { return parallel_conversion_; }
94 
95  // functions used to read content from the result set (direct columnarization, group by
96  // queries)
97  using ReadFunction =
98  std::function<int64_t(const ResultSet&, const size_t, const size_t, const size_t)>;
99 
100  // functions used to write back contents into output column buffers (direct
101  // columnarization, group by queries)
102  using WriteFunction = std::function<void(const ResultSet&,
103  const size_t,
104  const size_t,
105  const size_t,
106  const size_t,
107  const ReadFunction&)>;
108 
109  protected:
110  std::vector<int8_t*> column_buffers_;
111  size_t num_rows_;
112 
113  private:
114  ColumnarResults(const size_t num_rows, const std::vector<SQLTypeInfo>& target_types)
115  : num_rows_(num_rows), target_types_(target_types) {}
116  inline void writeBackCell(const TargetValue& col_val,
117  const size_t row_idx,
118  const size_t column_idx);
119  void materializeAllColumnsDirectly(const ResultSet& rows, const size_t num_columns);
120  void materializeAllColumnsThroughIteration(const ResultSet& rows,
121  const size_t num_columns);
122 
123  // Direct columnarization for group by queries (perfect hash or baseline hash)
124  void materializeAllColumnsGroupBy(const ResultSet& rows, const size_t num_columns);
125 
126  // Direct columnarization for Projections (only output is columnar)
127  void materializeAllColumnsProjection(const ResultSet& rows, const size_t num_columns);
128 
129  void copyAllNonLazyColumns(const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
130  const ResultSet& rows,
131  const size_t num_columns);
132  void materializeAllLazyColumns(const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
133  const ResultSet& rows,
134  const size_t num_columns);
135 
136  void locateAndCountEntries(const ResultSet& rows,
137  ColumnBitmap& bitmap,
138  std::vector<size_t>& non_empty_per_thread,
139  const size_t entry_count,
140  const size_t num_threads,
141  const size_t size_per_thread) const;
142  void compactAndCopyEntries(const ResultSet& rows,
143  const ColumnBitmap& bitmap,
144  const std::vector<size_t>& non_empty_per_thread,
145  const size_t num_columns,
146  const size_t entry_count,
147  const size_t num_threads,
148  const size_t size_per_thread);
150  const ResultSet& rows,
151  const ColumnBitmap& bitmap,
152  const std::vector<size_t>& non_empty_per_thread,
153  const std::vector<size_t>& global_offsets,
154  const std::vector<bool>& targets_to_skip,
155  const std::vector<size_t>& slot_idx_per_target_idx,
156  const size_t num_columns,
157  const size_t entry_count,
158  const size_t num_threads,
159  const size_t size_per_thread);
161  const ResultSet& rows,
162  const ColumnBitmap& bitmap,
163  const std::vector<size_t>& non_empty_per_thread,
164  const std::vector<size_t>& global_offsets,
165  const std::vector<size_t>& slot_idx_per_target_idx,
166  const size_t num_columns,
167  const size_t entry_count,
168  const size_t num_threads,
169  const size_t size_per_thread);
170 
171  template <typename DATA_TYPE>
172  void writeBackCellDirect(const ResultSet& rows,
173  const size_t input_buffer_entry_idx,
174  const size_t output_buffer_entry_idx,
175  const size_t target_idx,
176  const size_t slot_idx,
177  const ReadFunction& read_function);
178 
179  std::vector<WriteFunction> initWriteFunctions(
180  const ResultSet& rows,
181  const std::vector<bool>& targets_to_skip = {});
182 
183  template <QueryDescriptionType QUERY_TYPE, bool COLUMNAR_OUTPUT>
184  std::vector<ReadFunction> initReadFunctions(
185  const ResultSet& rows,
186  const std::vector<size_t>& slot_idx_per_target_idx,
187  const std::vector<bool>& targets_to_skip = {});
188 
189  std::tuple<std::vector<WriteFunction>, std::vector<ReadFunction>>
190  initAllConversionFunctions(const ResultSet& rows,
191  const std::vector<size_t>& slot_idx_per_target_idx,
192  const std::vector<bool>& targets_to_skip = {});
193 
194  const std::vector<SQLTypeInfo> target_types_;
195  bool parallel_conversion_; // multi-threaded execution of columnar conversion
196  bool direct_columnar_conversion_; // whether columnar conversion might happen directly
197  // with minimal ussage of result set's iterator access
198  size_t thread_idx_;
199  std::shared_ptr<Executor> executor_;
200 };
201 
202 using ColumnCacheMap =
203  std::unordered_map<int,
204  std::unordered_map<int, std::shared_ptr<const ColumnarResults>>>;
205 
206 #endif // COLUMNAR_RESULTS_H
bool isParallelConversion() const
std::vector< int8_t * > column_buffers_
static std::unique_ptr< ColumnarResults > mergeResults(const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::vector< std::unique_ptr< ColumnarResults >> &sub_results)
std::vector< ReadFunction > initReadFunctions(const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
void locateAndCountEntries(const ResultSet &rows, ColumnBitmap &bitmap, std::vector< size_t > &non_empty_per_thread, const size_t entry_count, const size_t num_threads, const size_t size_per_thread) const
#define CHECK_GE(x, y)
Definition: Logger.h:222
void set(const size_t index, const size_t bank_index, const bool val)
bool direct_columnar_conversion_
void compactAndCopyEntries(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
std::function< int64_t(const ResultSet &, const size_t, const size_t, const size_t)> ReadFunction
std::function< void(const ResultSet &, const size_t, const size_t, const size_t, const size_t, const ReadFunction &)> WriteFunction
void materializeAllColumnsThroughIteration(const ResultSet &rows, const size_t num_columns)
ColumnarResults(const size_t num_rows, const std::vector< SQLTypeInfo > &target_types)
const size_t size() const
std::vector< WriteFunction > initWriteFunctions(const ResultSet &rows, const std::vector< bool > &targets_to_skip={})
void materializeAllColumnsGroupBy(const ResultSet &rows, const size_t num_columns)
std::tuple< std::vector< WriteFunction >, std::vector< ReadFunction > > initAllConversionFunctions(const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
bool isDirectColumnarConversionPossible() const
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
#define CHECK_LT(x, y)
Definition: Logger.h:219
void materializeAllColumnsDirectly(const ResultSet &rows, const size_t num_columns)
void writeBackCellDirect(const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_function)
ColumnBitmap(const size_t num_elements_per_bank, size_t num_banks)
std::shared_ptr< Executor > executor_
void writeBackCell(const TargetValue &col_val, const size_t row_idx, const size_t column_idx)
void copyAllNonLazyColumns(const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
std::vector< std::vector< bool > > bitmaps_
bool g_enable_watchdog false
Definition: Execute.cpp:76
void materializeAllColumnsProjection(const ResultSet &rows, const size_t num_columns)
Basic constructors and methods of the row set interface.
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
void compactAndCopyEntriesWithTargetSkipping(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< bool > &targets_to_skip, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
void compactAndCopyEntriesWithoutTargetSkipping(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
const std::vector< int8_t * > & getColumnBuffers() const
const std::vector< SQLTypeInfo > target_types_
void materializeAllLazyColumns(const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
ColumnarResults(const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const ResultSet &rows, const size_t num_columns, const std::vector< SQLTypeInfo > &target_types, const size_t executor_id, const size_t thread_idx, const bool is_parallel_execution_enforced=false)
const SQLTypeInfo & getColumnType(const int col_id) const