OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ColumnarResults.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef COLUMNAR_RESULTS_H
18 #define COLUMNAR_RESULTS_H
19 #include "ResultSet.h"
20 #include "Shared/SqlTypesLayout.h"
21 
22 #include "../Shared/checked_alloc.h"
23 
24 #include <memory>
25 #include <unordered_map>
26 
/**
 * Exception type derived from std::runtime_error whose message states that columnar
 * conversion is not supported for variable length types.
 */
class ColumnarConversionNotSupported : public std::runtime_error {
 public:
  /// Builds the exception with its fixed, descriptive message.
  ColumnarConversionNotSupported()
      : std::runtime_error(
            "Columnar conversion not supported for variable length types") {}
};
33 
40 class ColumnBitmap {
41  public:
42  ColumnBitmap(const size_t num_elements_per_bank, size_t num_banks)
43  : bitmaps_(num_banks, std::vector<bool>(num_elements_per_bank, false)) {}
44 
45  inline bool get(const size_t index, const size_t bank_index) const {
46  CHECK_LT(bank_index, bitmaps_.size());
47  CHECK_LT(index, bitmaps_[bank_index].size());
48  return bitmaps_[bank_index][index];
49  }
50 
51  inline void set(const size_t index, const size_t bank_index, const bool val) {
52  CHECK_LT(bank_index, bitmaps_.size());
53  CHECK_LT(index, bitmaps_[bank_index].size());
54  bitmaps_[bank_index][index] = val;
55  }
56 
57  private:
58  std::vector<std::vector<bool>> bitmaps_;
59 };
60 
62  public:
63  ColumnarResults(const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
64  const ResultSet& rows,
65  const size_t num_columns,
66  const std::vector<SQLTypeInfo>& target_types,
67  const size_t thread_idx,
68  const bool is_parallel_execution_enforced = false);
69 
70  ColumnarResults(const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
71  const int8_t* one_col_buffer,
72  const size_t num_rows,
73  const SQLTypeInfo& target_type,
74  const size_t thread_idx);
75 
76  static std::unique_ptr<ColumnarResults> mergeResults(
77  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
78  const std::vector<std::unique_ptr<ColumnarResults>>& sub_results);
79 
80  const std::vector<int8_t*>& getColumnBuffers() const { return column_buffers_; }
81 
82  const size_t size() const { return num_rows_; }
83 
84  const SQLTypeInfo& getColumnType(const int col_id) const {
85  CHECK_GE(col_id, 0);
86  CHECK_LT(static_cast<size_t>(col_id), target_types_.size());
87  return target_types_[col_id];
88  }
89 
90  bool isParallelConversion() const { return parallel_conversion_; }
92 
93  // functions used to read content from the result set (direct columnarization, group by
94  // queries)
95  using ReadFunction =
96  std::function<int64_t(const ResultSet&, const size_t, const size_t, const size_t)>;
97 
98  // functions used to write back contents into output column buffers (direct
99  // columnarization, group by queries)
100  using WriteFunction = std::function<void(const ResultSet&,
101  const size_t,
102  const size_t,
103  const size_t,
104  const size_t,
105  const ReadFunction&)>;
106 
107  protected:
108  std::vector<int8_t*> column_buffers_;
109  size_t num_rows_;
110 
111  private:
112  ColumnarResults(const size_t num_rows, const std::vector<SQLTypeInfo>& target_types)
113  : num_rows_(num_rows), target_types_(target_types) {}
114  inline void writeBackCell(const TargetValue& col_val,
115  const size_t row_idx,
116  const size_t column_idx);
117  void materializeAllColumnsDirectly(const ResultSet& rows, const size_t num_columns);
118  void materializeAllColumnsThroughIteration(const ResultSet& rows,
119  const size_t num_columns);
120 
121  // Direct columnarization for group by queries (perfect hash or baseline hash)
122  void materializeAllColumnsGroupBy(const ResultSet& rows, const size_t num_columns);
123 
124  // Direct columnarization for Projections (only output is columnar)
125  void materializeAllColumnsProjection(const ResultSet& rows, const size_t num_columns);
126 
127  void copyAllNonLazyColumns(const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
128  const ResultSet& rows,
129  const size_t num_columns);
130  void materializeAllLazyColumns(const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
131  const ResultSet& rows,
132  const size_t num_columns);
133 
134  void locateAndCountEntries(const ResultSet& rows,
135  ColumnBitmap& bitmap,
136  std::vector<size_t>& non_empty_per_thread,
137  const size_t entry_count,
138  const size_t num_threads,
139  const size_t size_per_thread) const;
140  void compactAndCopyEntries(const ResultSet& rows,
141  const ColumnBitmap& bitmap,
142  const std::vector<size_t>& non_empty_per_thread,
143  const size_t num_columns,
144  const size_t entry_count,
145  const size_t num_threads,
146  const size_t size_per_thread);
148  const ResultSet& rows,
149  const ColumnBitmap& bitmap,
150  const std::vector<size_t>& non_empty_per_thread,
151  const std::vector<size_t>& global_offsets,
152  const std::vector<bool>& targets_to_skip,
153  const std::vector<size_t>& slot_idx_per_target_idx,
154  const size_t num_columns,
155  const size_t entry_count,
156  const size_t num_threads,
157  const size_t size_per_thread);
159  const ResultSet& rows,
160  const ColumnBitmap& bitmap,
161  const std::vector<size_t>& non_empty_per_thread,
162  const std::vector<size_t>& global_offsets,
163  const std::vector<size_t>& slot_idx_per_target_idx,
164  const size_t num_columns,
165  const size_t entry_count,
166  const size_t num_threads,
167  const size_t size_per_thread);
168 
169  template <typename DATA_TYPE>
170  void writeBackCellDirect(const ResultSet& rows,
171  const size_t input_buffer_entry_idx,
172  const size_t output_buffer_entry_idx,
173  const size_t target_idx,
174  const size_t slot_idx,
175  const ReadFunction& read_function);
176 
177  std::vector<WriteFunction> initWriteFunctions(
178  const ResultSet& rows,
179  const std::vector<bool>& targets_to_skip = {});
180 
181  template <QueryDescriptionType QUERY_TYPE, bool COLUMNAR_OUTPUT>
182  std::vector<ReadFunction> initReadFunctions(
183  const ResultSet& rows,
184  const std::vector<size_t>& slot_idx_per_target_idx,
185  const std::vector<bool>& targets_to_skip = {});
186 
187  std::tuple<std::vector<WriteFunction>, std::vector<ReadFunction>>
188  initAllConversionFunctions(const ResultSet& rows,
189  const std::vector<size_t>& slot_idx_per_target_idx,
190  const std::vector<bool>& targets_to_skip = {});
191 
192  const std::vector<SQLTypeInfo> target_types_;
193  bool parallel_conversion_; // multi-threaded execution of columnar conversion
194  bool direct_columnar_conversion_; // whether columnar conversion might happen directly
195  // with minimal ussage of result set's iterator access
196  size_t thread_idx_;
197 };
198 
199 using ColumnCacheMap =
200  std::unordered_map<int,
201  std::unordered_map<int, std::shared_ptr<const ColumnarResults>>>;
202 
203 #endif // COLUMNAR_RESULTS_H
bool isParallelConversion() const
std::vector< int8_t * > column_buffers_
static std::unique_ptr< ColumnarResults > mergeResults(const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::vector< std::unique_ptr< ColumnarResults >> &sub_results)
std::vector< ReadFunction > initReadFunctions(const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
void locateAndCountEntries(const ResultSet &rows, ColumnBitmap &bitmap, std::vector< size_t > &non_empty_per_thread, const size_t entry_count, const size_t num_threads, const size_t size_per_thread) const
#define CHECK_GE(x, y)
Definition: Logger.h:216
void set(const size_t index, const size_t bank_index, const bool val)
bool direct_columnar_conversion_
void compactAndCopyEntries(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
ColumnarResults(const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const ResultSet &rows, const size_t num_columns, const std::vector< SQLTypeInfo > &target_types, const size_t thread_idx, const bool is_parallel_execution_enforced=false)
std::function< int64_t(const ResultSet &, const size_t, const size_t, const size_t)> ReadFunction
std::function< void(const ResultSet &, const size_t, const size_t, const size_t, const size_t, const ReadFunction &)> WriteFunction
void materializeAllColumnsThroughIteration(const ResultSet &rows, const size_t num_columns)
ColumnarResults(const size_t num_rows, const std::vector< SQLTypeInfo > &target_types)
const size_t size() const
std::vector< WriteFunction > initWriteFunctions(const ResultSet &rows, const std::vector< bool > &targets_to_skip={})
void materializeAllColumnsGroupBy(const ResultSet &rows, const size_t num_columns)
std::tuple< std::vector< WriteFunction >, std::vector< ReadFunction > > initAllConversionFunctions(const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
bool isDirectColumnarConversionPossible() const
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
#define CHECK_LT(x, y)
Definition: Logger.h:213
void materializeAllColumnsDirectly(const ResultSet &rows, const size_t num_columns)
void writeBackCellDirect(const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_function)
ColumnBitmap(const size_t num_elements_per_bank, size_t num_banks)
void writeBackCell(const TargetValue &col_val, const size_t row_idx, const size_t column_idx)
void copyAllNonLazyColumns(const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
std::vector< std::vector< bool > > bitmaps_
bool g_enable_watchdog false
Definition: Execute.cpp:76
void materializeAllColumnsProjection(const ResultSet &rows, const size_t num_columns)
Basic constructors and methods of the row set interface.
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
void compactAndCopyEntriesWithTargetSkipping(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< bool > &targets_to_skip, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
void compactAndCopyEntriesWithoutTargetSkipping(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
const std::vector< int8_t * > & getColumnBuffers() const
const std::vector< SQLTypeInfo > target_types_
void materializeAllLazyColumns(const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
const SQLTypeInfo & getColumnType(const int col_id) const