OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ColumnarResults.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef COLUMNAR_RESULTS_H
18 #define COLUMNAR_RESULTS_H
19 #include "ResultSet.h"
20 #include "Shared/SqlTypesLayout.h"
21 
22 #include "../Shared/checked_alloc.h"
23 
24 #include <memory>
25 #include <unordered_map>
26 
27 class ColumnarConversionNotSupported : public std::runtime_error {
28  public:
29  ColumnarConversionNotSupported()
30  : std::runtime_error(
31  "Columnar conversion not supported for variable length types") {}
32 };
33 
// A two-level bit table: `num_banks` independent banks, each holding
// `num_elements_per_bank` boolean flags. In this header it is handed to
// locateAndCountEntries() as a mutable reference and to the
// compactAndCopyEntries*() helpers as a const reference.
// NOTE(review): those helpers also take `num_threads`, so the banks are
// presumably one per worker thread (separate std::vector<bool> objects can be
// written concurrently, whereas bits of a single std::vector<bool> cannot be
// assumed independent) -- confirm against ColumnarResults.cpp.
40 class ColumnBitmap {
41  public:
 // Builds `num_banks` banks of `num_elements_per_bank` flags, every flag
 // initially false.
42  ColumnBitmap(const size_t num_elements_per_bank, size_t num_banks)
43  : bitmaps_(num_banks, std::vector<bool>(num_elements_per_bank, false)) {}
44 
 // Returns the flag stored at position `index` inside bank `bank_index`.
 // Note the argument order (element first, bank second) is the reverse of
 // the storage order bitmaps_[bank][element].
 // Both indices are validated with CHECK_LT (macro from Logger.h; its failure
 // behavior is not visible in this file).
45  inline bool get(const size_t index, const size_t bank_index) const {
46  CHECK_LT(bank_index, bitmaps_.size());
47  CHECK_LT(index, bitmaps_[bank_index].size());
48  return bitmaps_[bank_index][index];
49  }
50 
 // Stores `val` at position `index` inside bank `bank_index`, applying the
 // same two CHECK_LT range checks as get().
51  inline void set(const size_t index, const size_t bank_index, const bool val) {
52  CHECK_LT(bank_index, bitmaps_.size());
53  CHECK_LT(index, bitmaps_[bank_index].size());
54  bitmaps_[bank_index][index] = val;
55  }
56 
57  private:
 // Outer vector is indexed by bank, inner std::vector<bool> by element.
58  std::vector<std::vector<bool>> bitmaps_;
59 };
60 
61 class ColumnarResults {
62  public:
63  ColumnarResults(const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
64  const ResultSet& rows,
65  const size_t num_columns,
66  const std::vector<SQLTypeInfo>& target_types,
67  const bool is_parallel_execution_enforced = false);
68 
69  ColumnarResults(const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
70  const int8_t* one_col_buffer,
71  const size_t num_rows,
72  const SQLTypeInfo& target_type);
73 
74  static std::unique_ptr<ColumnarResults> mergeResults(
75  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
76  const std::vector<std::unique_ptr<ColumnarResults>>& sub_results);
77 
78  const std::vector<int8_t*>& getColumnBuffers() const { return column_buffers_; }
79 
80  const size_t size() const { return num_rows_; }
81 
82  const SQLTypeInfo& getColumnType(const int col_id) const {
83  CHECK_GE(col_id, 0);
84  CHECK_LT(static_cast<size_t>(col_id), target_types_.size());
85  return target_types_[col_id];
86  }
87 
88  bool isParallelConversion() const { return parallel_conversion_; }
89  bool isDirectColumnarConversionPossible() const { return direct_columnar_conversion_; }
90 
91  // functions used to read content from the result set (direct columnarization, group by
92  // queries)
93  using ReadFunction =
94  std::function<int64_t(const ResultSet&, const size_t, const size_t, const size_t)>;
95 
96  // functions used to write back contents into output column buffers (direct
97  // columnarization, group by queries)
98  using WriteFunction = std::function<void(const ResultSet&,
99  const size_t,
100  const size_t,
101  const size_t,
102  const size_t,
103  const ReadFunction&)>;
104 
105  protected:
106  std::vector<int8_t*> column_buffers_;
107  size_t num_rows_;
108 
109  private:
110  ColumnarResults(const size_t num_rows, const std::vector<SQLTypeInfo>& target_types)
111  : num_rows_(num_rows), target_types_(target_types) {}
112  inline void writeBackCell(const TargetValue& col_val,
113  const size_t row_idx,
114  const size_t column_idx);
115  void materializeAllColumnsDirectly(const ResultSet& rows, const size_t num_columns);
116  void materializeAllColumnsThroughIteration(const ResultSet& rows,
117  const size_t num_columns);
118 
119  // Direct columnarization for group by queries (perfect hash or baseline hash)
120  void materializeAllColumnsGroupBy(const ResultSet& rows, const size_t num_columns);
121 
122  // Direct columnarization for Projections (only output is columnar)
123  void materializeAllColumnsProjection(const ResultSet& rows, const size_t num_columns);
124 
125  void copyAllNonLazyColumns(const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
126  const ResultSet& rows,
127  const size_t num_columns);
128  void materializeAllLazyColumns(const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
129  const ResultSet& rows,
130  const size_t num_columns);
131 
132  void locateAndCountEntries(const ResultSet& rows,
133  ColumnBitmap& bitmap,
134  std::vector<size_t>& non_empty_per_thread,
135  const size_t entry_count,
136  const size_t num_threads,
137  const size_t size_per_thread) const;
138  void compactAndCopyEntries(const ResultSet& rows,
139  const ColumnBitmap& bitmap,
140  const std::vector<size_t>& non_empty_per_thread,
141  const size_t num_columns,
142  const size_t entry_count,
143  const size_t num_threads,
144  const size_t size_per_thread);
145  void compactAndCopyEntriesWithTargetSkipping(
146  const ResultSet& rows,
147  const ColumnBitmap& bitmap,
148  const std::vector<size_t>& non_empty_per_thread,
149  const std::vector<size_t>& global_offsets,
150  const std::vector<bool>& targets_to_skip,
151  const std::vector<size_t>& slot_idx_per_target_idx,
152  const size_t num_columns,
153  const size_t entry_count,
154  const size_t num_threads,
155  const size_t size_per_thread);
156  void compactAndCopyEntriesWithoutTargetSkipping(
157  const ResultSet& rows,
158  const ColumnBitmap& bitmap,
159  const std::vector<size_t>& non_empty_per_thread,
160  const std::vector<size_t>& global_offsets,
161  const std::vector<size_t>& slot_idx_per_target_idx,
162  const size_t num_columns,
163  const size_t entry_count,
164  const size_t num_threads,
165  const size_t size_per_thread);
166 
167  template <typename DATA_TYPE>
168  void writeBackCellDirect(const ResultSet& rows,
169  const size_t input_buffer_entry_idx,
170  const size_t output_buffer_entry_idx,
171  const size_t target_idx,
172  const size_t slot_idx,
173  const ReadFunction& read_function);
174 
175  std::vector<WriteFunction> initWriteFunctions(
176  const ResultSet& rows,
177  const std::vector<bool>& targets_to_skip = {});
178 
179  template <QueryDescriptionType QUERY_TYPE, bool COLUMNAR_OUTPUT>
180  std::vector<ReadFunction> initReadFunctions(
181  const ResultSet& rows,
182  const std::vector<size_t>& slot_idx_per_target_idx,
183  const std::vector<bool>& targets_to_skip = {});
184 
185  std::tuple<std::vector<WriteFunction>, std::vector<ReadFunction>>
186  initAllConversionFunctions(const ResultSet& rows,
187  const std::vector<size_t>& slot_idx_per_target_idx,
188  const std::vector<bool>& targets_to_skip = {});
189 
190  const std::vector<SQLTypeInfo> target_types_;
191  bool parallel_conversion_; // multi-threaded execution of columnar conversion
192  bool
193  direct_columnar_conversion_; // whether columnar conversion might happen directly
194  // with minimal usage of result set's iterator access
195 };
196 
197 typedef std::
198  unordered_map<int, std::unordered_map<int, std::shared_ptr<const ColumnarResults>>>
199  ColumnCacheMap;
200 
201 #endif // COLUMNAR_RESULTS_H
bool isParallelConversion() const
std::vector< int8_t * > column_buffers_
const int8_t const int64_t * num_rows
static std::unique_ptr< ColumnarResults > mergeResults(const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::vector< std::unique_ptr< ColumnarResults >> &sub_results)
std::vector< ReadFunction > initReadFunctions(const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
void locateAndCountEntries(const ResultSet &rows, ColumnBitmap &bitmap, std::vector< size_t > &non_empty_per_thread, const size_t entry_count, const size_t num_threads, const size_t size_per_thread) const
#define CHECK_GE(x, y)
Definition: Logger.h:203
void set(const size_t index, const size_t bank_index, const bool val)
bool direct_columnar_conversion_
void compactAndCopyEntries(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
std::function< int64_t(const ResultSet &, const size_t, const size_t, const size_t)> ReadFunction
std::function< void(const ResultSet &, const size_t, const size_t, const size_t, const size_t, const ReadFunction &)> WriteFunction
void materializeAllColumnsThroughIteration(const ResultSet &rows, const size_t num_columns)
ColumnarResults(const size_t num_rows, const std::vector< SQLTypeInfo > &target_types)
const size_t size() const
std::vector< WriteFunction > initWriteFunctions(const ResultSet &rows, const std::vector< bool > &targets_to_skip={})
void materializeAllColumnsGroupBy(const ResultSet &rows, const size_t num_columns)
std::tuple< std::vector< WriteFunction >, std::vector< ReadFunction > > initAllConversionFunctions(const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
bool isDirectColumnarConversionPossible() const
#define CHECK_LT(x, y)
Definition: Logger.h:200
void materializeAllColumnsDirectly(const ResultSet &rows, const size_t num_columns)
void writeBackCellDirect(const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_function)
ColumnBitmap(const size_t num_elements_per_bank, size_t num_banks)
void writeBackCell(const TargetValue &col_val, const size_t row_idx, const size_t column_idx)
void copyAllNonLazyColumns(const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
std::vector< std::vector< bool > > bitmaps_
bool g_enable_watchdog false
Definition: Execute.cpp:71
ColumnarResults(const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const ResultSet &rows, const size_t num_columns, const std::vector< SQLTypeInfo > &target_types, const bool is_parallel_execution_enforced=false)
void materializeAllColumnsProjection(const ResultSet &rows, const size_t num_columns)
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > > > ColumnCacheMap
Basic constructors and methods of the row set interface.
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
void compactAndCopyEntriesWithTargetSkipping(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< bool > &targets_to_skip, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
void compactAndCopyEntriesWithoutTargetSkipping(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
const std::vector< int8_t * > & getColumnBuffers() const
const std::vector< SQLTypeInfo > target_types_
void materializeAllLazyColumns(const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
const SQLTypeInfo & getColumnType(const int col_id) const