QueryFragmentDescriptor.h
/*
 * Copyright 2020 OmniSci, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file    QueryFragmentDescriptor.h
 * @brief   Descriptor for the fragments required for a query.
 */

#pragma once

#include <deque>
#include <functional>
#include <map>
#include <memory>
#include <optional>
#include <ostream>
#include <set>
#include <unordered_map>
#include <vector>

#include "DataMgr/ChunkMetadata.h"
#include "QueryEngine/CompilationOptions.h"
#include "Shared/Logger.h"

namespace Fragmenter_Namespace {
class FragmentInfo;
}

namespace Data_Namespace {
struct MemoryInfo;
}

class Executor;
class InputDescriptor;
struct InputTableInfo;
struct RelAlgExecutionUnit;

struct FragmentsPerTable {
  int table_id;
  std::vector<size_t> fragment_ids;
};

using FragmentsList = std::vector<FragmentsPerTable>;
using TableFragments = std::vector<Fragmenter_Namespace::FragmentInfo>;

struct ExecutionKernelDescriptor {
  int device_id;
  FragmentsList fragments;
  std::optional<size_t> outer_tuple_count;  // only for fragments with an exact tuple
                                            // count available in metadata
};
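
// Illustrative sketch (not part of the original header): a FragmentsList pairs
// each table id with the fragment ids one kernel should scan, and an
// ExecutionKernelDescriptor binds such a list to a device. The ids below are
// made up for illustration.
//
//   FragmentsList frags{{/*table_id=*/100, /*fragment_ids=*/{0, 2}},
//                       {/*table_id=*/101, /*fragment_ids=*/{1}}};
//   ExecutionKernelDescriptor desc{/*device_id=*/0,
//                                  frags,
//                                  /*outer_tuple_count=*/std::nullopt};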

class QueryFragmentDescriptor {
 public:
  QueryFragmentDescriptor(const RelAlgExecutionUnit& ra_exe_unit,
                          const std::vector<InputTableInfo>& query_infos,
                          const std::vector<Data_Namespace::MemoryInfo>& gpu_mem_infos,
                          const double gpu_input_mem_limit_percent,
                          const std::vector<size_t> allowed_outer_fragment_indices);

  static void computeAllTablesFragments(
      std::map<int, const TableFragments*>& all_tables_fragments,
      const RelAlgExecutionUnit& ra_exe_unit,
      const std::vector<InputTableInfo>& query_infos);

  void buildFragmentKernelMap(const RelAlgExecutionUnit& ra_exe_unit,
                              const std::vector<uint64_t>& frag_offsets,
                              const int device_count,
                              const ExecutorDeviceType& device_type,
                              const bool enable_multifrag_kernels,
                              const bool enable_inner_join_fragment_skipping,
                              Executor* executor);
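
  // Usage sketch (illustrative, not part of the original header): a caller
  // first collects fragment metadata for every table in the query, then builds
  // the per-device kernel map. `fragment_descriptor`, `ra_exe_unit`,
  // `query_infos`, `frag_offsets`, and `executor` stand for objects the caller
  // already owns.
  //
  //   std::map<int, const TableFragments*> all_tables_fragments;
  //   QueryFragmentDescriptor::computeAllTablesFragments(
  //       all_tables_fragments, ra_exe_unit, query_infos);
  //   fragment_descriptor.buildFragmentKernelMap(
  //       ra_exe_unit,
  //       frag_offsets,
  //       /*device_count=*/2,
  //       ExecutorDeviceType::GPU,
  //       /*enable_multifrag_kernels=*/true,
  //       /*enable_inner_join_fragment_skipping=*/false,
  //       executor);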

  /**
   * Dispatch multi-fragment kernels. Currently GPU only. Each GPU should have
   * only one kernel, with multiple fragments in its fragments list.
   */
  template <typename DISPATCH_FCN>
  void assignFragsToMultiDispatch(DISPATCH_FCN f) const {
    for (const auto& device_itr : execution_kernels_per_device_) {
      const auto& execution_kernels = device_itr.second;
      CHECK_EQ(execution_kernels.size(), size_t(1));

      const auto& fragments_list = execution_kernels.front().fragments;
      f(device_itr.first, fragments_list, rowid_lookup_key_);
    }
  }
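
  // Usage sketch (illustrative, not part of the original header): the functor
  // receives the device id, that device's single multi-fragment list, and the
  // rowid lookup key. `enqueue_kernel` is a hypothetical caller-owned callback.
  //
  //   fragment_descriptor.assignFragsToMultiDispatch(
  //       [&](const int device_id,
  //           const FragmentsList& frag_list,
  //           const int64_t rowid_lookup_key) {
  //         enqueue_kernel(device_id, frag_list, rowid_lookup_key);
  //       });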

  /**
   * Dispatch one fragment for each device. Iterate the device map and dispatch
   * one kernel for each device per iteration. This allows balanced dispatch as
   * well as early termination if the number of rows passing the kernel can be
   * computed at dispatch time and the scan limit is reached.
   */
  template <typename DISPATCH_FCN>
  void assignFragsToKernelDispatch(DISPATCH_FCN f,
                                   const RelAlgExecutionUnit& ra_exe_unit) const {
    if (execution_kernels_per_device_.empty()) {
      return;
    }

    size_t tuple_count = 0;

    std::unordered_map<int, size_t> execution_kernel_index;
    for (const auto& device_itr : execution_kernels_per_device_) {
      CHECK(execution_kernel_index.insert(std::make_pair(device_itr.first, size_t(0)))
                .second);
    }

    bool dispatch_finished = false;
    while (!dispatch_finished) {
      dispatch_finished = true;
      for (const auto& device_itr : execution_kernels_per_device_) {
        auto& kernel_idx = execution_kernel_index[device_itr.first];
        if (kernel_idx < device_itr.second.size()) {
          dispatch_finished = false;
          const auto& execution_kernel = device_itr.second[kernel_idx++];
          f(device_itr.first, execution_kernel.fragments, rowid_lookup_key_);
          if (terminateDispatchMaybe(tuple_count, ra_exe_unit, execution_kernel)) {
            return;
          }
        }
      }
    }
  }
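
  // Usage sketch (illustrative, not part of the original header): kernels are
  // handed out round-robin across devices, one fragments list per call, until
  // every kernel is dispatched or terminateDispatchMaybe() hits the scan
  // limit. `enqueue_kernel` is a hypothetical caller-owned callback.
  //
  //   fragment_descriptor.assignFragsToKernelDispatch(
  //       [&](const int device_id,
  //           const FragmentsList& frag_list,
  //           const int64_t rowid_lookup_key) {
  //         enqueue_kernel(device_id, frag_list, rowid_lookup_key);
  //       },
  //       ra_exe_unit);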

  bool shouldCheckWorkUnitWatchdog() const {
    return rowid_lookup_key_ < 0 && !execution_kernels_per_device_.empty();
  }

 protected:
  std::vector<size_t> allowed_outer_fragment_indices_;
  size_t outer_fragments_size_ = 0;
  int64_t rowid_lookup_key_ = -1;

  std::map<int, const TableFragments*> selected_tables_fragments_;

  std::map<int, std::vector<ExecutionKernelDescriptor>> execution_kernels_per_device_;

  double gpu_input_mem_limit_percent_;
  std::map<size_t, size_t> tuple_count_per_device_;
  std::map<size_t, size_t> available_gpu_mem_bytes_;

  void buildFragmentPerKernelMapForUnion(const RelAlgExecutionUnit& ra_exe_unit,
                                         const std::vector<uint64_t>& frag_offsets,
                                         const int device_count,
                                         const size_t num_bytes_for_row,
                                         const ExecutorDeviceType& device_type,
                                         Executor* executor);

  void buildFragmentPerKernelMap(const RelAlgExecutionUnit& ra_exe_unit,
                                 const std::vector<uint64_t>& frag_offsets,
                                 const int device_count,
                                 const size_t num_bytes_for_row,
                                 const ExecutorDeviceType& device_type,
                                 Executor* executor);

  void buildMultifragKernelMap(const RelAlgExecutionUnit& ra_exe_unit,
                               const std::vector<uint64_t>& frag_offsets,
                               const int device_count,
                               const size_t num_bytes_for_row,
                               const ExecutorDeviceType& device_type,
                               const bool enable_inner_join_fragment_skipping,
                               Executor* executor);

  void buildFragmentPerKernelForTable(const TableFragments* fragments,
                                      const RelAlgExecutionUnit& ra_exe_unit,
                                      const InputDescriptor& table_desc,
                                      const std::vector<uint64_t>& frag_offsets,
                                      const int device_count,
                                      const size_t num_bytes_for_row,
                                      const ChunkMetadataVector& deleted_chunk_metadata_vec,
                                      const std::optional<size_t> table_desc_offset,
                                      const ExecutorDeviceType& device_type,
                                      Executor* executor);

  bool terminateDispatchMaybe(size_t& tuple_count,
                              const RelAlgExecutionUnit& ra_exe_unit,
                              const ExecutionKernelDescriptor& kernel) const;

  void checkDeviceMemoryUsage(const Fragmenter_Namespace::FragmentInfo& fragment,
                              const int device_id,
                              const size_t num_cols);
};

std::ostream& operator<<(std::ostream&, FragmentsPerTable const&);