32 #include <unordered_map>
39 namespace Fragmenter_Namespace {
43 namespace Data_Namespace {
70 const std::vector<InputTableInfo>& query_infos,
71 const std::vector<Data_Namespace::MemoryInfo>& gpu_mem_infos,
72 const double gpu_input_mem_limit_percent,
73 const std::vector<size_t> allowed_outer_fragment_indices);
76 std::map<int, const TableFragments*>& all_tables_fragments,
78 const std::vector<InputTableInfo>& query_infos);
81 const std::vector<uint64_t>& frag_offsets,
82 const int device_count,
84 const bool enable_multifrag_kernels,
85 const bool enable_inner_join_fragment_skipping,
92 template <typename DISPATCH_FCN>
95 const auto& execution_kernels = device_itr.second;
96 CHECK_EQ(execution_kernels.size(), size_t(1));
98 const auto& fragments_list = execution_kernels.front().fragments;
109 template <typename DISPATCH_FCN>
116 size_t tuple_count = 0;
118 std::unordered_map<int, size_t> execution_kernel_index;
120 CHECK(execution_kernel_index.insert(std::make_pair(device_itr.first, size_t(0)))
124 bool dispatch_finished = false;
125 while (!dispatch_finished) {
126 dispatch_finished = true;
127 for (const auto& device_itr : execution_kernels_per_device_) {
128 auto& kernel_idx = execution_kernel_index[device_itr.first];
129 if (kernel_idx < device_itr.second.size()) {
130 dispatch_finished = false;
131 const auto& execution_kernel = device_itr.second[kernel_idx++];
159 const std::vector<uint64_t>& frag_offsets,
160 const int device_count,
161 const size_t num_bytes_for_row,
166 const std::vector<uint64_t>& frag_offsets,
167 const int device_count,
168 const size_t num_bytes_for_row,
173 const std::vector<uint64_t>& frag_offsets,
174 const int device_count,
175 const size_t num_bytes_for_row,
177 const bool enable_inner_join_fragment_skipping,
184 const bool is_temporary_table,
185 const std::vector<uint64_t>& frag_offsets,
186 const int device_count,
187 const size_t num_bytes_for_row,
189 const std::optional<size_t> table_desc_offset,
199 const size_t num_cols);
std::map< int, const TableFragments * > selected_tables_fragments_
QueryFragmentDescriptor(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const std::vector< Data_Namespace::MemoryInfo > &gpu_mem_infos, const double gpu_input_mem_limit_percent, const std::vector< size_t > allowed_outer_fragment_indices)
std::optional< size_t > outer_tuple_count
std::map< size_t, size_t > tuple_count_per_device_
bool terminateDispatchMaybe(size_t &tuple_count, const RelAlgExecutionUnit &ra_exe_unit, const ExecutionKernelDescriptor &kernel) const
int64_t rowid_lookup_key_
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
std::vector< Fragmenter_Namespace::FragmentInfo > TableFragments
void assignFragsToKernelDispatch(DISPATCH_FCN f, const RelAlgExecutionUnit &ra_exe_unit) const
void buildFragmentPerKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ExecutorDeviceType &device_type, Executor *executor)
std::vector< FragmentsPerTable > FragmentsList
double gpu_input_mem_limit_percent_
bool shouldCheckWorkUnitWatchdog() const
std::map< int, std::vector< ExecutionKernelDescriptor > > execution_kernels_per_device_
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples (rows) stored by that fragment
void checkDeviceMemoryUsage(const Fragmenter_Namespace::FragmentInfo &fragment, const int device_id, const size_t num_cols)
size_t outer_fragments_size_
void buildMultifragKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ExecutorDeviceType &device_type, const bool enable_inner_join_fragment_skipping, Executor *executor)
void assignFragsToMultiDispatch(DISPATCH_FCN f) const
void buildFragmentPerKernelForTable(const TableFragments *fragments, const RelAlgExecutionUnit &ra_exe_unit, const InputDescriptor &table_desc, const bool is_temporary_table, const std::vector< uint64_t > &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ChunkMetadataVector &deleted_chunk_metadata_vec, const std::optional< size_t > table_desc_offset, const ExecutorDeviceType &device_type, Executor *executor)
std::vector< size_t > allowed_outer_fragment_indices_
void buildFragmentPerKernelMapForUnion(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ExecutorDeviceType &device_type, Executor *executor)
std::vector< size_t > fragment_ids
void buildFragmentKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_multifrag_kernels, const bool enable_inner_join_fragment_skipping, Executor *executor)
std::map< size_t, size_t > available_gpu_mem_bytes_
static void computeAllTablesFragments(std::map< int, const TableFragments * > &all_tables_fragments, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos)