OmniSciDB  1dac507f6e
QueryFragmentDescriptor Class Reference

#include <QueryFragmentDescriptor.h>

Public Member Functions

 QueryFragmentDescriptor (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const std::vector< Data_Namespace::MemoryInfo > &gpu_mem_infos, const double gpu_input_mem_limit_percent)
 
void buildFragmentKernelMap (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_multifrag_kernels, const bool enable_inner_join_fragment_skipping, Executor *executor)
 
template<typename DISPATCH_FCN >
void assignFragsToMultiDispatch (DISPATCH_FCN f) const
 
template<typename DISPATCH_FCN >
void assignFragsToKernelDispatch (DISPATCH_FCN f, const RelAlgExecutionUnit &ra_exe_unit) const
 
bool shouldCheckWorkUnitWatchdog () const
 

Static Public Member Functions

static void computeAllTablesFragments (std::map< int, const TableFragments * > &all_tables_fragments, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos)
 

Protected Member Functions

void buildFragmentPerKernelMap (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, Executor *executor)
 
void buildMultifragKernelMap (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_inner_join_fragment_skipping, Executor *executor)
 
bool terminateDispatchMaybe (size_t &tuple_count, const RelAlgExecutionUnit &ra_exe_unit, const ExecutionKernel &kernel) const
 
void checkDeviceMemoryUsage (const Fragmenter_Namespace::FragmentInfo &fragment, const int device_id, const size_t num_cols)
 

Protected Attributes

size_t outer_fragments_size_ = 0
 
int64_t rowid_lookup_key_ = -1
 
std::map< int, const TableFragments * > selected_tables_fragments_
 
std::map< int, std::vector< ExecutionKernel > > execution_kernels_per_device_
 
double gpu_input_mem_limit_percent_
 
std::map< size_t, size_t > tuple_count_per_device_
 
std::map< size_t, size_t > available_gpu_mem_bytes_
 

Detailed Description

Definition at line 64 of file QueryFragmentDescriptor.h.

Constructor & Destructor Documentation

QueryFragmentDescriptor::QueryFragmentDescriptor ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const std::vector< Data_Namespace::MemoryInfo > &  gpu_mem_infos,
const double  gpu_input_mem_limit_percent 
)

Definition at line 22 of file QueryFragmentDescriptor.cpp.

References available_gpu_mem_bytes_, CHECK_EQ, RelAlgExecutionUnit::input_descs, and selected_tables_fragments_.

    : gpu_input_mem_limit_percent_(gpu_input_mem_limit_percent) {
  const size_t input_desc_count{ra_exe_unit.input_descs.size()};
  CHECK_EQ(query_infos.size(), input_desc_count);
  for (size_t table_idx = 0; table_idx < input_desc_count; ++table_idx) {
    const auto table_id = ra_exe_unit.input_descs[table_idx].getTableId();
    if (!selected_tables_fragments_.count(table_id)) {
      selected_tables_fragments_[ra_exe_unit.input_descs[table_idx].getTableId()] =
          &query_infos[table_idx].info.fragments;
    }
  }

  for (size_t device_id = 0; device_id < gpu_mem_infos.size(); device_id++) {
    const auto& gpu_mem_info = gpu_mem_infos[device_id];
    available_gpu_mem_bytes_[device_id] =
        gpu_mem_info.maxNumPages * gpu_mem_info.pageSize;
  }
}
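For orientation, the sketch below isolates the per-device GPU memory budget computed by the constructor's second loop. It uses a simplified stand-in for Data_Namespace::MemoryInfo (only the two fields the loop reads) and is illustrative only, not the library's API; the budget for each device is simply maxNumPages * pageSize.

#include <cstddef>
#include <map>
#include <vector>

// Simplified stand-in for Data_Namespace::MemoryInfo (illustrative only).
struct MemoryInfoSketch {
  std::size_t maxNumPages;
  std::size_t pageSize;
};

// Mirrors the constructor's loop: one memory budget entry per GPU device.
std::map<std::size_t, std::size_t> compute_gpu_budgets(
    const std::vector<MemoryInfoSketch>& gpu_mem_infos) {
  std::map<std::size_t, std::size_t> available_gpu_mem_bytes;
  for (std::size_t device_id = 0; device_id < gpu_mem_infos.size(); ++device_id) {
    available_gpu_mem_bytes[device_id] =
        gpu_mem_infos[device_id].maxNumPages * gpu_mem_infos[device_id].pageSize;
  }
  return available_gpu_mem_bytes;
}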

Member Function Documentation

template<typename DISPATCH_FCN >
void QueryFragmentDescriptor::assignFragsToKernelDispatch ( DISPATCH_FCN  f,
const RelAlgExecutionUnit &  ra_exe_unit 
) const
inline

Dispatch kernels round-robin across devices: iterate the device map and dispatch one kernel per device on each pass. This balances the dispatch load and also allows early termination when the number of rows passing each kernel is known at dispatch time and the scan limit has been reached.

Definition at line 106 of file QueryFragmentDescriptor.h.

References CHECK(), execution_kernels_per_device_, rowid_lookup_key_, and terminateDispatchMaybe().

Referenced by Executor::dispatchFragments().

  {
    if (execution_kernels_per_device_.empty()) {
      return;
    }

    size_t tuple_count = 0;

    std::unordered_map<int, size_t> execution_kernel_index;
    for (const auto& device_itr : execution_kernels_per_device_) {
      CHECK(execution_kernel_index.insert(std::make_pair(device_itr.first, size_t(0)))
                .second);
    }

    bool dispatch_finished = false;
    while (!dispatch_finished) {
      dispatch_finished = true;
      for (const auto& device_itr : execution_kernels_per_device_) {
        auto& kernel_idx = execution_kernel_index[device_itr.first];
        if (kernel_idx < device_itr.second.size()) {
          dispatch_finished = false;
          const auto& execution_kernel = device_itr.second[kernel_idx++];
          f(device_itr.first, execution_kernel.fragments, rowid_lookup_key_);

          if (terminateDispatchMaybe(tuple_count, ra_exe_unit, execution_kernel)) {
            return;
          }
        }
      }
    }
  }

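The following is a minimal, self-contained sketch of the balanced round-robin dispatch pattern described above. It uses simplified stand-in types rather than the real ExecutionKernel and FragmentsList, and omits the early-termination check; it is illustrative only.

#include <cstddef>
#include <iostream>
#include <map>
#include <vector>

// Simplified stand-ins (illustrative only; not the real OmniSciDB types).
struct KernelSketch { int id; };
using KernelsPerDevice = std::map<int, std::vector<KernelSketch>>;

// Round-robin over devices: one kernel per device per outer pass, mirroring
// the balanced-dispatch loop in assignFragsToKernelDispatch.
template <typename DISPATCH_FCN>
void round_robin_dispatch(const KernelsPerDevice& kernels_per_device, DISPATCH_FCN f) {
  std::map<int, std::size_t> next_kernel;  // per-device cursor
  bool dispatch_finished = false;
  while (!dispatch_finished) {
    dispatch_finished = true;
    for (const auto& [device_id, kernels] : kernels_per_device) {
      auto& idx = next_kernel[device_id];
      if (idx < kernels.size()) {
        dispatch_finished = false;
        f(device_id, kernels[idx++]);  // dispatch one kernel, advance the cursor
      }
    }
  }
}

int main() {
  KernelsPerDevice kernels{{0, {{1}, {2}}}, {1, {{3}}}};
  round_robin_dispatch(kernels, [](int device, const KernelSketch& k) {
    std::cout << "device " << device << " runs kernel " << k.id << "\n";
  });
}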
template<typename DISPATCH_FCN >
void QueryFragmentDescriptor::assignFragsToMultiDispatch ( DISPATCH_FCN  f) const
inline

Dispatch multi-fragment kernels. Currently GPU only. Each GPU should have only one kernel, with multiple fragments in its fragments list.

Definition at line 89 of file QueryFragmentDescriptor.h.

References CHECK_EQ, execution_kernels_per_device_, and rowid_lookup_key_.

Referenced by Executor::dispatchFragments().

  {
    for (const auto& device_itr : execution_kernels_per_device_) {
      const auto& execution_kernels = device_itr.second;
      CHECK_EQ(execution_kernels.size(), size_t(1));

      const auto& fragments_list = execution_kernels.front().fragments;
      f(device_itr.first, fragments_list, rowid_lookup_key_);
    }
  }

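A hedged usage sketch: the functor is invoked exactly once per device and receives that device's single multi-fragment kernel's fragment list. The call shape mirrors the snippet above, but fragment_descriptor and enqueue_multifrag_kernel are hypothetical names introduced for illustration only.

// Hypothetical call site (illustrative only); the kernel map was built with
// multifrag kernels enabled, so each device holds exactly one kernel.
fragment_descriptor.assignFragsToMultiDispatch(
    [&](const int device_id,
        const FragmentsList& all_frags_for_device,
        const int64_t rowid_lookup_key) {
      // One kernel per GPU, covering every fragment assigned to that device.
      enqueue_multifrag_kernel(device_id, all_frags_for_device, rowid_lookup_key);
    });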

void QueryFragmentDescriptor::buildFragmentKernelMap ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< uint64_t > &  frag_offsets,
const int  device_count,
const ExecutorDeviceType &  device_type,
const bool  enable_multifrag_kernels,
const bool  enable_inner_join_fragment_skipping,
Executor *  executor 
)

Definition at line 59 of file QueryFragmentDescriptor.cpp.

References buildFragmentPerKernelMap(), and buildMultifragKernelMap().

Referenced by Executor::dispatchFragments().

{
  if (enable_multifrag_kernels) {
    buildMultifragKernelMap(ra_exe_unit,
                            frag_offsets,
                            device_count,
                            device_type,
                            enable_inner_join_fragment_skipping,
                            executor);
  } else {
    buildFragmentPerKernelMap(
        ra_exe_unit, frag_offsets, device_count, device_type, executor);
  }
}


void QueryFragmentDescriptor::buildFragmentPerKernelMap ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< uint64_t > &  frag_offsets,
const int  device_count,
const ExecutorDeviceType &  device_type,
Executor *  executor 
)
protected

Definition at line 93 of file QueryFragmentDescriptor.cpp.

References CHECK(), CHECK_GT, checkDeviceMemoryUsage(), anonymous_namespace{QueryFragmentDescriptor.cpp}::compute_fragment_tuple_count(), CPU, Data_Namespace::CPU_LEVEL, execution_kernels_per_device_, GPU, Data_Namespace::GPU_LEVEL, RelAlgExecutionUnit::input_descs, outer_fragments_size_, rowid_lookup_key_, selected_tables_fragments_, and RelAlgExecutionUnit::simple_quals.

Referenced by buildFragmentKernelMap().

{
  const auto& outer_table_desc = ra_exe_unit.input_descs.front();
  const int outer_table_id = outer_table_desc.getTableId();
  auto it = selected_tables_fragments_.find(outer_table_id);
  CHECK(it != selected_tables_fragments_.end());
  const auto outer_fragments = it->second;
  outer_fragments_size_ = outer_fragments->size();

  const auto num_bytes_for_row = executor->getNumBytesForFetchedRow();

  const ColumnDescriptor* deleted_cd{nullptr};
  if (outer_table_id > 0) {
    // Temporary tables will not have a table descriptor and will also not have deleted
    // rows.
    const auto& catalog = executor->getCatalog();
    const auto td = catalog->getMetadataForTable(outer_table_id);
    CHECK(td);
    deleted_cd = catalog->getDeletedColumnIfRowsDeleted(td);
  }

  for (size_t i = 0; i < outer_fragments->size(); ++i) {
    const auto& fragment = (*outer_fragments)[i];
    const auto skip_frag = executor->skipFragment(
        outer_table_desc, fragment, ra_exe_unit.simple_quals, frag_offsets, i);
    if (skip_frag.first) {
      continue;
    }
    rowid_lookup_key_ = std::max(rowid_lookup_key_, skip_frag.second);
    const int chosen_device_count =
        device_type == ExecutorDeviceType::CPU ? 1 : device_count;
    CHECK_GT(chosen_device_count, 0);
    const auto memory_level = device_type == ExecutorDeviceType::GPU
                                  ? Data_Namespace::GPU_LEVEL
                                  : Data_Namespace::CPU_LEVEL;
    int device_id = (device_type == ExecutorDeviceType::CPU || fragment.shard == -1)
                        ? fragment.deviceIds[static_cast<int>(memory_level)]
                        : fragment.shard % chosen_device_count;

    if (device_type == ExecutorDeviceType::GPU) {
      checkDeviceMemoryUsage(fragment, device_id, num_bytes_for_row);
    }

    ExecutionKernel execution_kernel{
        device_id, {}, compute_fragment_tuple_count(fragment, deleted_cd)};
    for (size_t j = 0; j < ra_exe_unit.input_descs.size(); ++j) {
      const auto frag_ids =
          executor->getTableFragmentIndices(ra_exe_unit,
                                            device_type,
                                            j,
                                            i,
                                            selected_tables_fragments_,
                                            executor->getInnerTabIdToJoinCond());
      const auto table_id = ra_exe_unit.input_descs[j].getTableId();
      auto table_frags_it = selected_tables_fragments_.find(table_id);
      CHECK(table_frags_it != selected_tables_fragments_.end());

      execution_kernel.fragments.emplace_back(FragmentsPerTable{table_id, frag_ids});
    }

    if (execution_kernels_per_device_.find(device_id) ==
        execution_kernels_per_device_.end()) {
      CHECK(execution_kernels_per_device_
                .insert(std::make_pair(device_id,
                                       std::vector<ExecutionKernel>{execution_kernel}))
                .second);
    } else {
      execution_kernels_per_device_[device_id].emplace_back(execution_kernel);
    }
  }
}

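As a self-contained sketch of just the device-selection step above (stand-in struct, illustrative only): sharded fragments running on GPU are pinned to shard % device_count, while unsharded fragments, or any fragment under CPU execution, fall back to the fragment's stored device id for the chosen memory level.

#include <array>

// Simplified stand-in for a fragment's placement info (illustrative only).
struct FragmentSketch {
  int shard;                     // -1 means the table is not sharded
  std::array<int, 3> deviceIds;  // preferred device id per memory level
};

// Mirrors the per-kernel device choice in buildFragmentPerKernelMap.
int choose_device(const FragmentSketch& fragment,
                  const bool is_cpu,
                  const int memory_level,
                  const int device_count) {
  return (is_cpu || fragment.shard == -1)
             ? fragment.deviceIds[memory_level]
             : fragment.shard % device_count;
}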

void QueryFragmentDescriptor::buildMultifragKernelMap ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< uint64_t > &  frag_offsets,
const int  device_count,
const ExecutorDeviceType &  device_type,
const bool  enable_inner_join_fragment_skipping,
Executor *  executor 
)
protected

Definition at line 169 of file QueryFragmentDescriptor.cpp.

References CHECK(), CHECK_EQ, checkDeviceMemoryUsage(), execution_kernels_per_device_, GPU, Data_Namespace::GPU_LEVEL, RelAlgExecutionUnit::input_descs, outer_fragments_size_, rowid_lookup_key_, selected_tables_fragments_, and RelAlgExecutionUnit::simple_quals.

Referenced by buildFragmentKernelMap().

{
  // Allocate all the fragments of the tables involved in the query to available
  // devices. The basic idea: the device is decided by the outer table in the
  // query (the first table in a join) and we need to broadcast the fragments
  // in the inner table to each device. Sharding will change this model.
  const auto& outer_table_desc = ra_exe_unit.input_descs.front();
  const int outer_table_id = outer_table_desc.getTableId();
  auto it = selected_tables_fragments_.find(outer_table_id);
  CHECK(it != selected_tables_fragments_.end());
  const auto outer_fragments = it->second;
  outer_fragments_size_ = outer_fragments->size();

  const auto inner_table_id_to_join_condition = executor->getInnerTabIdToJoinCond();
  const auto num_bytes_for_row = executor->getNumBytesForFetchedRow();

  for (size_t outer_frag_id = 0; outer_frag_id < outer_fragments->size();
       ++outer_frag_id) {
    const auto& fragment = (*outer_fragments)[outer_frag_id];
    auto skip_frag = executor->skipFragment(outer_table_desc,
                                            fragment,
                                            ra_exe_unit.simple_quals,
                                            frag_offsets,
                                            outer_frag_id);
    if (enable_inner_join_fragment_skipping &&
        (skip_frag == std::pair<bool, int64_t>(false, -1))) {
      skip_frag = executor->skipFragmentInnerJoins(
          outer_table_desc, ra_exe_unit, fragment, frag_offsets, outer_frag_id);
    }
    if (skip_frag.first) {
      continue;
    }
    const int device_id =
        fragment.shard == -1
            ? fragment.deviceIds[static_cast<int>(Data_Namespace::GPU_LEVEL)]
            : fragment.shard % device_count;
    if (device_type == ExecutorDeviceType::GPU) {
      checkDeviceMemoryUsage(fragment, device_id, num_bytes_for_row);
    }
    for (size_t j = 0; j < ra_exe_unit.input_descs.size(); ++j) {
      const auto table_id = ra_exe_unit.input_descs[j].getTableId();
      auto table_frags_it = selected_tables_fragments_.find(table_id);
      CHECK(table_frags_it != selected_tables_fragments_.end());
      const auto frag_ids =
          executor->getTableFragmentIndices(ra_exe_unit,
                                            device_type,
                                            j,
                                            outer_frag_id,
                                            selected_tables_fragments_,
                                            inner_table_id_to_join_condition);

      if (execution_kernels_per_device_.find(device_id) ==
          execution_kernels_per_device_.end()) {
        std::vector<ExecutionKernel> kernels{
            ExecutionKernel{device_id, FragmentsList{}, std::nullopt}};
        CHECK(execution_kernels_per_device_.insert(std::make_pair(device_id, kernels))
                  .second);
      }

      // Multifrag kernels only have one execution kernel per device. Grab the execution
      // kernel object and push back into its fragments list.
      CHECK_EQ(execution_kernels_per_device_[device_id].size(), size_t(1));
      auto& execution_kernel = execution_kernels_per_device_[device_id].front();

      auto& kernel_frag_list = execution_kernel.fragments;
      if (kernel_frag_list.size() < j + 1) {
        kernel_frag_list.emplace_back(FragmentsPerTable{table_id, frag_ids});
      } else {
        CHECK_EQ(kernel_frag_list[j].table_id, table_id);
        auto& curr_frag_ids = kernel_frag_list[j].fragment_ids;
        for (const int frag_id : frag_ids) {
          if (std::find(curr_frag_ids.begin(), curr_frag_ids.end(), frag_id) ==
              curr_frag_ids.end()) {
            curr_frag_ids.push_back(frag_id);
          }
        }
      }
    }
    rowid_lookup_key_ = std::max(rowid_lookup_key_, skip_frag.second);
  }
}

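A small self-contained sketch of the duplicate-free fragment-id merge performed near the end of the loop above: a device's single multi-fragment kernel accumulates fragment ids per input table, skipping ids it has already recorded. This is illustrative only, not the library's API.

#include <algorithm>
#include <vector>

// Append new fragment ids to a kernel's per-table list, skipping duplicates,
// mirroring the merge loop in buildMultifragKernelMap (illustrative only).
void merge_fragment_ids(std::vector<int>& curr_frag_ids,
                        const std::vector<int>& frag_ids) {
  for (const int frag_id : frag_ids) {
    if (std::find(curr_frag_ids.begin(), curr_frag_ids.end(), frag_id) ==
        curr_frag_ids.end()) {
      curr_frag_ids.push_back(frag_id);
    }
  }
}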

void QueryFragmentDescriptor::checkDeviceMemoryUsage ( const Fragmenter_Namespace::FragmentInfo &  fragment,
const int  device_id,
const size_t  num_cols 
)
protected

Definition at line 290 of file QueryFragmentDescriptor.cpp.

References available_gpu_mem_bytes_, CHECK_GE, g_cluster, Fragmenter_Namespace::FragmentInfo::getNumTuples(), gpu_input_mem_limit_percent_, LOG, tuple_count_per_device_, and logger::WARNING.

Referenced by buildFragmentPerKernelMap(), and buildMultifragKernelMap().

{
  if (g_cluster) {
    // Disabled in distributed mode for now
    return;
  }
  CHECK_GE(device_id, 0);
  tuple_count_per_device_[device_id] += fragment.getNumTuples();
  const size_t gpu_bytes_limit =
      available_gpu_mem_bytes_[device_id] * gpu_input_mem_limit_percent_;
  if (tuple_count_per_device_[device_id] * num_bytes_for_row > gpu_bytes_limit) {
    LOG(WARNING) << "Not enough memory on device " << device_id
                 << " for input chunks totaling "
                 << tuple_count_per_device_[device_id] * num_bytes_for_row
                 << " bytes (available device memory: " << gpu_bytes_limit << " bytes)";
    throw QueryMustRunOnCpu();
  }
}

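A hedged restatement of the limit check as a free function, followed by an invented numeric example; the real member tracks the running per-device tuple count in tuple_count_per_device_ and throws QueryMustRunOnCpu when the limit is exceeded.

#include <cstddef>

// Returns true when the projected input size exceeds the device budget,
// mirroring the check above (illustrative only).
bool over_gpu_limit(const std::size_t tuples_on_device,
                    const std::size_t num_bytes_for_row,
                    const std::size_t available_gpu_mem_bytes,
                    const double gpu_input_mem_limit_percent) {
  const std::size_t gpu_bytes_limit = static_cast<std::size_t>(
      gpu_input_mem_limit_percent * available_gpu_mem_bytes);
  return tuples_on_device * num_bytes_for_row > gpu_bytes_limit;
}
// Invented example: 10,000,000 tuples * 64 bytes/row = 640 MB of input; with an
// 8 GB device and a 0.9 limit (7.2 GB budget) the check passes, so no exception.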

void QueryFragmentDescriptor::computeAllTablesFragments ( std::map< int, const TableFragments * > &  all_tables_fragments,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos 
)
static

Definition at line 45 of file QueryFragmentDescriptor.cpp.

References CHECK_EQ, and RelAlgExecutionUnit::input_descs.

Referenced by Executor::ExecutionDispatch::runImpl().

{
  for (size_t tab_idx = 0; tab_idx < ra_exe_unit.input_descs.size(); ++tab_idx) {
    int table_id = ra_exe_unit.input_descs[tab_idx].getTableId();
    CHECK_EQ(query_infos[tab_idx].table_id, table_id);
    const auto& fragments = query_infos[tab_idx].info.fragments;
    if (!all_tables_fragments.count(table_id)) {
      all_tables_fragments.insert(std::make_pair(table_id, &fragments));
    }
  }
}

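A hedged usage sketch for the static helper; the surrounding variable names are assumptions, but the map's contents follow from the loop above: each input table id maps to its fragment list, and only the first occurrence of a repeated table id is kept.

// Hypothetical call site (illustrative only).
std::map<int, const TableFragments*> all_tables_fragments;
QueryFragmentDescriptor::computeAllTablesFragments(
    all_tables_fragments, ra_exe_unit, query_infos);
// all_tables_fragments now holds one fragment-list pointer per distinct input table.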

bool QueryFragmentDescriptor::shouldCheckWorkUnitWatchdog ( ) const
inline

Definition at line 138 of file QueryFragmentDescriptor.h.

References execution_kernels_per_device_, and rowid_lookup_key_.

Referenced by Executor::dispatchFragments().

  {
    return rowid_lookup_key_ < 0 && !execution_kernels_per_device_.empty();
  }


bool QueryFragmentDescriptor::terminateDispatchMaybe ( size_t &  tuple_count,
const RelAlgExecutionUnit &  ra_exe_unit,
const ExecutionKernel &  kernel 
) const
protected

Definition at line 272 of file QueryFragmentDescriptor.cpp.

References anonymous_namespace{QueryFragmentDescriptor.cpp}::is_sample_query(), SortInfo::limit, SortInfo::offset, ExecutionKernel::outer_tuple_count, and RelAlgExecutionUnit::sort_info.

Referenced by assignFragsToKernelDispatch().

{
  const auto sample_query_limit =
      ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
  if (!kernel.outer_tuple_count) {
    return false;
  } else {
    tuple_count += *kernel.outer_tuple_count;
    if (is_sample_query(ra_exe_unit) && sample_query_limit > 0 &&
        tuple_count >= sample_query_limit) {
      return true;
    }
  }
  return false;
}

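A minimal, self-contained restatement of the termination rule, assuming the sample-query predicate and the limit + offset sum are supplied by the caller (illustrative only):

#include <cstddef>
#include <optional>

// Early-termination test for sample-style queries: once the running outer tuple
// count reaches limit + offset, further dispatch can stop. Kernels whose outer
// tuple count is unknown at dispatch time never trigger termination.
bool should_terminate(std::size_t& tuple_count,
                      const std::optional<std::size_t>& kernel_outer_tuple_count,
                      const bool is_sample_query,
                      const std::size_t limit_plus_offset) {
  if (!kernel_outer_tuple_count) {
    return false;  // unknown count at dispatch time: keep going
  }
  tuple_count += *kernel_outer_tuple_count;
  return is_sample_query && limit_plus_offset > 0 && tuple_count >= limit_plus_offset;
}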

Member Data Documentation

std::map<size_t, size_t> QueryFragmentDescriptor::available_gpu_mem_bytes_
protected

Definition at line 152 of file QueryFragmentDescriptor.h.

Referenced by checkDeviceMemoryUsage(), and QueryFragmentDescriptor().

std::map<int, std::vector<ExecutionKernel> > QueryFragmentDescriptor::execution_kernels_per_device_
protected
double QueryFragmentDescriptor::gpu_input_mem_limit_percent_
protected

Definition at line 150 of file QueryFragmentDescriptor.h.

Referenced by checkDeviceMemoryUsage().

size_t QueryFragmentDescriptor::outer_fragments_size_ = 0
protected
int64_t QueryFragmentDescriptor::rowid_lookup_key_ = -1
protected
std::map<int, const TableFragments*> QueryFragmentDescriptor::selected_tables_fragments_
protected
std::map<size_t, size_t> QueryFragmentDescriptor::tuple_count_per_device_
protected

Definition at line 151 of file QueryFragmentDescriptor.h.

Referenced by checkDeviceMemoryUsage().


The documentation for this class was generated from the following files:
QueryFragmentDescriptor.h
QueryFragmentDescriptor.cpp