    const std::shared_ptr<Analyzer::BinOper> condition,
    // ...
    const std::vector<InputTableInfo>& query_infos,
    // ...
    const int device_count,
    // ...
  auto range_expr_col_var =
      // ...
  if (!range_expr_col_var || !range_expr_col_var->get_type_info().is_geometry()) {
    throw HashJoinFail("Could not build hash tables for range join | " +
                       // ...
  }
  CHECK(range_expr_col_var->get_type_info().is_geometry());

  auto coords_column_key = range_expr_col_var->getColumnKey();
  coords_column_key.column_id = coords_column_key.column_id + 1;
  // ...
  auto range_join_inner_col_expr = makeExpr<Analyzer::ColumnVar>(
      coords_cd->columnType, coords_column_key, range_expr_col_var->get_rte_idx());

  std::vector<InnerOuter> inner_outer_pairs;
  inner_outer_pairs.emplace_back(
      // ...
      condition->get_left_operand()});

  const auto& query_info =
      // ...
  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
    // ...
  }
  // ...
          condition.get(), executor, inner_outer_pairs)
  // ...
  auto join_hash_table = std::make_shared<RangeJoinHashTable>(condition,
                                                              // ...
                                                              range_join_inner_col_expr,
                                                              // ...
                                                              hashtable_build_dag_map,
                                                              table_id_to_node_map);
  // ...
    throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
                                   "involved in equijoin | ") +
                       // ...
    throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
                       // ...
  } catch (const std::exception& e) {
    LOG(FATAL) << "Fatal error while attempting to build hash tables for join: "
               // ...
  }
  return join_hash_table;
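// Illustrative sketch (not part of RangeJoinHashTable.cpp): the keyspace sizing
// guard applied above. The helper name and the 2x sizing rule are assumptions for
// illustration; the point preserved from the listing is that the total entry count
// must fit in a signed 32-bit range or the build is rejected.
#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>

inline size_t checked_range_join_entry_count(const size_t num_inner_tuples) {
  const size_t total_entries = 2 * num_inner_tuples;  // hypothetical sizing rule
  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
    throw std::runtime_error("too many hash entries for range join");
  }
  return total_entries;
}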
  const auto& query_info =
      // ...
  if (query_info.fragments.empty()) {
    // ...
  }
  // ...
  std::vector<ColumnsForDevice> columns_per_device;
  // ...
  std::vector<std::vector<Fragmenter_Namespace::FragmentInfo>> fragments_per_device;
  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
  // ...
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    fragments_per_device.emplace_back(
        // ...
        : query_info.fragments);
    // ...
    dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(
        // ...
    const auto columns_for_device =
        // ...
            ? dev_buff_owners[device_id].get()
            // ...
    columns_per_device.push_back(columns_for_device);
  }
  // ...
  const auto bucket_range =
      // ...
  CHECK(bucket_range->get_type_info().is_fp() &&
        bucket_range->get_type_info().get_size() == 8);
  // ...
  const auto bucket_range_datum = bucket_range->get_constval();
  // ...
  std::vector<InnerOuter> inner_outer_pairs_for_cache_lookup;
  inner_outer_pairs_for_cache_lookup.emplace_back(InnerOuter{
      // ...
  auto hashtable_access_path_info =
      // ...
          fragments_per_device,
          // ...
  table_keys_ = hashtable_access_path_info.table_keys;
  // ...
  auto get_inner_table_key = [&inner_outer_pairs_for_cache_lookup]() {
    auto col_var = inner_outer_pairs_for_cache_lookup.front().first;
    return col_var->getTableKey();
  };
  // ...
  const auto& inner_table_key = get_inner_table_key();
  // ...
      fragments_per_device,
      // ...
      get_inner_table_key().table_id > 0) {
    std::vector<size_t> per_device_chunk_key;
    for (int device_id = 0; device_id < device_count_; ++device_id) {
      // ...
      boost::hash_combine(chunk_key_hash,
                          // ...
      per_device_chunk_key.push_back(chunk_key_hash);
      // ...
          inner_outer_pairs_for_cache_lookup,
          columns_per_device.front().join_columns.front().num_elems,
          // ...
    }
    // ...
    if (auto generic_hash_table =
            // ...
      if (auto hash_table =
              std::dynamic_pointer_cast<BaselineHashTable>(generic_hash_table)) {
        // ...
        for (int device_id = 0; device_id < device_count_; ++device_id) {
          auto gpu_hash_table = copyCpuHashTableToGpu(hash_table,
                                                      // ...
                                                      hash_table->getEntryCount(),
                                                      hash_table->getEmittedKeysCount(),
                                                      // ...
        }
      }
    }
  }
  // ...
  auto [entry_count, emitted_keys_count] =
      // ...
  VLOG(1) << "Finalized range join hash table: entry count " << entry_count
          << " hash table size " << hash_table_size;
  // ...
  std::vector<std::future<void>> init_threads;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    init_threads.push_back(
        // ...
            columns_per_device[device_id],
            // ...
  }
  for (auto& init_thread : init_threads) {
    // ...
  }
  for (auto& init_thread : init_threads) {
    // ...
  }
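// Illustrative sketch (not part of reifyWithLayout above) of its per-device
// fan-out: one std::async task per device, a wait pass so every task finishes,
// then a get pass so any stored exception is rethrown only after all tasks have
// run. Function and lambda bodies here are placeholders.
#include <future>
#include <vector>

inline void reify_per_device_sketch(const int device_count) {
  std::vector<std::future<void>> init_threads;
  for (int device_id = 0; device_id < device_count; ++device_id) {
    init_threads.push_back(std::async(std::launch::async, [device_id] {
      (void)device_id;  // build this device's hash table here (placeholder)
    }));
  }
  for (auto& init_thread : init_threads) {
    init_thread.wait();  // let every task complete first
  }
  for (auto& init_thread : init_threads) {
    init_thread.get();  // then surface the first stored exception, if any
  }
}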
    const size_t entry_count,
    const size_t emitted_keys_count,
    // ...
  VLOG(1) << "Building range join hash table on CPU.";
  // ...
  auto gpu_hash_table = copyCpuHashTableToGpu(
      hash_table, layout, entry_count, emitted_keys_count, device_id);
  // ...
  auto hash_table = initHashTableOnGpu(columns_for_device.join_columns,
                                       // ...
std::shared_ptr<BaselineHashTable> RangeJoinHashTable::initHashTableOnGpu(
    const std::vector<JoinColumn>& join_columns,
    const std::vector<JoinColumnTypeInfo>& join_column_types,
    const std::vector<JoinBucketInfo>& join_bucket_info,
    // ...
    const size_t entry_count,
    const size_t emitted_keys_count,
    const size_t device_id) {
  // ...
  VLOG(1) << "Building range join hash table on GPU.";
  // ...
  CHECK(!join_bucket_info.empty());
  auto& inverse_bucket_sizes_for_dimension =
      join_bucket_info[0].inverse_bucket_sizes_for_dimension;
  // ...
      inverse_bucket_sizes_for_dimension, allocator);
  // ...
      inverse_bucket_sizes_for_dimension.size(),
  // ...
      std::string("Unrecognized error when initializing GPU range join hash table (") +
      // ...
    const std::vector<JoinColumn>& join_columns,
    const std::vector<JoinColumnTypeInfo>& join_column_types,
    const std::vector<JoinBucketInfo>& join_bucket_info,
    // ...
    const size_t entry_count,
    const size_t emitted_keys_count) {
  // ...
  decltype(std::chrono::steady_clock::now()) ts1, ts2;
  ts1 = std::chrono::steady_clock::now();
  const auto composite_key_info =
      // ...
  CHECK(!join_columns.empty());
  CHECK(!join_bucket_info.empty());
  // ...
  const auto key_component_count =
      join_bucket_info[0].inverse_bucket_sizes_for_dimension.size();
  // ...
      join_bucket_info[0].inverse_bucket_sizes_for_dimension.data());
  // ...
      dummy_str_proxy_translation_maps_ptrs_and_offsets;
  // ...
  builder.initHashTableOnCpu(&key_handler,
                             // ...
                             dummy_str_proxy_translation_maps_ptrs_and_offsets,
                             // ...
  ts2 = std::chrono::steady_clock::now();
  // ...
  throw HashJoinFail(std::string("Unrecognized error when initializing CPU "
                                 "range join hash table (") +
                     // ...
  std::shared_ptr<BaselineHashTable> hash_table = builder.getHashTable();
  auto hashtable_build_time =
      std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count();
  // ...
      hashtable_build_time);
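// Illustrative sketch (not the builder API used above) of the steady_clock
// timing pattern that brackets the CPU build: the elapsed milliseconds are what
// gets recorded alongside the cached table. The template and its name are
// assumptions for illustration only.
#include <chrono>
#include <cstdint>

template <typename BuildFn>
int64_t timed_build_ms(BuildFn&& build) {
  const auto ts1 = std::chrono::steady_clock::now();
  build();  // e.g. the initHashTableOnCpu call above
  const auto ts2 = std::chrono::steady_clock::now();
  return std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count();
}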
    const size_t shard_count,
    std::vector<ColumnsForDevice>& columns_per_device) {
  // ...
  const auto [tuple_count, emitted_keys_count] =
      // ...
  const auto entry_count = 2 * std::max(tuple_count, size_t(1));
  // ...
  return std::make_pair(
      // ...
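// Illustrative sketch of the sizing arithmetic visible in
// computeRangeHashTableCounts above: the entry count is twice the approximate
// tuple count, clamped to at least one entry, and the emitted-keys count is
// passed through. The free-function form is an assumption for illustration.
#include <algorithm>
#include <cstddef>
#include <utility>

inline std::pair<size_t, size_t> range_hash_table_counts_sketch(
    const size_t tuple_count,
    const size_t emitted_keys_count) {
  const size_t entry_count = 2 * std::max(tuple_count, size_t(1));
  return std::make_pair(entry_count, emitted_keys_count);
}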
    const std::vector<double>& inverse_bucket_sizes_for_dimension,
    std::vector<ColumnsForDevice>& columns_per_device,
    const size_t chosen_max_hashtable_size,
    const double chosen_bucket_threshold) {
  // ...
      .bitmap_sz_bits = 11,
      // ...
      .sub_bitmap_count = 1,
      // ...
  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();

  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
  if (columns_per_device.front().join_columns.front().num_elems == 0) {
    return std::make_pair(0, 0);
  }
  // ...
  for (auto& columns_for_device : columns_per_device) {
    columns_for_device.setBucketInfo(inverse_bucket_sizes_for_dimension,
                                     // ...
  }
  // ...
  CHECK_EQ(columns_per_device.front().join_columns.size(),
           columns_per_device.front().join_buckets.size());
  // ...
  const auto composite_key_info =
      // ...
  std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
  auto hll_result = &hll_buffer_all_cpus[0];
  // ...
  std::vector<int32_t> num_keys_for_row;
  num_keys_for_row.resize(columns_per_device.front().join_columns[0].num_elems);
  // ...
      count_distinct_desc.bitmap_sz_bits,
      // ...
      columns_per_device.front().join_columns,
      columns_per_device.front().join_column_types,
      columns_per_device.front().join_buckets,
      // ...
  for (int i = 1; i < thread_count; ++i) {
    // ...
              hll_result + i * padded_size_bytes,
              1 << count_distinct_desc.bitmap_sz_bits);
  }
  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
                        num_keys_for_row.size() > 0 ? num_keys_for_row.back() : 0);
  // ...
  auto& data_mgr = *executor_->getDataMgr();
  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
  for (auto& host_hll_buffer : host_hll_buffers) {
    host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
  }
  std::vector<size_t> emitted_keys_count_device_threads(device_count_, 0);
  std::vector<std::future<void>> approximate_distinct_device_threads;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    approximate_distinct_device_threads.emplace_back(std::async(
        // ...
        &count_distinct_desc,
        // ...
        &emitted_keys_count_device_threads,
        // ...
          auto allocator = std::make_unique<CudaAllocator>(
              // ...
          auto device_hll_buffer =
              allocator->alloc(count_distinct_desc.bitmapPaddedSizeBytes());
          data_mgr.getCudaMgr()->zeroDeviceMem(
              // ...
              count_distinct_desc.bitmapPaddedSizeBytes(),
              // ...
          const auto& columns_for_device = columns_per_device[device_id];
          // ...
              columns_for_device.join_columns, *allocator);
          // ...
          CHECK_GT(columns_for_device.join_buckets.size(), 0u);
          const auto& bucket_sizes_for_dimension =
              columns_for_device.join_buckets[0].inverse_bucket_sizes_for_dimension;
          auto bucket_sizes_gpu =
              allocator->alloc(bucket_sizes_for_dimension.size() * sizeof(double));
          allocator->copyToDevice(bucket_sizes_gpu,
                                  bucket_sizes_for_dimension.data(),
                                  bucket_sizes_for_dimension.size() * sizeof(double));
          const size_t row_counts_buffer_sz =
              columns_per_device.front().join_columns[0].num_elems * sizeof(int32_t);
          auto row_counts_buffer = allocator->alloc(row_counts_buffer_sz);
          data_mgr.getCudaMgr()->zeroDeviceMem(
              // ...
              row_counts_buffer_sz,
              // ...
          const auto key_handler =
              // ...
                  bucket_sizes_for_dimension.size(),
                  // ...
                  reinterpret_cast<double*>(bucket_sizes_gpu));
          const auto key_handler_gpu =
              // ...
              reinterpret_cast<uint8_t*>(device_hll_buffer),
              count_distinct_desc.bitmap_sz_bits,
              reinterpret_cast<int32_t*>(row_counts_buffer),
              // ...
              columns_for_device.join_columns[0].num_elems,
              // ...
          auto& host_emitted_keys_count = emitted_keys_count_device_threads[device_id];
          allocator->copyFromDevice(
              &host_emitted_keys_count,
              // ...
              (columns_per_device.front().join_columns[0].num_elems - 1) *
              // ...
          auto& host_hll_buffer = host_hll_buffers[device_id];
          allocator->copyFromDevice(&host_hll_buffer[0],
                                    // ...
                                    count_distinct_desc.bitmapPaddedSizeBytes());
  }
  for (auto& child : approximate_distinct_device_threads) {
    // ...
  }
  // ...
  auto& result_hll_buffer = host_hll_buffers.front();
  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
  for (int device_id = 1; device_id < device_count_; ++device_id) {
    auto& host_hll_buffer = host_hll_buffers[device_id];
    // ...
              reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
              1 << count_distinct_desc.bitmap_sz_bits);
  }
  size_t emitted_keys_count = 0;
  for (auto& emitted_keys_count_device : emitted_keys_count_device_threads) {
    emitted_keys_count += emitted_keys_count_device;
  }
  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
                        // ...
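// Illustrative sketch (not the hll_unify/hll_size helpers used above, and with a
// simplified uint8_t register type) of how the per-device HyperLogLog buffers are
// combined: registers are merged by taking the per-register maximum, and the
// per-device emitted-key counts are simply summed.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <vector>

inline void hll_register_max_merge(std::vector<uint8_t>& dst,
                                   const std::vector<uint8_t>& src) {
  const size_t n = std::min(dst.size(), src.size());
  for (size_t i = 0; i < n; ++i) {
    dst[i] = std::max(dst[i], src[i]);  // HLL union keeps the larger register value
  }
}

inline size_t sum_emitted_keys(const std::vector<size_t>& per_device_counts) {
  return std::accumulate(per_device_counts.begin(), per_device_counts.end(), size_t{0});
}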
#define LL_CONTEXT executor_->cgen_state_->context_
#define LL_BUILDER executor_->cgen_state_->ir_builder_
#define LL_INT(v) executor_->cgen_state_->llInt(v)
#define LL_FP(v) executor_->cgen_state_->llFp(v)
#define ROW_FUNC executor_->cgen_state_->row_func_
    llvm::Value* offset_ptr) {
  // ...
  CHECK(key_component_width == 4 || key_component_width == 8);
  // ...
  llvm::Value* key_buff_lv{nullptr};
  switch (key_component_width) {
    // ...
  const auto outer_col = inner_outer_pair.second;
  const auto outer_col_ti = outer_col->get_type_info();
  // ...
  if (outer_col_ti.is_geometry()) {
    // ...
    llvm::Value* arr_ptr{nullptr};
    // ...
    if (auto outer_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col)) {
      const auto col_lvs = code_generator.codegen(outer_col, true, co);
      CHECK_EQ(col_lvs.size(), size_t(1));

      auto column_key = outer_col_var->getColumnKey();
      column_key.column_id = column_key.column_id + 1;
      // ...
      const auto coords_ti = coords_cd->columnType;
      // ...
      const auto array_buff_ptr = executor_->cgen_state_->emitExternalCall(
          // ...
          llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_),
          {col_lvs.front(), code_generator.posArg(outer_col)});
      CHECK(array_buff_ptr);
      // ...
          << "Only TINYINT coordinates columns are supported in geo overlaps "
          // ...
    } else if (auto geo_expr_outer_col =
                   dynamic_cast<const Analyzer::GeoOperator*>(outer_col)) {
      const auto geo_expr_name = geo_expr_outer_col->getName();
      if (func_resolve(geo_expr_name, "ST_Point"sv, "ST_Transform"sv, "ST_Centroid"sv)) {
        // ...
        const auto col_lvs = code_generator.codegen(outer_col, true, co);
        // ...
            col_lvs[0], llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_));
      } else {
        throw std::runtime_error(
            "RHS key of the range join operator has a geospatial function which is not "
            // ...
      }
    } else {
      throw std::runtime_error("Range join operator has an invalid rhs key: " +
                               outer_col->toString());
    }
    // ...
        LL_BUILDER.CreateLoad(offset_ptr->getType()->getPointerElementType(),
                              // ...
                              "packed_bucket_offset");
    const auto x_offset =
        // ...
    const auto y_offset_shifted =
        // ...
    const auto y_offset =
        // ...
    const auto x_bucket_offset =
        // ...
    const auto y_bucket_offset =
        // ...
    for (size_t i = 0; i < 2; i++) {
      const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(
          key_buff_lv->getType()->getScalarType()->getPointerElementType(),
          // ...
      const auto funcName = isProbeCompressed() ? "get_bucket_key_for_range_compressed"
                                                : "get_bucket_key_for_range_double";
      // ...
      auto bucket_key = executor_->cgen_state_->emitExternalCall(
          // ...
      auto bucket_key_shifted = i == 0
                                    ? LL_BUILDER.CreateAdd(x_bucket_offset, bucket_key)
                                    : LL_BUILDER.CreateAdd(y_bucket_offset, bucket_key);
      // ...
      LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
    }
  } else {
    LOG(FATAL) << "Range join key currently only supported for geospatial types.";
  }
    llvm::Value* range_offset) {
  // ...
  CHECK(key_component_width == 4 || key_component_width == 8);
  // ...
  auto key_buff_lv = codegenKey(co, range_offset);
  // ...
  const auto composite_dict_ptr_type =
      llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
  // ...
  const auto composite_key_dict =
      hash_ptr->getType()->isPointerTy()
          ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
          : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
  // ...
  const auto funcName =
      "get_composite_key_index_" + std::to_string(key_component_width * 8);
  // ...
  const auto key = executor_->cgen_state_->emitExternalCall(funcName,
                                                            // ...
                                                            LL_INT(key_component_count),
                                                            // ...
  auto one_to_many_ptr = hash_ptr;
  if (one_to_many_ptr->getType()->isPointerTy()) {
    // ...
  } else {
    CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
  }