OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RangeJoinHashTable.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 
22  public:
// Constructor.
// Visible effects: range_expr_ is initialized from range_expr, and inner_col_expr
// (taken by value) is moved into inner_col_expr_. The constructor body is empty.
// NOTE(review): this listing jumps from line 35 to line 37, so the head of the
// first initializer is missing. The argument list join_type ... table_id_to_node_map
// below is presumably forwarded to the base-class constructor -- confirm against
// the real header.
23  RangeJoinHashTable(const std::shared_ptr<Analyzer::BinOper> condition,
24  const JoinType join_type,
25  const Analyzer::RangeOper* range_expr,
26  std::shared_ptr<Analyzer::ColumnVar> inner_col_expr,
27  const std::vector<InputTableInfo>& query_infos,
28  const Data_Namespace::MemoryLevel memory_level,
29  ColumnCacheMap& column_cache,
30  Executor* executor,
31  const std::vector<InnerOuter>& inner_outer_pairs,
32  const int device_count,
33  const RegisteredQueryHint& query_hints,
34  const HashTableBuildDagMap& hashtable_build_dag_map,
35  const TableIdToNodeMap& table_id_to_node_map)
37  join_type,
38  query_infos,
39  memory_level,
40  column_cache,
41  executor,
42  inner_outer_pairs,
43  device_count,
44  query_hints,
45  hashtable_build_dag_map,
46  table_id_to_node_map)
47  , range_expr_(range_expr)
48  , inner_col_expr_(std::move(inner_col_expr)) {}
49 
50  ~RangeJoinHashTable() override = default;
51 
// Static factory returning a std::shared_ptr<RangeJoinHashTable>.
// Declaration only; no definition appears in this listing.
// Note that the parameter order differs from the constructor: memory_level comes
// before join_type, and hashtable_build_dag_map comes before query_hints. There
// are no inner_col_expr or inner_outer_pairs parameters here.
52  static std::shared_ptr<RangeJoinHashTable> getInstance(
53  const std::shared_ptr<Analyzer::BinOper> condition,
54  const Analyzer::RangeOper* range_expr,
55  const std::vector<InputTableInfo>& query_infos,
56  const Data_Namespace::MemoryLevel memory_level,
57  const JoinType join_type,
58  const int device_count,
59  ColumnCacheMap& column_cache,
60  Executor* executor,
61  const HashTableBuildDagMap& hashtable_build_dag_map,
62  const RegisteredQueryHint& query_hints,
63  const TableIdToNodeMap& table_id_to_node_map);
64 
65  protected:
// Override of a base-class virtual; receives a HashType layout by value.
// Declaration only in this listing.
66  void reifyWithLayout(const HashType layout) override;
67 
// Per-device counterpart of reifyWithLayout, identified by an int device_id.
// Unlike reifyWithLayout, it is not marked override. Declaration only in this listing.
// NOTE(review): presumably invoked once per device from reifyWithLayout, with
// parent_thread_local_ids carrying the caller's logging context -- confirm in the .cpp.
68  void reifyForDevice(const ColumnsForDevice& columns_for_device,
69  const HashType layout,
70  const size_t entry_count,
71  const size_t emitted_keys_count,
72  const int device_id,
73  const logger::ThreadLocalIds parent_thread_local_ids);
74 
// Returns a std::shared_ptr<BaselineHashTable>.
// Takes the join columns, their type info and the bucket info by const reference,
// and a BaselineHashTableEntryInfo by value. Declaration only in this listing.
75  std::shared_ptr<BaselineHashTable> initHashTableOnCpu(
76  const std::vector<JoinColumn>& join_columns,
77  const std::vector<JoinColumnTypeInfo>& join_column_types,
78  const std::vector<JoinBucketInfo>& join_bucket_info,
79  const BaselineHashTableEntryInfo hash_table_entry_info);
80 
// Only declared when HAVE_CUDA is defined.
// Same parameters and return type as initHashTableOnCpu, plus a trailing device_id.
// NOTE(review): device_id is size_t here but int in reifyForDevice; callers will
// convert implicitly -- check that this is intended.
81 #ifdef HAVE_CUDA
82  std::shared_ptr<BaselineHashTable> initHashTableOnGpu(
83  const std::vector<JoinColumn>& join_columns,
84  const std::vector<JoinColumnTypeInfo>& join_column_types,
85  const std::vector<JoinBucketInfo>& join_bucket_info,
86  const BaselineHashTableEntryInfo hash_table_entry_info,
87  const size_t device_id);
88 #endif
89 
// Reports the hash table layout of this join table.
// Unconditionally returns HashType::OneToMany; overrides a base-class virtual
// and is declared noexcept.
90  HashType getHashType() const noexcept override {
91  return HashType::OneToMany;
92  }
93 
// Override of a base-class virtual; returns a pair of size_t values.
// columns_per_device is taken by non-const reference, so the implementation may
// modify it. Declaration only in this listing.
// NOTE(review): the meaning of the two returned counts is not shown here --
// presumably (entry count, emitted keys count), matching reifyForDevice's
// parameters; confirm in the .cpp.
94  std::pair<size_t, size_t> approximateTupleCount(
95  const std::vector<double>& inverse_bucket_sizes_for_dimension,
96  std::vector<ColumnsForDevice>& columns_per_device,
97  const size_t chosen_max_hashtable_size,
98  const double chosen_bucket_threshold) override;
99 
// Returns a pair of size_t values computed from a shard count and the per-device
// columns. Not marked override. Declaration only in this listing.
// NOTE(review): the meaning of the two returned counts is not visible here; confirm in the .cpp.
100  std::pair<size_t, size_t> computeRangeHashTableCounts(
101  const size_t shard_count,
102  std::vector<ColumnsForDevice>& columns_per_device);
103 
104  public:
// Public code-generation entry point: takes the compilation options and an
// llvm::Value* offset, and returns an llvm::Value*. Declaration only in this listing.
105  llvm::Value* codegenKey(const CompilationOptions& co, llvm::Value* offset);
106 
108  const size_t,
109  llvm::Value*);
110 
111  private:
112  inline bool isInnerColCompressed() const {
115  }
116 
// Returns true when the outer column of the first inner/outer pair uses
// kENCODING_GEOINT compression.
// Only getInnerOuterPairs()[0] is inspected and the index is not bounds-checked
// here, so at least one pair must exist.
// NOTE(review): outer_col_ti is declared `const auto`, which copies the type info
// if get_type_info() returns a const reference (as the index on this page lists it).
117  inline bool isProbeCompressed() const {
118  const auto& inner_outer_pair = getInnerOuterPairs()[0];
119  const auto outer_col = inner_outer_pair.second;
120  const auto outer_col_ti = outer_col->get_type_info();
121 
122  return outer_col_ti.get_compression() == kENCODING_GEOINT;
123  }
124 
126  std::shared_ptr<Analyzer::ColumnVar> inner_col_expr_;
128  const double bucket_threshold_{std::numeric_limits<double>::max()};
129  const size_t max_hashtable_size_{std::numeric_limits<size_t>::max()};
130 };
JoinType
Definition: sqldefs.h:174
llvm::Value * codegenKey(const CompilationOptions &co, llvm::Value *offset)
HashType getHashType() const noexcept override
void reifyWithLayout(const HashType layout) override
const Expr * get_left_operand() const
Definition: Analyzer.h:552
std::shared_ptr< Analyzer::ColumnVar > inner_col_expr_
const std::vector< InnerOuter > & getInnerOuterPairs() const
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
std::pair< size_t, size_t > approximateTupleCount(const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold) override
std::pair< size_t, size_t > computeRangeHashTableCounts(const size_t shard_count, std::vector< ColumnsForDevice > &columns_per_device)
HashJoinMatchingSet codegenMatchingSetWithOffset(const CompilationOptions &, const size_t, llvm::Value *)
const double bucket_threshold_
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
static std::shared_ptr< RangeJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const Analyzer::RangeOper *range_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hints, const TableIdToNodeMap &table_id_to_node_map)
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
bool isProbeCompressed() const
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
std::shared_ptr< BaselineHashTable > initHashTableOnCpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const BaselineHashTableEntryInfo hash_table_entry_info)
bool isInnerColCompressed() const
void reifyForDevice(const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const int device_id, const logger::ThreadLocalIds parent_thread_local_ids)
Data_Namespace::MemoryLevel effective_memory_level_
const Analyzer::RangeOper * range_expr_
~RangeJoinHashTable() override=default
RangeJoinHashTable(const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const Analyzer::RangeOper *range_expr, std::shared_ptr< Analyzer::ColumnVar > inner_col_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, const RegisteredQueryHint &query_hints, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map)
HashType
Definition: HashTable.h:19
const size_t max_hashtable_size_