OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RangeJoinHashTable.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 
22  public:
// Constructor. Forwards condition, join_type, query_infos, memory_level,
// column_cache, executor, inner_outer_pairs, device_count,
// hashtable_build_dag_map and table_id_to_node_map unchanged to the
// OverlapsJoinHashTable base constructor, then stores range_expr in range_expr_
// and moves inner_col_expr into inner_col_expr_.
// range_expr is kept as a raw pointer; NOTE(review): presumably the caller keeps
// the RangeOper alive for the lifetime of this table - confirm.
23  RangeJoinHashTable(const std::shared_ptr<Analyzer::BinOper> condition,
24  const JoinType join_type,
25  const Analyzer::RangeOper* range_expr,
26  std::shared_ptr<Analyzer::ColumnVar> inner_col_expr,
27  const std::vector<InputTableInfo>& query_infos,
28  const Data_Namespace::MemoryLevel memory_level,
29  ColumnCacheMap& column_cache,
30  Executor* executor,
31  const std::vector<InnerOuter>& inner_outer_pairs,
32  const int device_count,
33  const HashTableBuildDagMap& hashtable_build_dag_map,
34  const TableIdToNodeMap& table_id_to_node_map)
35  : OverlapsJoinHashTable(condition,
36  join_type,
37  query_infos,
38  memory_level,
39  column_cache,
40  executor,
41  inner_outer_pairs,
42  device_count,
43  hashtable_build_dag_map,
44  table_id_to_node_map)
45  , range_expr_(range_expr)
46  , inner_col_expr_(std::move(inner_col_expr)) {}
47 
// Defaulted destructor; `override` means it overrides a virtual base-class destructor.
48  ~RangeJoinHashTable() override = default;
49 
// Static factory returning a shared_ptr<RangeJoinHashTable>. Declaration only;
// the definition is not part of this header.
// Note that it accepts a RegisteredQueryHint (query_hint), which has no
// counterpart in the constructor's parameter list, and takes no inner_col_expr
// or inner_outer_pairs. NOTE(review): presumably those are derived inside the
// definition - confirm there.
50  static std::shared_ptr<RangeJoinHashTable> getInstance(
51  const std::shared_ptr<Analyzer::BinOper> condition,
52  const Analyzer::RangeOper* range_expr,
53  const std::vector<InputTableInfo>& query_infos,
54  const Data_Namespace::MemoryLevel memory_level,
55  const JoinType join_type,
56  const int device_count,
57  ColumnCacheMap& column_cache,
58  Executor* executor,
59  const HashTableBuildDagMap& hashtable_build_dag_map,
60  const RegisteredQueryHint& query_hint,
61  const TableIdToNodeMap& table_id_to_node_map);
62 
63  protected:
// Overrides the base-class reifyWithLayout for the given hash layout.
// Declaration only; the definition is not part of this header.
64  void reifyWithLayout(const HashType layout) override;
65 
// Per-device counterpart of reifyWithLayout, identified by device_id.
// Declaration only. Unlike reifyWithLayout it is not marked `override`.
// NOTE(review): entry_count / emitted_keys_count are presumably the sizes
// produced by the count helpers declared below - confirm in the definition.
66  void reifyForDevice(const ColumnsForDevice& columns_for_device,
67  const HashType layout,
68  const size_t entry_count,
69  const size_t emitted_keys_count,
70  const int device_id,
71  const logger::ThreadId parent_thread_id);
72 
// Returns a shared_ptr<BaselineHashTable> built from the given join columns,
// their type info and bucket info. Declaration only; the definition is not
// part of this header.
73  std::shared_ptr<BaselineHashTable> initHashTableOnCpu(
74  const std::vector<JoinColumn>& join_columns,
75  const std::vector<JoinColumnTypeInfo>& join_column_types,
76  const std::vector<JoinBucketInfo>& join_bucket_info,
77  const HashType layout,
78  const size_t entry_count,
79  const size_t emitted_keys_count);
80 
// GPU variant of initHashTableOnCpu; only declared when HAVE_CUDA is defined.
// Same parameters as the CPU variant plus a trailing device_id. Note that
// device_id is a size_t here, whereas reifyForDevice takes it as an int.
81 #ifdef HAVE_CUDA
82  std::shared_ptr<BaselineHashTable> initHashTableOnGpu(
83  const std::vector<JoinColumn>& join_columns,
84  const std::vector<JoinColumnTypeInfo>& join_column_types,
85  const std::vector<JoinBucketInfo>& join_bucket_info,
86  const HashType layout,
87  const size_t entry_count,
88  const size_t emitted_keys_count,
89  const size_t device_id);
90 #endif
91 
// Always reports HashType::OneToMany, regardless of any layout passed elsewhere.
92  HashType getHashType() const noexcept override { return HashType::OneToMany; }
93 
// Overrides the base-class approximateTupleCount. Declaration only.
// Returns a pair of size_t values; NOTE(review): the meaning of each element
// is not visible in this header - presumably (entry count, emitted keys
// count); confirm in the definition.
94  std::pair<size_t, size_t> approximateTupleCount(
95  const std::vector<double>& inverse_bucket_sizes_for_dimension,
96  std::vector<ColumnsForDevice>& columns_per_device,
97  const size_t chosen_max_hashtable_size,
98  const double chosen_bucket_threshold) override;
99 
// Non-virtual-override helper returning a pair of size_t counts. Declaration
// only. columns_per_device is taken by non-const reference.
// NOTE(review): pair element meaning is not visible here - confirm in the definition.
100  std::pair<size_t, size_t> computeRangeHashTableCounts(
101  const size_t shard_count,
102  std::vector<ColumnsForDevice>& columns_per_device);
103 
104  public:
// Code-generation hook returning an llvm::Value* for the given compilation
// options and offset value. Declaration only; not marked `override`.
105  llvm::Value* codegenKey(const CompilationOptions& co, llvm::Value* offset);
106 
// Code-generation hook returning a HashJoinMatchingSet. Declaration only; the
// three parameters (CompilationOptions, a size_t and an llvm::Value*) are
// unnamed in this header. NOTE(review): by analogy with codegenKey the
// llvm::Value* is presumably the offset - confirm in the definition.
107  HashJoinMatchingSet codegenMatchingSetWithOffset(const CompilationOptions&,
108  const size_t,
109  llvm::Value*);
110 
111  private:
// Reports whether the type of range_expr_'s left operand uses
// kENCODING_GEOINT compression (the inner-side analogue of isProbeCompressed).
// NOTE(review): the two body lines were absent from this listing, which left a
// bool function without a return statement; the return expression here is
// reconstructed from isProbeCompressed and the get_left_operand reference -
// confirm against the upstream header.
112  inline bool isInnerColCompressed() const {
113  return range_expr_->get_left_operand()->get_type_info().get_compression() ==
114  kENCODING_GEOINT;
115  }
116 
// Reports whether the outer (second) column of the first inner/outer pair has
// kENCODING_GEOINT compression. Only pair [0] is inspected; the vector is
// indexed without a size check, so it must be non-empty.
117  inline bool isProbeCompressed() const {
118  const auto& inner_outer_pair = getInnerOuterPairs()[0];
119  const auto outer_col = inner_outer_pair.second;
120  const auto outer_col_ti = outer_col->get_type_info();
121 
122  return outer_col_ti.get_compression() == kENCODING_GEOINT;
123  }
124 
// Range operand supplied to the constructor; stored as a non-owning raw pointer.
125  const Analyzer::RangeOper* range_expr_;
// Inner column expression; the constructor moves its argument into this member.
126  std::shared_ptr<Analyzer::ColumnVar> inner_col_expr_;
// Not initialized by the constructor in this header and has no default
// initializer. NOTE(review): presumably assigned during reification - confirm
// it is always written before being read.
127  Data_Namespace::MemoryLevel effective_memory_level_;
// Const limits fixed at the largest representable double / size_t; the
// constructor in this header does not override these defaults.
128  const double bucket_threshold_{std::numeric_limits<double>::max()};
129  const size_t max_hashtable_size_{std::numeric_limits<size_t>::max()};
130 };
JoinType
Definition: sqldefs.h:136
llvm::Value * codegenKey(const CompilationOptions &co, llvm::Value *offset)
HashType getHashType() const noexcept override
void reifyWithLayout(const HashType layout) override
const Expr * get_left_operand() const
Definition: Analyzer.h:546
std::shared_ptr< Analyzer::ColumnVar > inner_col_expr_
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
std::shared_ptr< BaselineHashTable > initHashTableOnCpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const HashType layout, const size_t entry_count, const size_t emitted_keys_count)
RangeJoinHashTable(const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const Analyzer::RangeOper *range_expr, std::shared_ptr< Analyzer::ColumnVar > inner_col_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map)
std::pair< size_t, size_t > approximateTupleCount(const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold) override
std::unordered_map< int, const RelAlgNode * > TableIdToNodeMap
const std::vector< InnerOuter > & getInnerOuterPairs() const
static std::shared_ptr< RangeJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const Analyzer::RangeOper *range_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
std::pair< size_t, size_t > computeRangeHashTableCounts(const size_t shard_count, std::vector< ColumnsForDevice > &columns_per_device)
HashJoinMatchingSet codegenMatchingSetWithOffset(const CompilationOptions &, const size_t, llvm::Value *)
const double bucket_threshold_
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:81
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
bool isProbeCompressed() const
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
uint64_t ThreadId
Definition: Logger.h:363
bool isInnerColCompressed() const
void reifyForDevice(const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const int device_id, const logger::ThreadId parent_thread_id)
Data_Namespace::MemoryLevel effective_memory_level_
const Analyzer::RangeOper * range_expr_
~RangeJoinHashTable() override=default
HashType
Definition: HashTable.h:19
const size_t max_hashtable_size_