OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RangeJoinHashTable.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 
22  public:
// Constructs a RangeJoinHashTable.
//
// condition, join_type, query_infos, memory_level, column_cache, executor,
// inner_outer_pairs, device_count, query_plan_dag, hashtable_cache_meta_info
// and table_id_to_node_map are forwarded unchanged to the
// OverlapsJoinHashTable constructor. range_expr is stored in range_expr_ and
// inner_col_expr is moved into inner_col_expr_.
//
// NOTE(review): hashtable_build_dag_map is accepted but is neither forwarded
// to the base constructor nor stored here — confirm this is intentional.
23  RangeJoinHashTable(const std::shared_ptr<Analyzer::BinOper> condition,
24  const JoinType join_type,
25  const Analyzer::RangeOper* range_expr,
26  std::shared_ptr<Analyzer::ColumnVar> inner_col_expr,
27  const std::vector<InputTableInfo>& query_infos,
28  const Data_Namespace::MemoryLevel memory_level,
29  ColumnCacheMap& column_cache,
30  Executor* executor,
31  const std::vector<InnerOuter>& inner_outer_pairs,
32  const int device_count,
33  QueryPlan query_plan_dag,
34  HashtableCacheMetaInfo hashtable_cache_meta_info,
35  const HashTableBuildDagMap& hashtable_build_dag_map,
36  const TableIdToNodeMap& table_id_to_node_map)
// Base-class initialization: the by-value arguments (query_plan_dag,
// hashtable_cache_meta_info) are passed on as lvalues, i.e. copied, not moved.
37  : OverlapsJoinHashTable(condition,
38  join_type,
39  query_infos,
40  memory_level,
41  column_cache,
42  executor,
43  inner_outer_pairs,
44  device_count,
45  query_plan_dag,
46  hashtable_cache_meta_info,
47  table_id_to_node_map)
// Members specific to this class; the constructor body itself is empty.
48  , range_expr_(range_expr)
49  , inner_col_expr_(std::move(inner_col_expr)) {}
50 
// Compiler-generated destructor. The `override` specifier only compiles if
// the base class destructor is virtual.
51  ~RangeJoinHashTable() override = default;
52 
// Static factory that returns a shared_ptr<RangeJoinHashTable>. Only the
// declaration appears in this header; the definition is elsewhere.
//
// NOTE(review): the parameter order differs from the constructor's (here
// memory_level comes before join_type, and query_hint has no constructor
// counterpart) — keep that in mind when reading call sites.
53  static std::shared_ptr<RangeJoinHashTable> getInstance(
54  const std::shared_ptr<Analyzer::BinOper> condition,
55  const Analyzer::RangeOper* range_expr,
56  const std::vector<InputTableInfo>& query_infos,
57  const Data_Namespace::MemoryLevel memory_level,
58  const JoinType join_type,
59  const int device_count,
60  ColumnCacheMap& column_cache,
61  Executor* executor,
62  const HashTableBuildDagMap& hashtable_build_dag_map,
63  const RegisteredQueryHint& query_hint,
64  const TableIdToNodeMap& table_id_to_node_map);
65 
66  protected:
// Override of a base-class virtual taking the requested HashType layout.
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably drives construction of the hash table for the
// given layout — confirm against the definition.
67  void reifyWithLayout(const HashType layout) override;
68 
// Declaration only; the definition is not part of this header. Takes the
// columns for one device, the layout, two size counters, the device id (as
// int) and the logger thread id of the parent thread.
// NOTE(review): this declaration carries no `override` specifier; whether it
// hides or overrides a base-class member cannot be told from this file.
69  void reifyForDevice(const ColumnsForDevice& columns_for_device,
70  const HashType layout,
71  const size_t entry_count,
72  const size_t emitted_keys_count,
73  const int device_id,
74  const logger::ThreadId parent_thread_id);
75 
// Declaration only; returns a shared_ptr<BaselineHashTable>. Inputs are the
// join columns, their type info, the bucket info, the layout and the two
// size counters (entry_count, emitted_keys_count).
// NOTE(review): presumably builds the table in host memory — confirm against
// the definition.
76  std::shared_ptr<BaselineHashTable> initHashTableOnCpu(
77  const std::vector<JoinColumn>& join_columns,
78  const std::vector<JoinColumnTypeInfo>& join_column_types,
79  const std::vector<JoinBucketInfo>& join_bucket_info,
80  const HashType layout,
81  const size_t entry_count,
82  const size_t emitted_keys_count);
83 
84 #ifdef HAVE_CUDA
// Declared only when HAVE_CUDA is defined (see the surrounding #ifdef).
// Same parameter list as initHashTableOnCpu plus a trailing device_id.
// NOTE(review): device_id is size_t here but int in reifyForDevice — callers
// convert between the two; confirm the conversion is checked.
85  std::shared_ptr<BaselineHashTable> initHashTableOnGpu(
86  const std::vector<JoinColumn>& join_columns,
87  const std::vector<JoinColumnTypeInfo>& join_column_types,
88  const std::vector<JoinBucketInfo>& join_bucket_info,
89  const HashType layout,
90  const size_t entry_count,
91  const size_t emitted_keys_count,
92  const size_t device_id);
93 #endif
94 
// Always reports HashType::OneToMany, regardless of object state.
95  HashType getHashType() const noexcept override { return HashType::OneToMany; }
96 
// Override of a base-class virtual; declaration only. Returns a pair of
// size_t values.
// NOTE(review): the meaning of the two pair elements is not visible in this
// header — document it at the definition.
97  std::pair<size_t, size_t> approximateTupleCount(
98  const std::vector<double>& inverse_bucket_sizes_for_dimension,
99  std::vector<ColumnsForDevice>& columns_per_device,
100  const size_t chosen_max_hashtable_size,
101  const double chosen_bucket_threshold) override;
102 
// Declaration only. Takes a shard count and the per-device columns (by
// non-const reference) and returns a pair of size_t values.
// NOTE(review): the meaning of the two pair elements is not visible in this
// header — confirm against the definition.
103  std::pair<size_t, size_t> computeRangeHashTableCounts(
104  const size_t shard_count,
105  std::vector<ColumnsForDevice>& columns_per_device);
106 
107  public:
// Public code-generation entry point; declaration only. Takes the
// compilation options and an llvm::Value* offset and returns an llvm::Value*.
108  llvm::Value* codegenKey(const CompilationOptions& co, llvm::Value* offset);
109 
// Public code-generation entry point; declaration only. Returns a
// HashJoinMatchingSet. The parameters are unnamed in this declaration:
// compilation options, a size_t, and an llvm::Value*.
// The first line of the declaration (listing line 110) had been dropped from
// this listing; it is restored from the signature shown in the symbol index.
110  HashJoinMatchingSet codegenMatchingSetWithOffset(const CompilationOptions&,
111  const size_t,
112  llvm::Value*);
113 
114  private:
// NOTE(review): the body of this function (listing lines 116-117) is missing
// from this listing, so its exact logic cannot be documented here. By analogy
// with isProbeCompressed() it presumably tests the inner column's compression
// — confirm against the real header.
115  inline bool isInnerColCompressed() const {
118  }
119 
// Returns true when the type of the outer expression of the first
// inner/outer pair uses kENCODING_GEOINT compression.
// Only element [0] of getInnerOuterPairs() is inspected.
// NOTE(review): assumes getInnerOuterPairs() is non-empty — confirm callers
// guarantee this.
120  inline bool isProbeCompressed() const {
121  const auto& inner_outer_pair = getInnerOuterPairs()[0];
122  const auto outer_col = inner_outer_pair.second;
// get_type_info() returns a const reference; bind to it rather than copying
// the whole SQLTypeInfo just to read one field.
123  const auto& outer_col_ti = outer_col->get_type_info();
124 
125  return outer_col_ti.get_compression() == kENCODING_GEOINT;
126  }
127 
// NOTE(review): the constructor also initializes range_expr_, but its
// declaration (listing line 128) is missing from this listing; the symbol
// index shows it as `const Analyzer::RangeOper * range_expr_`.
// Inner column expression; set by the constructor from its inner_col_expr
// argument via std::move.
129  std::shared_ptr<Analyzer::ColumnVar> inner_col_expr_;
// Const member fixed at the largest finite double; never reassigned.
130  const double bucket_threshold_{std::numeric_limits<double>::max()};
// Const member fixed at the largest size_t value; never reassigned.
131  const size_t max_hashtable_size_{std::numeric_limits<size_t>::max()};
133 };
JoinType
Definition: sqldefs.h:108
llvm::Value * codegenKey(const CompilationOptions &co, llvm::Value *offset)
HashType getHashType() const noexcept override
#define const
void reifyWithLayout(const HashType layout) override
const Expr * get_left_operand() const
Definition: Analyzer.h:538
HashtableCacheMetaInfo overlaps_hashtable_cache_meta_info_
std::shared_ptr< Analyzer::ColumnVar > inner_col_expr_
std::shared_ptr< BaselineHashTable > initHashTableOnCpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const HashType layout, const size_t entry_count, const size_t emitted_keys_count)
std::pair< size_t, size_t > approximateTupleCount(const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold) override
std::unordered_map< int, const RelAlgNode * > TableIdToNodeMap
const std::vector< InnerOuter > & getInnerOuterPairs() const
static std::shared_ptr< RangeJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const Analyzer::RangeOper *range_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
std::pair< size_t, size_t > computeRangeHashTableCounts(const size_t shard_count, std::vector< ColumnsForDevice > &columns_per_device)
HashJoinMatchingSet codegenMatchingSetWithOffset(const CompilationOptions &, const size_t, llvm::Value *)
const double bucket_threshold_
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:77
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
bool isProbeCompressed() const
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
uint64_t ThreadId
Definition: Logger.h:345
bool isInnerColCompressed() const
RangeJoinHashTable(const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const Analyzer::RangeOper *range_expr, std::shared_ptr< Analyzer::ColumnVar > inner_col_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, QueryPlan query_plan_dag, HashtableCacheMetaInfo hashtable_cache_meta_info, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map)
void reifyForDevice(const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const int device_id, const logger::ThreadId parent_thread_id)
const Analyzer::RangeOper * range_expr_
std::string QueryPlan
~RangeJoinHashTable() override=default
HashType
Definition: HashTable.h:19
const size_t max_hashtable_size_
std::unordered_map< JoinColumnsInfo, HashTableBuildDag > HashTableBuildDagMap