OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RangeJoinHashTable.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 
22  public:
/// Constructs a range-join hash table.
///
/// condition, join_type, query_infos, memory_level, column_cache, executor,
/// inner_outer_pairs, device_count, query_hints, hashtable_build_dag_map and
/// table_id_to_node_map are forwarded unchanged to the OverlapsJoinHashTable
/// base constructor. Only range_expr and inner_col_expr are kept by this class.
///
/// @param range_expr      Stored as a raw, non-owning pointer in range_expr_.
///                        NOTE(review): presumably the caller keeps the
///                        expression alive for the table's lifetime - confirm.
/// @param inner_col_expr  Taken by value and moved into inner_col_expr_.
23  RangeJoinHashTable(const std::shared_ptr<Analyzer::BinOper> condition,
24  const JoinType join_type,
25  const Analyzer::RangeOper* range_expr,
26  std::shared_ptr<Analyzer::ColumnVar> inner_col_expr,
27  const std::vector<InputTableInfo>& query_infos,
28  const Data_Namespace::MemoryLevel memory_level,
29  ColumnCacheMap& column_cache,
30  Executor* executor,
31  const std::vector<InnerOuter>& inner_outer_pairs,
32  const int device_count,
33  const RegisteredQueryHint& query_hints,
34  const HashTableBuildDagMap& hashtable_build_dag_map,
35  const TableIdToNodeMap& table_id_to_node_map)
// Base-class arguments are passed through in the base constructor's order.
36  : OverlapsJoinHashTable(condition,
37  join_type,
38  query_infos,
39  memory_level,
40  column_cache,
41  executor,
42  inner_outer_pairs,
43  device_count,
44  query_hints,
45  hashtable_build_dag_map,
46  table_id_to_node_map)
// The two range-join-specific members; the constructor body is empty.
47  , range_expr_(range_expr)
48  , inner_col_expr_(std::move(inner_col_expr)) {}
49 
/// Defaulted destructor; `override` ties it to the base class's virtual destructor.
50  ~RangeJoinHashTable() override = default;
51 
/// Static factory returning a shared_ptr<RangeJoinHashTable>. Only the
/// declaration is visible in this header.
///
/// The parameter order differs from the constructor's: join_type and
/// device_count follow memory_level here, and hashtable_build_dag_map comes
/// before query_hints. Take care when forwarding positional arguments.
///
/// Unlike the constructor, this takes neither inner_col_expr nor
/// inner_outer_pairs. NOTE(review): presumably the definition derives them
/// from condition / range_expr - confirm against the implementation file.
52  static std::shared_ptr<RangeJoinHashTable> getInstance(
53  const std::shared_ptr<Analyzer::BinOper> condition,
54  const Analyzer::RangeOper* range_expr,
55  const std::vector<InputTableInfo>& query_infos,
56  const Data_Namespace::MemoryLevel memory_level,
57  const JoinType join_type,
58  const int device_count,
59  ColumnCacheMap& column_cache,
60  Executor* executor,
61  const HashTableBuildDagMap& hashtable_build_dag_map,
62  const RegisteredQueryHint& query_hints,
63  const TableIdToNodeMap& table_id_to_node_map);
64 
65  protected:
/// Overrides the base-class reifyWithLayout hook for the given hash layout.
/// Declaration only; the definition is not part of this header.
66  void reifyWithLayout(const HashType layout) override;
67 
/// Per-device reification step, taking the device's columns, the hash layout,
/// the entry and emitted-key counts, and the target device id.
/// Declaration only. It is not marked `override`, so as declared here it is
/// a function of this class rather than a checked override of a base virtual.
/// NOTE(review): parent_thread_local_ids presumably carries the spawning
/// thread's logger ids into a worker thread - confirm.
68  void reifyForDevice(const ColumnsForDevice& columns_for_device,
69  const HashType layout,
70  const size_t entry_count,
71  const size_t emitted_keys_count,
72  const int device_id,
73  const logger::ThreadLocalIds parent_thread_local_ids);
74 
/// Declares the CPU hash-table initialisation; returns a
/// shared_ptr<BaselineHashTable>. The three vectors are taken by const
/// reference; layout and both counts are passed by value.
/// NOTE(review): join_columns, join_column_types and join_bucket_info are
/// presumably index-aligned per join column - confirm in the definition.
75  std::shared_ptr<BaselineHashTable> initHashTableOnCpu(
76  const std::vector<JoinColumn>& join_columns,
77  const std::vector<JoinColumnTypeInfo>& join_column_types,
78  const std::vector<JoinBucketInfo>& join_bucket_info,
79  const HashType layout,
80  const size_t entry_count,
81  const size_t emitted_keys_count);
82 
83 #ifdef HAVE_CUDA
/// GPU counterpart of initHashTableOnCpu; only declared when HAVE_CUDA is
/// defined. Same parameters plus a trailing device_id.
/// Note: device_id is size_t here, while reifyForDevice takes it as int, so
/// a conversion happens somewhere between the two.
84  std::shared_ptr<BaselineHashTable> initHashTableOnGpu(
85  const std::vector<JoinColumn>& join_columns,
86  const std::vector<JoinColumnTypeInfo>& join_column_types,
87  const std::vector<JoinBucketInfo>& join_bucket_info,
88  const HashType layout,
89  const size_t entry_count,
90  const size_t emitted_keys_count,
91  const size_t device_id);
92 #endif
93 
/// Always reports HashType::OneToMany, regardless of any layout passed to
/// the reify functions. Declared noexcept.
94  HashType getHashType() const noexcept override { return HashType::OneToMany; }
95 
/// Overrides the base-class approximateTupleCount. Declaration only.
/// columns_per_device is taken by non-const reference, so the definition may
/// modify it.
/// NOTE(review): the meaning of the two size_t values in the returned pair is
/// not visible in this header (presumably entry count and emitted-key count,
/// matching reifyForDevice's parameters) - confirm in the definition.
96  std::pair<size_t, size_t> approximateTupleCount(
97  const std::vector<double>& inverse_bucket_sizes_for_dimension,
98  std::vector<ColumnsForDevice>& columns_per_device,
99  const size_t chosen_max_hashtable_size,
100  const double chosen_bucket_threshold) override;
101 
/// Computes a pair of size_t counts from the shard count and the per-device
/// columns. Declaration only; not an override.
/// columns_per_device is taken by non-const reference, so the definition may
/// modify it.
/// NOTE(review): the meaning of each element of the returned pair is not
/// visible here - confirm in the definition.
102  std::pair<size_t, size_t> computeRangeHashTableCounts(
103  const size_t shard_count,
104  std::vector<ColumnsForDevice>& columns_per_device);
105 
106  public:
/// Public code-generation entry point returning an llvm::Value*.
/// Declaration only.
/// NOTE(review): presumably emits IR computing the hash key for the row at
/// `offset` - confirm in the definition.
107  llvm::Value* codegenKey(const CompilationOptions& co, llvm::Value* offset);
108 
/// Public code-generation entry point returning a HashJoinMatchingSet.
/// Declaration only; all three parameters are unnamed in the header.
/// Listing line 109 (return type, name and first parameter) had been dropped
/// from this extract, leaving only the parameter tail. It is restored here
/// from the full signature given in the page's cross-reference index.
109  HashJoinMatchingSet codegenMatchingSetWithOffset(const CompilationOptions&,
110  const size_t,
111  llvm::Value*);
112 
113  private:
/// Predicate on the inner column's compression (per its name).
/// NOTE(review): listing lines 115-116, which hold the function body, are
/// missing from this extract, so the returned expression cannot be confirmed
/// here. Recover it from the upstream header before relying on this block.
114  inline bool isInnerColCompressed() const {
117  }
118 
/// Returns true when the outer (second) expression of the first inner/outer
/// pair has type info whose compression is kENCODING_GEOINT.
119  inline bool isProbeCompressed() const {
// Only pair 0 is inspected, and the index is unchecked, so this relies on
// getInnerOuterPairs() being non-empty.
120  const auto& inner_outer_pair = getInnerOuterPairs()[0];
// `.second` is the outer member of the pair.
121  const auto outer_col = inner_outer_pair.second;
122  const auto outer_col_ti = outer_col->get_type_info();
123 
124  return outer_col_ti.get_compression() == kENCODING_GEOINT;
125  }
126 
// Data members. Listing lines 127 and 129 had been dropped from this extract
// and are restored from the page's cross-reference index.

// Range expression passed to the constructor; raw, non-owning pointer.
// Declared before inner_col_expr_, matching the constructor's initialiser order.
127  const Analyzer::RangeOper* range_expr_;
// Inner column expression, moved in by the constructor.
128  std::shared_ptr<Analyzer::ColumnVar> inner_col_expr_;
// NOTE(review): restored from the cross-reference entry, which omits any
// default initialiser; the constructor's initialiser list does not set this
// member. Confirm the exact line against the upstream header.
129  Data_Namespace::MemoryLevel effective_memory_level_;
// Both limits default to the largest representable value of their type.
130  const double bucket_threshold_{std::numeric_limits<double>::max()};
131  const size_t max_hashtable_size_{std::numeric_limits<size_t>::max()};
132 };
JoinType
Definition: sqldefs.h:164
llvm::Value * codegenKey(const CompilationOptions &co, llvm::Value *offset)
HashType getHashType() const noexcept override
void reifyWithLayout(const HashType layout) override
const Expr * get_left_operand() const
Definition: Analyzer.h:548
std::shared_ptr< Analyzer::ColumnVar > inner_col_expr_
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
std::shared_ptr< BaselineHashTable > initHashTableOnCpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const HashType layout, const size_t entry_count, const size_t emitted_keys_count)
std::pair< size_t, size_t > approximateTupleCount(const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold) override
std::unordered_map< int, const RelAlgNode * > TableIdToNodeMap
const std::vector< InnerOuter > & getInnerOuterPairs() const
std::pair< size_t, size_t > computeRangeHashTableCounts(const size_t shard_count, std::vector< ColumnsForDevice > &columns_per_device)
HashJoinMatchingSet codegenMatchingSetWithOffset(const CompilationOptions &, const size_t, llvm::Value *)
const double bucket_threshold_
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:83
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
static std::shared_ptr< RangeJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const Analyzer::RangeOper *range_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hints, const TableIdToNodeMap &table_id_to_node_map)
bool isProbeCompressed() const
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:388
bool isInnerColCompressed() const
void reifyForDevice(const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const int device_id, const logger::ThreadLocalIds parent_thread_local_ids)
Data_Namespace::MemoryLevel effective_memory_level_
const Analyzer::RangeOper * range_expr_
~RangeJoinHashTable() override=default
RangeJoinHashTable(const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const Analyzer::RangeOper *range_expr, std::shared_ptr< Analyzer::ColumnVar > inner_col_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, const RegisteredQueryHint &query_hints, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map)
HashType
Definition: HashTable.h:19
const size_t max_hashtable_size_