OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashJoin.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
22 #include "QueryEngine/Execute.h"
30 
31 #include <sstream>
32 
33 extern bool g_enable_overlaps_hashjoin;
34 
// NOTE(review): the function-name line is absent from this listing (doxygen
// dropped original line 35), so the enclosing scope and the declarations of
// `join_buckets`, `join_columns` and `join_column_types` are not visible here
// — presumably members of the object this method belongs to; confirm against
// the full source. The body rebuilds `join_buckets` with one JoinBucketInfo
// per join column, pairing the per-dimension inverse bucket sizes with a flag
// telling whether the column's element type is a double.
36  const std::vector<double>& inverse_bucket_sizes_for_dimension,
37  const std::vector<InnerOuter> inner_outer_pairs) {
38  join_buckets.clear();
39 
// One inner/outer pair and one type entry per join column is required.
40  CHECK_EQ(inner_outer_pairs.size(), join_columns.size());
41  CHECK_EQ(join_columns.size(), join_column_types.size());
42  for (size_t i = 0; i < join_columns.size(); i++) {
43  const auto& inner_outer_pair = inner_outer_pairs[i];
44  const auto inner_col = inner_outer_pair.first;
45  const auto& ti = inner_col->get_type_info();
// get_elem_type() yields the element type for array columns.
46  const auto elem_ti = ti.get_elem_type();
47  // CHECK(elem_ti.is_fp());
48 
// Second JoinBucketInfo field records whether bucket values are doubles.
49  join_buckets.emplace_back(JoinBucketInfo{inverse_bucket_sizes_for_dimension,
50  elem_ti.get_type() == kDOUBLE});
51  }
52 }
53 
// NOTE(review): the signature line is missing from this listing (original
// lines 54-58 dropped); from the parameters and body this is the join-column
// fetch helper. It materializes a JoinColumn for `hash_col` over the given
// fragments and, when the effective memory level is GPU, copies the chunk
// buffer to device memory. Any fetch-time failure is normalized into
// FailedToFetchColumn.
59  const Analyzer::ColumnVar* hash_col,
60  const std::vector<Fragmenter_Namespace::FragmentInfo>& fragment_info,
61  const Data_Namespace::MemoryLevel effective_memory_level,
62  const int device_id,
63  std::vector<std::shared_ptr<Chunk_NS::Chunk>>& chunks_owner,
64  DeviceAllocator* dev_buff_owner,
65  std::vector<std::shared_ptr<void>>& malloc_owner,
66  Executor* executor,
67  ColumnCacheMap* column_cache) {
// Function-local static mutex: serializes fragment fetches across all threads
// entering this function.
68  static std::mutex fragment_fetch_mutex;
69  std::lock_guard<std::mutex> fragment_fetch_lock(fragment_fetch_mutex);
70  try {
71  JoinColumn join_column = ColumnFetcher::makeJoinColumn(executor,
72  *hash_col,
73  fragment_info,
74  effective_memory_level,
75  device_id,
76  dev_buff_owner,
77  /*thread_idx=*/0,
78  chunks_owner,
79  malloc_owner,
80  *column_cache);
81  if (effective_memory_level == Data_Namespace::GPU_LEVEL) {
82  CHECK(dev_buff_owner);
// Copy host-side chunk buffer to the device and repoint the JoinColumn at the
// device copy; dev_buff_owner retains ownership of the allocation.
83  auto device_col_chunks_buff = dev_buff_owner->alloc(join_column.col_chunks_buff_sz);
84  dev_buff_owner->copyToDevice(device_col_chunks_buff,
85  join_column.col_chunks_buff,
86  join_column.col_chunks_buff_sz);
87  join_column.col_chunks_buff = device_col_chunks_buff;
88  }
89  return join_column;
// Collapse any exception type into the single error callers handle.
90  } catch (...) {
91  throw FailedToFetchColumn();
92  }
93 }
94 
95 namespace {
96 
97 template <typename T>
98 std::string toStringFlat(const HashJoin* hash_table,
99  const ExecutorDeviceType device_type,
100  const int device_id) {
101  auto mem =
102  reinterpret_cast<const T*>(hash_table->getJoinHashBuffer(device_type, device_id));
103  auto memsz = hash_table->getJoinHashBufferSize(device_type, device_id) / sizeof(T);
104  std::string txt;
105  for (size_t i = 0; i < memsz; ++i) {
106  if (i > 0) {
107  txt += ", ";
108  }
109  txt += std::to_string(mem[i]);
110  }
111  return txt;
112 }
113 
114 } // anonymous namespace
115 
116 std::string HashJoin::toStringFlat64(const ExecutorDeviceType device_type,
117  const int device_id) const {
118  return toStringFlat<int64_t>(this, device_type, device_id);
119 }
120 
121 std::string HashJoin::toStringFlat32(const ExecutorDeviceType device_type,
122  const int device_id) const {
123  return toStringFlat<int32_t>(this, device_type, device_id);
124 }
125 
126 std::ostream& operator<<(std::ostream& os, const DecodedJoinHashBufferEntry& e) {
127  os << " {{";
128  bool first = true;
129  for (auto k : e.key) {
130  if (!first) {
131  os << ",";
132  } else {
133  first = false;
134  }
135  os << k;
136  }
137  os << "}, ";
138  os << "{";
139  first = true;
140  for (auto p : e.payload) {
141  if (!first) {
142  os << ", ";
143  } else {
144  first = false;
145  }
146  os << p;
147  }
148  os << "}}";
149  return os;
150 }
151 
152 std::ostream& operator<<(std::ostream& os, const DecodedJoinHashBufferSet& s) {
153  os << "{\n";
154  bool first = true;
155  for (auto e : s) {
156  if (!first) {
157  os << ",\n";
158  } else {
159  first = false;
160  }
161  os << e;
162  }
163  if (!s.empty()) {
164  os << "\n";
165  }
166  os << "}\n";
167  return os;
168 }
169 
170 std::ostream& operator<<(std::ostream& os,
171  const InnerOuterStringOpInfos& inner_outer_string_op_infos) {
172  os << "(" << inner_outer_string_op_infos.first << ", "
173  << inner_outer_string_op_infos.second << ")";
174  return os;
175 }
176 
177 std::string toString(const InnerOuterStringOpInfos& inner_outer_string_op_infos) {
178  std::ostringstream os;
179  os << inner_outer_string_op_infos;
180  return os.str();
181 }
182 
183 std::ostream& operator<<(
184  std::ostream& os,
185  const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs) {
186  os << "[";
187  bool first_elem = true;
188  for (const auto& inner_outer_string_op_infos : inner_outer_string_op_infos_pairs) {
189  if (!first_elem) {
190  os << ", ";
191  }
192  first_elem = false;
193  os << inner_outer_string_op_infos;
194  }
195  os << "]";
196  return os;
197 }
198 
199 std::string toString(
200  const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs) {
201  std::ostringstream os;
202  os << inner_outer_string_op_infos_pairs;
203  return os.str();
204 }
205 
// NOTE(review): the function-name/return-type line is absent from this
// listing (original line 206 dropped). The body generates the IR lookup for a
// one-to-many hash join "matching set". The +sub_buff_size and
// +2*sub_buff_size adds below indicate the buffer holds three equally sized
// regions: slot offsets, per-slot match counts, and row-id payload — confirm
// layout against the hash table builder in the full source.
207  const std::vector<llvm::Value*>& hash_join_idx_args_in,
208  const bool is_sharded,
209  const bool col_is_nullable,
210  const bool is_bw_eq,
211  const int64_t sub_buff_size,
212  Executor* executor,
213  bool is_bucketized) {
214  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
215  using namespace std::string_literals;
216 
// Select the runtime lookup function variant by bucketization, null-aware
// equality (bitwise), sharding, and nullability.
217  std::string fname(is_bucketized ? "bucketized_hash_join_idx"s : "hash_join_idx"s);
218 
219  if (is_bw_eq) {
220  fname += "_bitwise";
221  }
222  if (is_sharded) {
223  fname += "_sharded";
224  }
// The "_bitwise" variants handle NULL keys themselves, so "_nullable" is only
// appended for plain equality on a nullable column.
225  if (!is_bw_eq && col_is_nullable) {
226  fname += "_nullable";
227  }
228 
// A non-negative slot index signals the key was found.
229  const auto slot_lv = executor->cgen_state_->emitCall(fname, hash_join_idx_args_in);
230  const auto slot_valid_lv = executor->cgen_state_->ir_builder_.CreateICmpSGE(
231  slot_lv, executor->cgen_state_->llInt(int64_t(0)));
232 
233  auto pos_ptr = hash_join_idx_args_in[0];
234  CHECK(pos_ptr);
235 
// Re-run the same lookup against the count region (base + sub_buff_size) to
// fetch the number of matching rows for the slot.
236  auto count_ptr = executor->cgen_state_->ir_builder_.CreateAdd(
237  pos_ptr, executor->cgen_state_->llInt(sub_buff_size));
238  auto hash_join_idx_args = hash_join_idx_args_in;
239  hash_join_idx_args[0] = executor->cgen_state_->ir_builder_.CreatePtrToInt(
240  count_ptr, llvm::Type::getInt64Ty(executor->cgen_state_->context_));
241 
// Row count is forced to 0 when the slot lookup failed.
242  const auto row_count_lv = executor->cgen_state_->ir_builder_.CreateSelect(
243  slot_valid_lv,
244  executor->cgen_state_->emitCall(fname, hash_join_idx_args),
245  executor->cgen_state_->llInt(int64_t(0)));
// Row ids live in the third region (base + 2 * sub_buff_size), indexed by the
// slot value; the result pointer is returned alongside count and slot.
246  auto rowid_base_i32 = executor->cgen_state_->ir_builder_.CreateIntToPtr(
247  executor->cgen_state_->ir_builder_.CreateAdd(
248  pos_ptr, executor->cgen_state_->llInt(2 * sub_buff_size)),
249  llvm::Type::getInt32PtrTy(executor->cgen_state_->context_));
250  auto rowid_ptr_i32 = executor->cgen_state_->ir_builder_.CreateGEP(
251  rowid_base_i32->getType()->getScalarType()->getPointerElementType(),
252  rowid_base_i32,
253  slot_lv);
254  return {rowid_ptr_i32, row_count_lv, slot_lv};
255 }
256 
257 llvm::Value* HashJoin::codegenHashTableLoad(const size_t table_idx, Executor* executor) {
258  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
259  llvm::Value* hash_ptr = nullptr;
260  const auto total_table_count =
261  executor->plan_state_->join_info_.join_hash_tables_.size();
262  CHECK_LT(table_idx, total_table_count);
263  if (total_table_count > 1) {
264  auto hash_tables_ptr =
265  get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
266  auto hash_pptr =
267  table_idx > 0
268  ? executor->cgen_state_->ir_builder_.CreateGEP(
269  hash_tables_ptr->getType()->getScalarType()->getPointerElementType(),
270  hash_tables_ptr,
271  executor->cgen_state_->llInt(static_cast<int64_t>(table_idx)))
272  : hash_tables_ptr;
273  hash_ptr = executor->cgen_state_->ir_builder_.CreateLoad(
274  hash_pptr->getType()->getPointerElementType(), hash_pptr);
275  } else {
276  hash_ptr = get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
277  }
278  CHECK(hash_ptr);
279  return hash_ptr;
280 }
281 
283 std::shared_ptr<HashJoin> HashJoin::getInstance(
284  const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
285  const std::vector<InputTableInfo>& query_infos,
286  const Data_Namespace::MemoryLevel memory_level,
287  const JoinType join_type,
288  const HashType preferred_hash_type,
289  const int device_count,
290  ColumnCacheMap& column_cache,
291  Executor* executor,
292  const HashTableBuildDagMap& hashtable_build_dag_map,
293  const RegisteredQueryHint& query_hint,
294  const TableIdToNodeMap& table_id_to_node_map) {
295  auto timer = DEBUG_TIMER(__func__);
296  std::shared_ptr<HashJoin> join_hash_table;
297  CHECK_GT(device_count, 0);
298  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
299  throw std::runtime_error(
300  "Overlaps hash join disabled, attempting to fall back to loop join");
301  }
302  if (qual_bin_oper->is_overlaps_oper()) {
303  VLOG(1) << "Trying to build geo hash table:";
304  join_hash_table = OverlapsJoinHashTable::getInstance(qual_bin_oper,
305  query_infos,
306  memory_level,
307  join_type,
308  device_count,
309  column_cache,
310  executor,
311  hashtable_build_dag_map,
312  query_hint,
313  table_id_to_node_map);
314  } else if (dynamic_cast<const Analyzer::ExpressionTuple*>(
315  qual_bin_oper->get_left_operand())) {
316  VLOG(1) << "Trying to build keyed hash table:";
317  join_hash_table = BaselineJoinHashTable::getInstance(qual_bin_oper,
318  query_infos,
319  memory_level,
320  join_type,
321  preferred_hash_type,
322  device_count,
323  column_cache,
324  executor,
325  hashtable_build_dag_map,
326  query_hint,
327  table_id_to_node_map);
328  } else {
329  try {
330  VLOG(1) << "Trying to build perfect hash table:";
331  join_hash_table = PerfectJoinHashTable::getInstance(qual_bin_oper,
332  query_infos,
333  memory_level,
334  join_type,
335  preferred_hash_type,
336  device_count,
337  column_cache,
338  executor,
339  hashtable_build_dag_map,
340  query_hint,
341  table_id_to_node_map);
342  } catch (JoinHashTableTooBig& e) {
343  throw e;
344  } catch (TooManyHashEntries&) {
345  const auto join_quals = coalesce_singleton_equi_join(qual_bin_oper);
346  CHECK_EQ(join_quals.size(), size_t(1));
347  const auto join_qual =
348  std::dynamic_pointer_cast<Analyzer::BinOper>(join_quals.front());
349  VLOG(1) << "Trying to build keyed hash table after perfect hash table:";
350  join_hash_table = BaselineJoinHashTable::getInstance(join_qual,
351  query_infos,
352  memory_level,
353  join_type,
354  preferred_hash_type,
355  device_count,
356  column_cache,
357  executor,
358  hashtable_build_dag_map,
359  query_hint,
360  table_id_to_node_map);
361  }
362  }
363  CHECK(join_hash_table);
364  if (VLOGGING(2)) {
365  if (join_hash_table->getMemoryLevel() == Data_Namespace::MemoryLevel::GPU_LEVEL) {
366  for (int device_id = 0; device_id < join_hash_table->getDeviceCount();
367  ++device_id) {
368  if (join_hash_table->getJoinHashBufferSize(ExecutorDeviceType::GPU, device_id) <=
369  1000) {
370  VLOG(2) << "Built GPU hash table: "
371  << join_hash_table->toString(ExecutorDeviceType::GPU, device_id);
372  }
373  }
374  } else {
375  if (join_hash_table->getJoinHashBufferSize(ExecutorDeviceType::CPU) <= 1000) {
376  VLOG(2) << "Built CPU hash table: "
377  << join_hash_table->toString(ExecutorDeviceType::CPU);
378  }
379  }
380  }
381  return join_hash_table;
382 }
383 
// Returns the (inner, outer) string dictionary proxies for a join column
// pair, or {nullptr, nullptr} when no dictionary translation is needed.
// NOTE(review): the line carrying the function name and the `cols` parameter
// is absent from this listing (original line 385 dropped) — confirm the exact
// signature against the full source.
384 std::pair<const StringDictionaryProxy*, StringDictionaryProxy*>
386  const Executor* executor,
387  const bool has_string_ops) {
388  const auto inner_col = cols.first;
389  CHECK(inner_col);
390  const auto inner_ti = inner_col->get_type_info();
391  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
392  std::pair<const StringDictionaryProxy*, StringDictionaryProxy*>
393  inner_outer_str_dict_proxies{nullptr, nullptr};
// Proxies are only relevant for dict-encoded string joins with a column on
// the outer side.
394  if (inner_ti.is_string() && outer_col) {
395  const auto& outer_ti = outer_col->get_type_info();
396  CHECK(outer_ti.is_string());
397  inner_outer_str_dict_proxies.first =
398  executor->getStringDictionaryProxy(inner_ti.getStringDictKey(), true);
399  CHECK(inner_outer_str_dict_proxies.first);
400  inner_outer_str_dict_proxies.second =
401  executor->getStringDictionaryProxy(outer_ti.getStringDictKey(), true);
402  CHECK(inner_outer_str_dict_proxies.second);
// Without string ops, identical dictionaries need no translation; reset both
// proxies to signal that.
403  if (!has_string_ops &&
404  *inner_outer_str_dict_proxies.first == *inner_outer_str_dict_proxies.second) {
405  // Dictionaries are the same - don't need to translate
406  CHECK_EQ(inner_ti.getStringDictKey(), outer_ti.getStringDictKey());
407  inner_outer_str_dict_proxies.first = nullptr;
408  inner_outer_str_dict_proxies.second = nullptr;
409  }
410  }
411  return inner_outer_str_dict_proxies;
412 }
413 
// NOTE(review): the function-name/return-type line is absent from this
// listing (original line 414 dropped). From the body, this builds the
// inner-to-outer string id translation map for a dictionary-encoded join,
// widening `col_range` when string ops added transient entries on the outer
// side; returns nullptr when no translation is needed.
415  const InnerOuter& cols,
416  const InnerOuterStringOpInfos& inner_outer_string_op_infos,
417  ExpressionRange& col_range,
418  const Executor* executor) {
419  const bool has_string_ops = inner_outer_string_op_infos.first.size() ||
420  inner_outer_string_op_infos.second.size();
421  const auto inner_outer_proxies =
422  HashJoin::getStrDictProxies(cols, executor, has_string_ops);
// Both proxies non-null means the dictionaries differ (or string ops apply)
// and ids must be translated.
423  const bool translate_dictionary =
424  inner_outer_proxies.first && inner_outer_proxies.second;
425  if (translate_dictionary) {
426  const auto& inner_dict_id = inner_outer_proxies.first->getDictKey();
427  const auto& outer_dict_id = inner_outer_proxies.second->getDictKey();
428  CHECK(has_string_ops || inner_dict_id != outer_dict_id);
429  const auto id_map = executor->getJoinIntersectionStringProxyTranslationMap(
430  inner_outer_proxies.first,
431  inner_outer_proxies.second,
432  inner_outer_string_op_infos.first,
433  inner_outer_string_op_infos.second,
434  executor->getRowSetMemoryOwner());
// Transient (negative) ids created by outer-side string ops extend the range
// below the previous minimum.
435  if (!inner_outer_string_op_infos.second.empty()) {
436  // String op was applied to lhs table,
437  // need to expand column range appropriately
438  col_range = ExpressionRange::makeIntRange(
439  std::min(col_range.getIntMin(),
440  static_cast<int64_t>(
441  inner_outer_proxies.second->transientEntryCount() + 1) *
442  -1),
443  col_range.getIntMax(),
444  0,
445  col_range.hasNulls());
446  }
447  return id_map;
448  }
449  return nullptr;
450 }
451 
// NOTE(review): the function-name line is absent from this listing (original
// line 452 dropped). The body collects the fragment ids of the given
// fragments into a sorted vector.
453  const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments) {
454  auto const fragment_id = [](auto const& frag_info) { return frag_info.fragmentId; };
455  std::vector<int> frag_ids(fragments.size());
456  std::transform(fragments.cbegin(), fragments.cend(), frag_ids.begin(), fragment_id);
// Sorted order makes the id list canonical (e.g. usable as a cache key).
457  std::sort(frag_ids.begin(), frag_ids.end());
458  return frag_ids;
459 }
460 
// NOTE(review): the function-name/return-type line is absent from this
// listing (original line 461 dropped). From the body: for each inner/outer
// join column pair, records the inner and outer string dictionary proxies
// (when id translation is needed) and builds a per-column chunk key used for
// hash table caching; returns the three parallel collections.
462  const std::vector<InnerOuter>& inner_outer_pairs,
463  const Executor* executor,
464  const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs) {
465  CHECK(executor);
466  std::vector<const void*> sd_inner_proxy_per_key;
467  std::vector<void*> sd_outer_proxy_per_key;
468  std::vector<ChunkKey> cache_key_chunks; // used for the cache key
469  const bool has_string_op_infos = inner_outer_string_op_infos_pairs.size();
470  if (has_string_op_infos) {
471  CHECK_EQ(inner_outer_pairs.size(), inner_outer_string_op_infos_pairs.size());
472  }
473  size_t string_op_info_pairs_idx = 0;
474  for (const auto& inner_outer_pair : inner_outer_pairs) {
475  const auto inner_col = inner_outer_pair.first;
476  const auto outer_col = inner_outer_pair.second;
477  const auto& inner_ti = inner_col->get_type_info();
478  const auto& outer_ti = outer_col->get_type_info();
479  const auto& inner_column_key = inner_col->getColumnKey();
480  ChunkKey cache_key_chunks_for_column{
481  inner_column_key.db_id, inner_column_key.table_id, inner_column_key.column_id};
// Proxies are needed when the dict-encoded string columns use different
// dictionaries, or when any string op applies to this key.
482  if (inner_ti.is_string() &&
483  (!(inner_ti.get_comp_param() == outer_ti.get_comp_param()) ||
484  (has_string_op_infos &&
485  (inner_outer_string_op_infos_pairs[string_op_info_pairs_idx].first.size() ||
486  inner_outer_string_op_infos_pairs[string_op_info_pairs_idx].second.size())))) {
487  CHECK(outer_ti.is_string());
488  CHECK(inner_ti.get_compression() == kENCODING_DICT &&
489  outer_ti.get_compression() == kENCODING_DICT);
490  const auto sd_inner_proxy = executor->getStringDictionaryProxy(
491  inner_ti.getStringDictKey(), executor->getRowSetMemoryOwner(), true);
492  auto sd_outer_proxy = executor->getStringDictionaryProxy(
493  outer_ti.getStringDictKey(), executor->getRowSetMemoryOwner(), true);
494  CHECK(sd_inner_proxy && sd_outer_proxy);
495  sd_inner_proxy_per_key.push_back(sd_inner_proxy);
496  sd_outer_proxy_per_key.push_back(sd_outer_proxy);
// The outer dictionary generation participates in the cache key so stale
// cached tables are not reused after the dictionary grows.
497  cache_key_chunks_for_column.push_back(sd_outer_proxy->getGeneration());
498  } else {
// Keep the per-key vectors aligned with inner_outer_pairs by pushing
// null placeholders when no translation is needed.
499  sd_inner_proxy_per_key.emplace_back();
500  sd_outer_proxy_per_key.emplace_back();
501  }
502  cache_key_chunks.push_back(cache_key_chunks_for_column);
503  string_op_info_pairs_idx++;
504  }
505  return {sd_inner_proxy_per_key, sd_outer_proxy_per_key, cache_key_chunks};
506 }
507 
// Builds one id-translation map per composite key component, or nullptr for
// components that need no translation; the output is index-aligned with the
// proxy vectors in `composite_key_info`.
// NOTE(review): the line carrying the function name is absent from this
// listing (original line 509 dropped) — confirm the exact signature against
// the full source.
508 std::vector<const StringDictionaryProxy::IdMap*>
510  const CompositeKeyInfo& composite_key_info,
511  const std::vector<InnerOuterStringOpInfos>& string_op_infos_for_keys,
512  const Executor* executor) {
513  const auto& inner_proxies = composite_key_info.sd_inner_proxy_per_key;
514  const auto& outer_proxies = composite_key_info.sd_outer_proxy_per_key;
515  const size_t num_proxies = inner_proxies.size();
516  CHECK_EQ(num_proxies, outer_proxies.size());
517  std::vector<const StringDictionaryProxy::IdMap*> proxy_translation_maps;
518  proxy_translation_maps.reserve(num_proxies);
519  for (size_t proxy_pair_idx = 0; proxy_pair_idx < num_proxies; ++proxy_pair_idx) {
// A key component needs translation only when both proxies were recorded
// (see the composite key construction, which stores nulls otherwise).
520  const bool translate_proxies =
521  inner_proxies[proxy_pair_idx] && outer_proxies[proxy_pair_idx];
522  if (translate_proxies) {
// The per-key proxies are stored type-erased as void*; cast them back.
523  const auto inner_proxy =
524  reinterpret_cast<const StringDictionaryProxy*>(inner_proxies[proxy_pair_idx]);
525  auto outer_proxy =
526  reinterpret_cast<StringDictionaryProxy*>(outer_proxies[proxy_pair_idx]);
527  CHECK(inner_proxy);
528  CHECK(outer_proxy);
529 
530  CHECK_NE(inner_proxy->getDictKey(), outer_proxy->getDictKey());
531  proxy_translation_maps.emplace_back(
532  executor->getJoinIntersectionStringProxyTranslationMap(
533  inner_proxy,
534  outer_proxy,
535  string_op_infos_for_keys[proxy_pair_idx].first,
536  string_op_infos_for_keys[proxy_pair_idx].second,
537  executor->getRowSetMemoryOwner()));
538  } else {
// Keep output aligned with the input key components.
539  proxy_translation_maps.emplace_back(nullptr);
540  }
541  }
542  return proxy_translation_maps;
543 }
544 
// NOTE(review): the function-name/return-type line is absent from this
// listing (original line 545 dropped). The body codegens either a plain
// column load or, when string ops are present, a pseudo string-op expression
// over the underlying column var.
546  const Analyzer::Expr* col_or_string_oper,
547  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
548  CodeGenerator& code_generator,
549  const CompilationOptions& co) {
550  if (!string_op_infos.empty()) {
// String ops only apply on top of a column var here.
551  const auto coerced_col_var =
552  dynamic_cast<const Analyzer::ColumnVar*>(col_or_string_oper);
553  CHECK(coerced_col_var);
554  std::vector<llvm::Value*> codegen_val_vec{
555  code_generator.codegenPseudoStringOper(coerced_col_var, string_op_infos, co)};
556  return codegen_val_vec[0];
557  }
// Plain column: fetch_column=true, take the first (scalar) value.
558  return code_generator.codegen(col_or_string_oper, true, co)[0];
559 }
560 
561 std::shared_ptr<Analyzer::ColumnVar> getSyntheticColumnVar(
562  std::string_view table,
563  std::string_view column,
564  int rte_idx,
565  const Catalog_Namespace::Catalog& catalog) {
566  auto tmeta = catalog.getMetadataForTable(std::string(table));
567  CHECK(tmeta);
568 
569  auto cmeta = catalog.getMetadataForColumn(tmeta->tableId, std::string(column));
570  CHECK(cmeta);
571 
572  auto ti = cmeta->columnType;
573 
574  if (ti.is_geometry() && ti.get_type() != kPOINT) {
575  int geoColumnId{0};
576  switch (ti.get_type()) {
577  case kLINESTRING: {
578  geoColumnId = cmeta->columnId + 2;
579  break;
580  }
581  case kPOLYGON: {
582  geoColumnId = cmeta->columnId + 3;
583  break;
584  }
585  case kMULTIPOLYGON: {
586  geoColumnId = cmeta->columnId + 4;
587  break;
588  }
589  default:
590  CHECK(false);
591  }
592  cmeta = catalog.getMetadataForColumn(tmeta->tableId, geoColumnId);
593  CHECK(cmeta);
594  ti = cmeta->columnType;
595  }
596 
597  auto cv = std::make_shared<Analyzer::ColumnVar>(
598  ti,
599  shared::ColumnKey{catalog.getDatabaseId(), tmeta->tableId, cmeta->columnId},
600  rte_idx);
601  return cv;
602 }
603 
// NOTE(review): the class-name line is absent from this listing (original
// line 604 dropped); from later uses in this file the class is
// AllColumnVarsVisitor. It walks a scalar expression tree and collects every
// ColumnVar it contains into a set.
605  : public ScalarExprVisitor<std::set<const Analyzer::ColumnVar*>> {
606  protected:
// Leaf case: a single column var yields a singleton set.
607  std::set<const Analyzer::ColumnVar*> visitColumnVar(
608  const Analyzer::ColumnVar* column) const override {
609  return {column};
610  }
611 
// Tuple case: union the column vars of each tuple component.
612  std::set<const Analyzer::ColumnVar*> visitColumnVarTuple(
613  const Analyzer::ExpressionTuple* expr_tuple) const override {
614  AllColumnVarsVisitor visitor;
615  std::set<const Analyzer::ColumnVar*> result;
616  for (const auto& expr_component : expr_tuple->getTuple()) {
617  const auto component_rte_set = visitor.visit(expr_component.get());
618  result.insert(component_rte_set.begin(), component_rte_set.end());
619  }
620  return result;
621  }
622 
// Combine results from sibling subtrees by set union.
623  std::set<const Analyzer::ColumnVar*> aggregateResult(
624  const std::set<const Analyzer::ColumnVar*>& aggregate,
625  const std::set<const Analyzer::ColumnVar*>& next_result) const override {
626  auto result = aggregate;
627  result.insert(next_result.begin(), next_result.end());
628  return result;
629  }
630 };
631 
632 void setupSyntheticCaching(std::set<const Analyzer::ColumnVar*> cvs, Executor* executor) {
633  std::unordered_set<shared::TableKey> phys_table_ids;
634  for (auto cv : cvs) {
635  phys_table_ids.insert(cv->getTableKey());
636  }
637 
638  std::unordered_set<PhysicalInput> phys_inputs;
639  for (auto cv : cvs) {
640  const auto& column_key = cv->getColumnKey();
641  phys_inputs.emplace(
642  PhysicalInput{column_key.column_id, column_key.table_id, column_key.db_id});
643  }
644 
645  executor->setupCaching(phys_inputs, phys_table_ids);
646 }
647 
648 std::vector<InputTableInfo> getSyntheticInputTableInfo(
649  std::set<const Analyzer::ColumnVar*> cvs,
650  Executor* executor) {
651  std::unordered_set<shared::TableKey> phys_table_ids;
652  for (auto cv : cvs) {
653  phys_table_ids.insert(cv->getTableKey());
654  }
655 
656  // NOTE(sy): This vector ordering seems to work for now, but maybe we need to
657  // review how rte_idx is assigned for ColumnVars. See for example Analyzer.h
658  // and RelAlgExecutor.cpp and rte_idx there.
659  std::vector<InputTableInfo> query_infos;
660  query_infos.reserve(phys_table_ids.size());
661  size_t i = 0;
662  for (const auto& table_key : phys_table_ids) {
663  auto td = Catalog_Namespace::get_metadata_for_table(table_key);
664  CHECK(td);
665  query_infos.push_back({table_key, td->fragmenter->getFragmentsForQuery()});
666  ++i;
667  }
668 
669  return query_infos;
670 }
671 
// Test/debug helper: build a hash table for a synthetic equijoin between two
// named columns looked up in the catalogs.
// NOTE(review): this listing is missing original lines 694 (presumably the
// declaration of `query_hint`, which is used below but not declared in the
// visible text) and 699 (presumably the join_type argument to getInstance) —
// confirm against the full source.
673 std::shared_ptr<HashJoin> HashJoin::getSyntheticInstance(
674  std::string_view table1,
675  std::string_view column1,
676  const Catalog_Namespace::Catalog& catalog1,
677  std::string_view table2,
678  std::string_view column2,
679  const Catalog_Namespace::Catalog& catalog2,
680  const Data_Namespace::MemoryLevel memory_level,
681  const HashType preferred_hash_type,
682  const int device_count,
683  ColumnCacheMap& column_cache,
684  Executor* executor) {
// rte 0 for the outer side, rte 1 for the inner side of the synthetic qual.
685  auto a1 = getSyntheticColumnVar(table1, column1, 0, catalog1);
686  auto a2 = getSyntheticColumnVar(table2, column2, 1, catalog2);
687 
688  auto qual_bin_oper = std::make_shared<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, a1, a2);
689 
690  std::set<const Analyzer::ColumnVar*> cvs =
691  AllColumnVarsVisitor().visit(qual_bin_oper.get());
692  auto query_infos = getSyntheticInputTableInfo(cvs, executor);
693  setupSyntheticCaching(cvs, executor);
695 
696  auto hash_table = HashJoin::getInstance(qual_bin_oper,
697  query_infos,
698  memory_level,
700  preferred_hash_type,
701  device_count,
702  column_cache,
703  executor,
704  {},
705  query_hint,
706  {});
707  return hash_table;
708 }
709 
// Test/debug helper: build a hash table for an already-constructed join qual.
// NOTE(review): this listing is missing original lines 722 (presumably the
// declaration of `query_hint`, which is used below but not declared in the
// visible text) and 727 (presumably the join_type argument to getInstance) —
// confirm against the full source.
711 std::shared_ptr<HashJoin> HashJoin::getSyntheticInstance(
712  const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
713  const Data_Namespace::MemoryLevel memory_level,
714  const HashType preferred_hash_type,
715  const int device_count,
716  ColumnCacheMap& column_cache,
717  Executor* executor) {
// Collect all column vars in the qual to derive table infos and caching.
718  std::set<const Analyzer::ColumnVar*> cvs =
719  AllColumnVarsVisitor().visit(qual_bin_oper.get());
720  auto query_infos = getSyntheticInputTableInfo(cvs, executor);
721  setupSyntheticCaching(cvs, executor);
723 
724  auto hash_table = HashJoin::getInstance(qual_bin_oper,
725  query_infos,
726  memory_level,
728  preferred_hash_type,
729  device_count,
730  column_cache,
731  executor,
732  {},
733  query_hint,
734  {});
735  return hash_table;
736 }
737 
// Test/debug helper: try each candidate qual in order, keeping the last hash
// table successfully built; returns the last HashJoinFail message (empty on
// full success) together with the table (which may be null if all failed).
// NOTE(review): this listing is missing original lines 752 (presumably the
// declaration of `query_hint`, which is used below but not declared in the
// visible text) and 760 (presumably the join_type argument to getInstance) —
// confirm against the full source.
738 std::pair<std::string, std::shared_ptr<HashJoin>> HashJoin::getSyntheticInstance(
739  std::vector<std::shared_ptr<Analyzer::BinOper>> qual_bin_opers,
740  const Data_Namespace::MemoryLevel memory_level,
741  const HashType preferred_hash_type,
742  const int device_count,
743  ColumnCacheMap& column_cache,
744  Executor* executor) {
// Union of column vars across all candidate quals.
745  std::set<const Analyzer::ColumnVar*> cvs;
746  for (auto& qual : qual_bin_opers) {
747  auto cv = AllColumnVarsVisitor().visit(qual.get());
748  cvs.insert(cv.begin(), cv.end());
749  }
750  auto query_infos = getSyntheticInputTableInfo(cvs, executor);
751  setupSyntheticCaching(cvs, executor);
753  std::shared_ptr<HashJoin> hash_table;
754  std::string error_msg;
755  for (auto& qual : qual_bin_opers) {
756  try {
757  auto candidate_hash_table = HashJoin::getInstance(qual,
758  query_infos,
759  memory_level,
761  preferred_hash_type,
762  device_count,
763  column_cache,
764  executor,
765  {},
766  query_hint,
767  {});
768  if (candidate_hash_table) {
769  hash_table = candidate_hash_table;
770  }
// A failing qual records its message and the loop moves to the next one.
771  } catch (HashJoinFail& e) {
772  error_msg = e.what();
773  continue;
774  }
775  }
776  return std::make_pair(error_msg, hash_table);
777 }
778 
// NOTE(review): the function-name line is absent from this listing (original
// line 779 dropped; a `table_key` parameter is referenced below but not
// visible). In cluster mode, an unsharded physical inner table must be
// replicated for a hash join to be valid; otherwise this throws.
780  const size_t shard_count,
781  const Executor* executor) {
// Constraint only applies to distributed (cluster) deployments.
782  if (!g_cluster) {
783  return;
784  }
// Negative table ids denote non-physical (e.g. temporary) tables, which are
// exempt from the replication check.
785  if (table_key.table_id >= 0) {
786  CHECK(executor);
787  const auto inner_td = Catalog_Namespace::get_metadata_for_table(table_key);
788  CHECK(inner_td);
789  if (!shard_count && !table_is_replicated(inner_td)) {
790  throw TableMustBeReplicated(inner_td->tableName);
791  }
792  }
793 }
794 
// NOTE(review): the signature line is absent from this listing (original line
// 796 dropped); from later call sites (getHashJoinColumn<Analyzer::ColumnVar>
// etc.) this takes an Analyzer::Expr* and returns it downcast to const T*,
// first peeling off a single unary (cast) operator if present.
795 template <typename T>
797  auto* target_expr = expr;
// Look through one level of UOper (e.g. a CAST) to reach the operand.
798  if (auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr)) {
799  target_expr = cast_expr->get_operand();
800  }
801  CHECK(target_expr);
// Returns nullptr when the (possibly unwrapped) expression is not a T.
802  return dynamic_cast<const T*>(target_expr);
803 }
804 
805 std::pair<InnerOuter, InnerOuterStringOpInfos> HashJoin::normalizeColumnPair(
806  const Analyzer::Expr* lhs,
807  const Analyzer::Expr* rhs,
808  const TemporaryTables* temporary_tables,
809  const bool is_overlaps_join) {
810  SQLTypeInfo lhs_ti = lhs->get_type_info();
811  SQLTypeInfo rhs_ti = rhs->get_type_info();
812  if (!is_overlaps_join) {
813  if (lhs_ti.get_type() != rhs_ti.get_type()) {
814  throw HashJoinFail("Equijoin types must be identical, found: " +
815  lhs_ti.get_type_name() + ", " + rhs_ti.get_type_name());
816  }
817  if (!lhs_ti.is_integer() && !lhs_ti.is_time() && !lhs_ti.is_string() &&
818  !lhs_ti.is_decimal()) {
819  throw HashJoinFail("Cannot apply hash join to inner column type " +
820  lhs_ti.get_type_name());
821  }
822  // Decimal types should be identical.
823  if (lhs_ti.is_decimal() && (lhs_ti.get_scale() != rhs_ti.get_scale() ||
824  lhs_ti.get_precision() != rhs_ti.get_precision())) {
825  throw HashJoinFail("Equijoin with different decimal types");
826  }
827  }
828 
829  const auto lhs_cast = dynamic_cast<const Analyzer::UOper*>(lhs);
830  const auto rhs_cast = dynamic_cast<const Analyzer::UOper*>(rhs);
831  if (lhs_ti.is_string() && (static_cast<bool>(lhs_cast) != static_cast<bool>(rhs_cast) ||
832  (lhs_cast && lhs_cast->get_optype() != kCAST) ||
833  (rhs_cast && rhs_cast->get_optype() != kCAST))) {
834  throw HashJoinFail(
835  "Cannot use hash join for given expression (non-cast unary operator)");
836  }
837  // Casts to decimal are not suported.
838  if (lhs_ti.is_decimal() && (lhs_cast || rhs_cast)) {
839  throw HashJoinFail("Cannot use hash join for given expression (cast to decimal)");
840  }
841  auto lhs_col = getHashJoinColumn<Analyzer::ColumnVar>(lhs);
842  auto rhs_col = getHashJoinColumn<Analyzer::ColumnVar>(rhs);
843 
844  const auto lhs_string_oper = getHashJoinColumn<Analyzer::StringOper>(lhs);
845  const auto rhs_string_oper = getHashJoinColumn<Analyzer::StringOper>(rhs);
846 
847  auto process_string_op_infos = [](const auto& string_oper, auto& col, auto& ti) {
848  std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
849  if (string_oper) {
850  col = dynamic_cast<const Analyzer::ColumnVar*>(string_oper->getArg(0));
851  if (!col) {
852  // Todo (todd): Allow for non-colvar inputs into string operators for
853  // join predicates
854  // We now guard against non constant/colvar/stringoper inputs
855  // in Analyzer::StringOper::check_operand_types, but keeping this to not
856  // depend on that logic if and when it changes as allowing non-colvar inputs
857  // for hash joins will be additional work on top of allowing them
858  // outside of join predicates
859  throw HashJoinFail(
860  "Hash joins involving string operators currently restricted to column inputs "
861  "(i.e. not case statements).");
862  }
863  ti = col->get_type_info();
864  CHECK(ti.is_dict_encoded_string());
865  const auto chained_string_op_exprs = string_oper->getChainedStringOpExprs();
866  CHECK_GT(chained_string_op_exprs.size(), 0UL);
867  for (const auto& chained_string_op_expr : chained_string_op_exprs) {
868  auto chained_string_op =
869  dynamic_cast<const Analyzer::StringOper*>(chained_string_op_expr.get());
870  CHECK(chained_string_op);
871  StringOps_Namespace::StringOpInfo string_op_info(
872  chained_string_op->get_kind(),
873  chained_string_op->get_type_info(),
874  chained_string_op->getLiteralArgs());
875  string_op_infos.emplace_back(string_op_info);
876  }
877  }
878  return string_op_infos;
879  };
880 
881  auto outer_string_op_infos = process_string_op_infos(lhs_string_oper, lhs_col, lhs_ti);
882  auto inner_string_op_infos = process_string_op_infos(rhs_string_oper, rhs_col, rhs_ti);
883 
884  if (!lhs_col && !rhs_col) {
885  throw HashJoinFail(
886  "Cannot use hash join for given expression (both lhs and rhs are invalid)",
888  }
889 
890  const Analyzer::ColumnVar* inner_col{nullptr};
891  const Analyzer::ColumnVar* outer_col{nullptr};
892  auto outer_ti = lhs_ti;
893  auto inner_ti = rhs_ti;
894  const Analyzer::Expr* outer_expr{lhs};
895  InnerQualDecision inner_qual_decision = InnerQualDecision::UNKNOWN;
896  if (!lhs_col || (rhs_col && lhs_col->get_rte_idx() < rhs_col->get_rte_idx())) {
897  inner_qual_decision = InnerQualDecision::RHS;
898  inner_col = rhs_col;
899  outer_col = lhs_col;
900  } else {
901  inner_qual_decision = InnerQualDecision::LHS;
902  if (lhs_col && lhs_col->get_rte_idx() == 0) {
903  throw HashJoinFail(
904  "Cannot use hash join for given expression (lhs' rte idx is zero)",
905  inner_qual_decision);
906  }
907  inner_col = lhs_col;
908  outer_col = rhs_col;
909  std::swap(outer_ti, inner_ti);
910  std::swap(outer_string_op_infos, inner_string_op_infos);
911  outer_expr = rhs;
912  }
913  if (!inner_col) {
914  throw HashJoinFail("Cannot use hash join for given expression (invalid inner col)",
915  inner_qual_decision);
916  }
917  if (!outer_col) {
918  // check whether outer_col is a constant, i.e., inner_col = K;
919  const auto outer_constant_col = dynamic_cast<const Analyzer::Constant*>(outer_expr);
920  if (outer_constant_col) {
921  throw HashJoinFail(
922  "Cannot use hash join for given expression: try to join with a constant "
923  "value",
924  inner_qual_decision);
925  }
926  MaxRangeTableIndexVisitor rte_idx_visitor;
927  int outer_rte_idx = rte_idx_visitor.visit(outer_expr);
928  // The inner column candidate is not actually inner; the outer
929  // expression contains columns which are at least as deep.
930  if (inner_col->get_rte_idx() <= outer_rte_idx) {
931  throw HashJoinFail(
932  "Cannot use hash join for given expression (inner's rte <= outer's rte)",
933  inner_qual_decision);
934  }
935  }
936  // We need to fetch the actual type information from the catalog since Analyzer
937  // always reports nullable as true for inner table columns in left joins.
938  const auto& column_key = inner_col->getColumnKey();
939  const auto inner_col_cd = get_column_descriptor_maybe(column_key);
940  const auto inner_col_real_ti = get_column_type(
941  column_key.column_id, column_key.table_id, inner_col_cd, temporary_tables);
942  const auto& outer_col_ti =
943  !(dynamic_cast<const Analyzer::FunctionOper*>(lhs)) && outer_col
944  ? outer_col->get_type_info()
945  : outer_ti;
946  // Casts from decimal are not supported.
947  if ((inner_col_real_ti.is_decimal() || outer_col_ti.is_decimal()) &&
948  (lhs_cast || rhs_cast)) {
949  throw HashJoinFail("Cannot use hash join for given expression (cast from decimal)");
950  }
951  if (is_overlaps_join) {
952  if (!inner_col_real_ti.is_array()) {
953  throw HashJoinFail(
954  "Overlaps join only supported for inner columns with array type");
955  }
956  auto is_bounds_array = [](const auto ti) {
957  return ti.is_fixlen_array() && ti.get_size() == 32;
958  };
959  if (!is_bounds_array(inner_col_real_ti)) {
960  throw HashJoinFail(
961  "Overlaps join only supported for 4-element double fixed length arrays");
962  }
963  if (!(outer_col_ti.get_type() == kPOINT || is_bounds_array(outer_col_ti) ||
964  is_constructed_point(outer_expr))) {
965  throw HashJoinFail(
966  "Overlaps join only supported for geometry outer columns of type point, "
967  "geometry columns with bounds or constructed points");
968  }
969  } else {
970  if (!(inner_col_real_ti.is_integer() || inner_col_real_ti.is_time() ||
971  inner_col_real_ti.is_decimal() ||
972  (inner_col_real_ti.is_string() &&
973  inner_col_real_ti.get_compression() == kENCODING_DICT))) {
974  throw HashJoinFail(
975  "Can only apply hash join to integer-like types and dictionary encoded "
976  "strings");
977  }
978  }
979 
980  auto normalized_inner_col = inner_col;
981  auto normalized_outer_col = outer_col ? outer_col : outer_expr;
982 
983  const auto& normalized_inner_ti = normalized_inner_col->get_type_info();
984  const auto& normalized_outer_ti = normalized_outer_col->get_type_info();
985 
986  if (normalized_inner_ti.is_string() != normalized_outer_ti.is_string()) {
987  throw HashJoinFail(std::string("Could not build hash tables for incompatible types " +
988  normalized_inner_ti.get_type_name() + " and " +
989  normalized_outer_ti.get_type_name()));
990  }
991  return std::make_pair(std::make_pair(normalized_inner_col, normalized_outer_col),
992  std::make_pair(inner_string_op_infos, outer_string_op_infos));
993 }
994 
995 std::pair<std::vector<InnerOuter>, std::vector<InnerOuterStringOpInfos>>
997  const TemporaryTables* temporary_tables) {
998  std::pair<std::vector<InnerOuter>, std::vector<InnerOuterStringOpInfos>> result;
999  const auto lhs_tuple_expr =
1000  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
1001  const auto rhs_tuple_expr =
1002  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
1003 
1004  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
1005  if (lhs_tuple_expr) {
1006  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
1007  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
1008  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
1009  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
1010  const auto col_pair = normalizeColumnPair(lhs_tuple[i].get(),
1011  rhs_tuple[i].get(),
1012  temporary_tables,
1013  condition->is_overlaps_oper());
1014  result.first.emplace_back(col_pair.first);
1015  result.second.emplace_back(col_pair.second);
1016  }
1017  } else {
1018  CHECK(!lhs_tuple_expr && !rhs_tuple_expr);
1019  const auto col_pair = normalizeColumnPair(condition->get_left_operand(),
1020  condition->get_right_operand(),
1021  temporary_tables,
1022  condition->is_overlaps_oper());
1023  result.first.emplace_back(col_pair.first);
1024  result.second.emplace_back(col_pair.second);
1025  }
1026 
1027  return result;
1028 }
1029 
1030 bool HashJoin::canAccessHashTable(bool allow_hash_table_recycling,
1031  bool invalid_cache_key,
1032  JoinType join_type) {
1033  return g_enable_data_recycler && g_use_hashtable_cache && !invalid_cache_key &&
1034  allow_hash_table_recycling && join_type != JoinType::INVALID;
1035 }
1036 
1037 namespace {
1038 
1040  const TemporaryTables* temporary_tables) {
1041  const auto lhs = qual_bin_oper->get_left_operand();
1042  const auto rhs = qual_bin_oper->get_right_operand();
1043  return HashJoin::normalizeColumnPair(lhs, rhs, temporary_tables).first;
1044 }
1045 
1046 } // namespace
1047 
1048 size_t get_shard_count(const Analyzer::BinOper* join_condition,
1049  const Executor* executor) {
1050  const Analyzer::ColumnVar* inner_col{nullptr};
1051  const Analyzer::Expr* outer_col{nullptr};
1052  std::shared_ptr<Analyzer::BinOper> redirected_bin_oper;
1053  try {
1054  std::tie(inner_col, outer_col) =
1055  get_cols(join_condition, executor->getTemporaryTables());
1056  } catch (...) {
1057  return 0;
1058  }
1059  if (!inner_col || !outer_col) {
1060  return 0;
1061  }
1062  return get_shard_count({inner_col, outer_col}, executor);
1063 }
static std::vector< int > collectFragmentIds(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
Definition: HashJoin.cpp:452
static std::shared_ptr< HashJoin > getSyntheticInstance(std::string_view table1, std::string_view column1, const Catalog_Namespace::Catalog &catalog1, std::string_view table2, std::string_view column2, const Catalog_Namespace::Catalog &catalog2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from named tables and columns (such as for testing).
Definition: HashJoin.cpp:673
int64_t getIntMin() const
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< int > ChunkKey
Definition: types.h:36
std::vector< InputTableInfo > getSyntheticInputTableInfo(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:648
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
JoinType
Definition: sqldefs.h:165
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
Definition: HashJoin.cpp:257
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
Definition: HashJoin.h:106
std::string toStringFlat(const HashJoin *hash_table, const ExecutorDeviceType device_type, const int device_id)
Definition: HashJoin.cpp:98
static bool canAccessHashTable(bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
Definition: HashJoin.cpp:1030
ExecutorDeviceType
std::vector< const void * > sd_inner_proxy_per_key
Definition: HashJoin.h:128
virtual std::string toStringFlat64(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.cpp:116
std::list< std::shared_ptr< Analyzer::Expr > > coalesce_singleton_equi_join(const std::shared_ptr< Analyzer::BinOper > &join_qual)
static void checkHashJoinReplicationConstraint(const shared::TableKey &table_key, const size_t shard_count, const Executor *executor)
Definition: HashJoin.cpp:779
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
static JoinColumn makeJoinColumn(Executor *executor, const Analyzer::ColumnVar &hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Data_Namespace::MemoryLevel effective_mem_lvl, const int device_id, DeviceAllocator *device_allocator, const size_t thread_idx, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, std::vector< std::shared_ptr< void >> &malloc_owner, ColumnCacheMap &column_cache)
Creates a JoinColumn struct containing an array of JoinChunk structs.
void setBucketInfo(const std::vector< double > &bucket_sizes_for_dimension, const std::vector< InnerOuter > inner_outer_pairs)
Definition: HashJoin.cpp:35
std::set< const Analyzer::ColumnVar * > aggregateResult(const std::set< const Analyzer::ColumnVar * > &aggregate, const std::set< const Analyzer::ColumnVar * > &next_result) const override
Definition: HashJoin.cpp:623
HOST DEVICE int get_scale() const
Definition: sqltypes.h:386
const Expr * get_right_operand() const
Definition: Analyzer.h:456
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1508
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
Definition: HashJoin.cpp:58
static std::shared_ptr< OverlapsJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
static std::pair< const StringDictionaryProxy *, StringDictionaryProxy * > getStrDictProxies(const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
Definition: HashJoin.cpp:385
const TableDescriptor * get_metadata_for_table(const ::shared::TableKey &table_key, bool populate_fragmenter)
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:233
Definition: sqldefs.h:48
llvm::Value * codegenPseudoStringOper(const Analyzer::ColumnVar *, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, const CompilationOptions &)
Definition: sqldefs.h:29
Definition: HashTable.h:21
virtual int8_t * alloc(const size_t num_bytes)=0
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:1039
T visit(const Analyzer::Expr *expr) const
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:381
static llvm::Value * codegenColOrStringOper(const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
Definition: HashJoin.cpp:545
bool g_enable_data_recycler
Definition: Execute.cpp:146
#define CHECK_GT(x, y)
Definition: Logger.h:305
bool is_time() const
Definition: sqltypes.h:586
virtual std::string toStringFlat32(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.cpp:121
std::string to_string(char const *&&v)
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
const std::vector< JoinColumnTypeInfo > join_column_types
Definition: HashJoin.h:112
virtual void copyToDevice(void *device_dst, const void *host_src, const size_t num_bytes) const =0
size_t col_chunks_buff_sz
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
const std::vector< std::shared_ptr< Analyzer::Expr > > & getTuple() const
Definition: Analyzer.h:253
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
std::vector< void * > sd_outer_proxy_per_key
Definition: HashJoin.h:129
bool is_integer() const
Definition: sqltypes.h:582
#define CHECK_NE(x, y)
Definition: Logger.h:302
const ColumnDescriptor * get_column_descriptor_maybe(const shared::ColumnKey &column_key)
Definition: Execute.h:220
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.h:300
int getDatabaseId() const
Definition: Catalog.h:304
static std::vector< const StringDictionaryProxy::IdMap * > translateCompositeStrDictProxies(const CompositeKeyInfo &composite_key_info, const std::vector< InnerOuterStringOpInfos > &string_op_infos_for_keys, const Executor *executor)
Definition: HashJoin.cpp:509
std::string toString(const ExecutorDeviceType &device_type)
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:320
bool hasNulls() const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const int8_t * col_chunks_buff
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
int get_precision() const
Definition: sqltypes.h:384
static ExpressionRange makeIntRange(const int64_t int_min, const int64_t int_max, const int64_t bucket, const bool has_nulls)
static const StringDictionaryProxy::IdMap * translateInnerToOuterStrDictProxies(const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
Definition: HashJoin.cpp:414
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
Definition: HashJoin.cpp:805
void setupSyntheticCaching(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:632
#define VLOGGING(n)
Definition: Logger.h:289
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK_LT(x, y)
Definition: Logger.h:303
static RegisteredQueryHint defaults()
Definition: QueryHint.h:329
Definition: sqldefs.h:71
Expression class for string functions. The "arg" constructor parameter must be an expression that reso...
Definition: Analyzer.h:1479
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
Definition: HashJoin.h:286
bool table_is_replicated(const TableDescriptor *td)
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
Definition: HashTable.h:34
std::set< const Analyzer::ColumnVar * > visitColumnVarTuple(const Analyzer::ExpressionTuple *expr_tuple) const override
Definition: HashJoin.cpp:612
static std::shared_ptr< BaselineJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hints, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
std::string get_type_name() const
Definition: sqltypes.h:507
static std::shared_ptr< PerfectJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hints, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, const Catalog_Namespace::Catalog &catalog)
Definition: HashJoin.cpp:561
int64_t getIntMax() const
std::set< const Analyzer::ColumnVar * > visitColumnVar(const Analyzer::ColumnVar *column) const override
Definition: HashJoin.cpp:607
std::pair< std::vector< StringOps_Namespace::StringOpInfo >, std::vector< StringOps_Namespace::StringOpInfo >> InnerOuterStringOpInfos
Definition: HashJoin.h:108
#define CHECK(condition)
Definition: Logger.h:291
std::set< int32_t > payload
Definition: HashTable.h:23
#define DEBUG_TIMER(name)
Definition: Logger.h:411
static const T * getHashJoinColumn(const Analyzer::Expr *expr)
Definition: HashJoin.cpp:796
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs(const Analyzer::BinOper *condition, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:996
bool g_cluster
const Expr * get_left_operand() const
Definition: Analyzer.h:455
bool is_overlaps_oper() const
Definition: Analyzer.h:453
InnerQualDecision
Definition: HashJoin.h:63
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
bool is_string() const
Definition: sqltypes.h:580
std::vector< int64_t > key
Definition: HashTable.h:22
std::vector< JoinBucketInfo > join_buckets
Definition: HashJoin.h:114
bool is_decimal() const
Definition: sqltypes.h:583
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:1048
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query&#39;s parse tree etc.
Definition: HashJoin.cpp:283
HashType
Definition: HashTable.h:19
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
bool g_use_hashtable_cache
Definition: Execute.cpp:147
const std::vector< JoinColumn > join_columns
Definition: HashJoin.h:111
#define VLOG(n)
Definition: Logger.h:387
static CompositeKeyInfo getCompositeKeyInfo(const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})
Definition: HashJoin.cpp:461