BaselineJoinHashTable.cpp — OmniSciDB (commit 16c4e035a1)
/*
 * Copyright 2017 MapD Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "QueryEngine/JoinHashTable/BaselineJoinHashTable.h"

#include <future>

#include "QueryEngine/Execute.h"

// let's only consider the CPU hashtable recycler at this moment
// todo (yoonmin): support GPU hashtable cache without regression
std::unique_ptr<HashTableRecycler> BaselineJoinHashTable::hash_table_cache_ =
    std::make_unique<HashTableRecycler>(CacheItemType::BASELINE_HT, 0);

//! Make hash table from an in-flight SQL query's parse tree etc.
std::shared_ptr<BaselineJoinHashTable> BaselineJoinHashTable::getInstance(
    const std::shared_ptr<Analyzer::BinOper> condition,
    const std::vector<InputTableInfo>& query_infos,
    const Data_Namespace::MemoryLevel memory_level,
    const JoinType join_type,
    const HashType preferred_hash_type,
    const int device_count,
    ColumnCacheMap& column_cache,
    Executor* executor,
    const HashTableBuildDagMap& hashtable_build_dag_map,
    const TableIdToNodeMap& table_id_to_node_map,
    const RegisteredQueryHint& query_hint) {
  decltype(std::chrono::steady_clock::now()) ts1, ts2;

  if (VLOGGING(1)) {
    VLOG(1) << "Building keyed hash table " << getHashTypeString(preferred_hash_type)
            << " for qual: " << condition->toString();
    ts1 = std::chrono::steady_clock::now();
  }
  auto inner_outer_pairs = HashJoin::normalizeColumnPairs(
      condition.get(), *executor->getCatalog(), executor->getTemporaryTables());
  auto hashtable_access_path_info =
      HashTableRecycler::getHashtableAccessPathInfo(inner_outer_pairs,
                                                    condition->get_optype(),
                                                    join_type,
                                                    hashtable_build_dag_map,
                                                    executor);
  auto join_hash_table = std::shared_ptr<BaselineJoinHashTable>(
      new BaselineJoinHashTable(condition,
                                join_type,
                                query_infos,
                                memory_level,
                                column_cache,
                                executor,
                                inner_outer_pairs,
                                device_count,
                                hashtable_access_path_info,
                                table_id_to_node_map));

  if (query_hint.isAnyQueryHintDelivered()) {
    join_hash_table->registerQueryHint(query_hint);
  }
  try {
    join_hash_table->reify(preferred_hash_type);
  } catch (const TableMustBeReplicated& e) {
    // Throw a runtime error to abort the query
    join_hash_table->freeHashBufferMemory();
    throw std::runtime_error(e.what());
  } catch (const HashJoinFail& e) {
    // HashJoinFail exceptions log an error and trigger a retry with a join loop (if
    // possible)
    join_hash_table->freeHashBufferMemory();
    throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
                                   "involved in equijoin | ") +
                       e.what());
  } catch (const ColumnarConversionNotSupported& e) {
    throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
                       e.what());
  } catch (const OutOfMemory& e) {
    throw HashJoinFail(
        std::string("Ran out of memory while building hash tables for equijoin | ") +
        e.what());
  } catch (const std::exception& e) {
    throw std::runtime_error(
        std::string("Fatal error while attempting to build hash tables for join: ") +
        e.what());
  }
  if (VLOGGING(1)) {
    ts2 = std::chrono::steady_clock::now();
    VLOG(1) << "Built keyed hash table "
            << getHashTypeString(join_hash_table->getHashType()) << " in "
            << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
            << " ms";
  }
  return join_hash_table;
}

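// [Illustrative sketch, not part of the original file.] A minimal example of
// how a caller might drive the entry point above during query compilation. The
// helper name and the concrete argument choices (CPU memory level, inner join,
// one-to-one layout preference, single device) are hypothetical; the inputs
// are assumed to have been prepared elsewhere in the pipeline.
static std::shared_ptr<BaselineJoinHashTable> buildBaselineTableSketch(
    const std::shared_ptr<Analyzer::BinOper>& equijoin_condition,
    const std::vector<InputTableInfo>& query_infos,
    ColumnCacheMap& column_cache,
    Executor* executor,
    const HashTableBuildDagMap& dag_map,
    const TableIdToNodeMap& table_id_to_node_map) {
  // Prefer a one-to-one layout; the build may fall back to one-to-many
  // internally if the one-to-one build fails.
  return BaselineJoinHashTable::getInstance(equijoin_condition,
                                            query_infos,
                                            Data_Namespace::MemoryLevel::CPU_LEVEL,
                                            JoinType::INNER,
                                            HashType::OneToOne,
                                            /*device_count=*/1,
                                            column_cache,
                                            executor,
                                            dag_map,
                                            table_id_to_node_map,
                                            RegisteredQueryHint::defaults());
}
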
BaselineJoinHashTable::BaselineJoinHashTable(
    const std::shared_ptr<Analyzer::BinOper> condition,
    const JoinType join_type,
    const std::vector<InputTableInfo>& query_infos,
    const Data_Namespace::MemoryLevel memory_level,
    ColumnCacheMap& column_cache,
    Executor* executor,
    const std::vector<InnerOuter>& inner_outer_pairs,
    const int device_count,
    HashtableAccessPathInfo hashtable_access_path_info,
    const TableIdToNodeMap& table_id_to_node_map)
    : condition_(condition)
    , join_type_(join_type)
    , query_infos_(query_infos)
    , memory_level_(memory_level)
    , executor_(executor)
    , column_cache_(column_cache)
    , inner_outer_pairs_(inner_outer_pairs)
    , catalog_(executor->getCatalog())
    , device_count_(device_count)
    , needs_dict_translation_(false)
    , hashtable_cache_key_(hashtable_access_path_info.hashed_query_plan_dag)
    , hashtable_cache_meta_info_(hashtable_access_path_info.meta_info)
    , table_keys_(hashtable_access_path_info.table_keys)
    , table_id_to_node_map_(table_id_to_node_map) {
  CHECK_GT(device_count_, 0);
  hash_tables_for_device_.resize(std::max(device_count_, 1));
  query_hint_ = RegisteredQueryHint::defaults();
}

size_t BaselineJoinHashTable::getShardCountForCondition(
    const Analyzer::BinOper* condition,
    const Executor* executor,
    const std::vector<InnerOuter>& inner_outer_pairs) {
  for (const auto& inner_outer_pair : inner_outer_pairs) {
    const auto pair_shard_count = get_shard_count(inner_outer_pair, executor);
    if (pair_shard_count) {
      return pair_shard_count;
    }
  }
  return 0;
}

std::string BaselineJoinHashTable::toString(const ExecutorDeviceType device_type,
                                            const int device_id,
                                            bool raw) const {
  auto buffer = getJoinHashBuffer(device_type, device_id);
  CHECK_LT(device_id, hash_tables_for_device_.size());
  auto hash_table = hash_tables_for_device_[device_id];
  CHECK(hash_table);
  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
#ifdef HAVE_CUDA
  std::unique_ptr<int8_t[]> buffer_copy;
  if (device_type == ExecutorDeviceType::GPU) {
    buffer_copy = std::make_unique<int8_t[]>(buffer_size);

    auto& data_mgr = catalog_->getDataMgr();
    auto device_allocator = data_mgr.createGpuAllocator(device_id);
    device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
  }
  auto ptr1 = buffer_copy ? buffer_copy.get() : buffer;
#else
  auto ptr1 = buffer;
#endif  // HAVE_CUDA
  auto ptr2 = ptr1 + offsetBufferOff();
  auto ptr3 = ptr1 + countBufferOff();
  auto ptr4 = ptr1 + payloadBufferOff();
  CHECK(hash_table);
  const auto layout = getHashType();
  return HashTable::toString(
      "keyed",
      getHashTypeString(layout),
      getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
      getKeyComponentWidth(),
      hash_table->getEntryCount(),
      ptr1,
      ptr2,
      ptr3,
      ptr4,
      buffer_size,
      raw);
}

std::set<DecodedJoinHashBufferEntry> BaselineJoinHashTable::toSet(
    const ExecutorDeviceType device_type,
    const int device_id) const {
  auto buffer = getJoinHashBuffer(device_type, device_id);
  auto hash_table = getHashTableForDevice(device_id);
  CHECK(hash_table);
  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
#ifdef HAVE_CUDA
  std::unique_ptr<int8_t[]> buffer_copy;
  if (device_type == ExecutorDeviceType::GPU) {
    buffer_copy = std::make_unique<int8_t[]>(buffer_size);
    auto& data_mgr = catalog_->getDataMgr();
    auto device_allocator = data_mgr.createGpuAllocator(device_id);
    device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
  }
  auto ptr1 = buffer_copy ? buffer_copy.get() : buffer;
#else
  auto ptr1 = buffer;
#endif  // HAVE_CUDA
  auto ptr2 = ptr1 + offsetBufferOff();
  auto ptr3 = ptr1 + countBufferOff();
  auto ptr4 = ptr1 + payloadBufferOff();
  const auto layout = hash_table->getLayout();
  return HashTable::toSet(getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
                          getKeyComponentWidth(),
                          hash_table->getEntryCount(),
                          ptr1,
                          ptr2,
                          ptr3,
                          ptr4,
                          buffer_size);
}

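// [Illustrative sketch, not part of the original file.] Because toSet() decodes
// the table into plain host data (copying GPU buffers back to the host first),
// it is handy for validation; the hypothetical helper below shows the kind of
// inspection a test might do (assumes <iostream> is available).
static void dumpBaselineTableSketch(const BaselineJoinHashTable& join_hash_table) {
  const auto entries = join_hash_table.toSet(ExecutorDeviceType::CPU, /*device_id=*/0);
  std::cout << "decoded " << entries.size() << " entries\n";
  // toString() renders the layout, key components, and raw buffers for logging.
  std::cout << join_hash_table.toString(ExecutorDeviceType::CPU) << '\n';
}
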
void BaselineJoinHashTable::reify(const HashType preferred_layout) {
  auto timer = DEBUG_TIMER(__func__);
  CHECK_LT(0, device_count_);
  const auto composite_key_info =
      HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);

  HashJoin::checkHashJoinReplicationConstraint(
      getInnerTableId(),
      getShardCountForCondition(condition_.get(), executor_, inner_outer_pairs_),
      executor_);

  if (condition_->is_overlaps_oper()) {
    CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
    HashType layout;

    if (inner_outer_pairs_[0].second->get_type_info().is_array()) {
      layout = HashType::ManyToMany;
    } else {
      layout = HashType::OneToMany;
    }
    try {
      reifyWithLayout(layout);
      return;
    } catch (const std::exception& e) {
      VLOG(1) << "Caught exception while building overlaps baseline hash table: "
              << e.what();
      throw;
    }
  }

  try {
    reifyWithLayout(preferred_layout);
  } catch (const std::exception& e) {
    VLOG(1) << "Caught exception while building baseline hash table: " << e.what();
    freeHashBufferMemory();
    reifyWithLayout(HashType::OneToMany);
  }
}

271 
273  const auto& query_info = get_inner_query_info(getInnerTableId(), query_infos_).info;
274  if (query_info.fragments.empty()) {
275  return;
276  }
277 
278  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
279  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
280  throw TooManyHashEntries();
281  }
282 
283  auto data_mgr = executor_->getDataMgr();
284  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
286  for (int device_id = 0; device_id < device_count_; ++device_id) {
287  dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(data_mgr, device_id));
288  }
289  }
290  std::vector<ColumnsForDevice> columns_per_device;
291  const auto shard_count = shardCount();
292  auto entries_per_device =
293  get_entries_per_device(total_entries, shard_count, device_count_, memory_level_);
294 
295  for (int device_id = 0; device_id < device_count_; ++device_id) {
296  const auto fragments =
297  shard_count
298  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
299  : query_info.fragments;
300  const auto columns_for_device =
301  fetchColumnsForDevice(fragments,
302  device_id,
304  ? dev_buff_owners[device_id].get()
305  : nullptr);
306  columns_per_device.push_back(columns_for_device);
307  }
308  auto hashtable_layout_type = layout;
309  auto table_keys = table_keys_;
311  // sometimes we cannot retrieve query plan dag, so try to recycler cache
312  // with the old-passioned cache key if we deal with hashtable of non-temporary table
315  columns_per_device.front().join_columns.front().num_elems,
316  condition_->get_optype(),
317  join_type_};
319  std::vector<int> alternative_table_key{catalog_->getDatabaseId(), getInnerTableId()};
320  CHECK(!alternative_table_key.empty());
321  table_keys = std::unordered_set<size_t>{boost::hash_value(alternative_table_key)};
322  VLOG(2) << "Use alternative hash table cache key";
323  }
324  hash_table_cache_->addQueryPlanDagForTableKeys(hashtable_cache_key_, table_keys);
326  table_keys);
327 
328  size_t emitted_keys_count = 0;
329  if (hashtable_layout_type == HashType::OneToMany) {
330  CHECK(!columns_per_device.front().join_columns.empty());
331  emitted_keys_count = columns_per_device.front().join_columns.front().num_elems;
332  size_t tuple_count;
333  std::tie(tuple_count, std::ignore) =
334  approximateTupleCount(columns_per_device,
338  const auto entry_count = 2 * std::max(tuple_count, size_t(1));
339 
340  // reset entries per device with one to many info
341  entries_per_device =
342  get_entries_per_device(entry_count, shard_count, device_count_, memory_level_);
343  }
344  std::vector<std::future<void>> init_threads;
345  for (int device_id = 0; device_id < device_count_; ++device_id) {
346  const auto fragments =
347  shard_count
348  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
349  : query_info.fragments;
350  init_threads.push_back(std::async(std::launch::async,
352  this,
353  columns_per_device[device_id],
354  hashtable_layout_type,
355  device_id,
356  entries_per_device,
357  emitted_keys_count,
358  logger::thread_id()));
359  }
360  for (auto& init_thread : init_threads) {
361  init_thread.wait();
362  }
363  for (auto& init_thread : init_threads) {
364  init_thread.get();
365  }
366 }
367 
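// [Illustrative note, not part of the original file.] Sizing rule used above:
// the one-to-many keyspace is allocated at twice the approximate distinct
// tuple count, i.e. a target fill factor of 0.5 for the open-addressing
// layout. For example, an estimated 3M distinct keys yields a 6M-entry
// keyspace, which get_entries_per_device() then re-divides across devices
// and shards.
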
std::pair<size_t, size_t> BaselineJoinHashTable::approximateTupleCount(
    const std::vector<ColumnsForDevice>& columns_per_device,
    QueryPlanHash key,
    CacheItemType item_type,
    DeviceIdentifier device_identifier) const {
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
  CountDistinctDescriptor count_distinct_desc{
      CountDistinctImplType::Bitmap,
      0,
      11,
      true,
      effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
          ? ExecutorDeviceType::GPU
          : ExecutorDeviceType::CPU,
      1};
  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();

  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());

  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
    const auto composite_key_info =
        HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
    const auto cached_count_info =
        getApproximateTupleCountFromCache(key, item_type, device_identifier);
    if (cached_count_info.first) {
      VLOG(1) << "Using a cached tuple count: " << *cached_count_info.first
              << ", emitted keys count: " << cached_count_info.second;
      return std::make_pair(*cached_count_info.first, cached_count_info.second);
    }
    int thread_count = cpu_threads();
    std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
    auto hll_result = &hll_buffer_all_cpus[0];

    approximate_distinct_tuples(hll_result,
                                count_distinct_desc.bitmap_sz_bits,
                                padded_size_bytes,
                                columns_per_device.front().join_columns,
                                columns_per_device.front().join_column_types,
                                thread_count);
    for (int i = 1; i < thread_count; ++i) {
      hll_unify(hll_result,
                hll_result + i * padded_size_bytes,
                1 << count_distinct_desc.bitmap_sz_bits);
    }
    return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
  }
#ifdef HAVE_CUDA
  auto data_mgr = executor_->getDataMgr();
  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
  for (auto& host_hll_buffer : host_hll_buffers) {
    host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
  }
  std::vector<std::future<void>> approximate_distinct_device_threads;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    approximate_distinct_device_threads.emplace_back(std::async(
        std::launch::async,
        [device_id,
         &columns_per_device,
         &count_distinct_desc,
         data_mgr,
         &host_hll_buffers] {
          auto allocator = data_mgr->createGpuAllocator(device_id);
          auto device_hll_buffer =
              allocator->alloc(count_distinct_desc.bitmapPaddedSizeBytes());
          data_mgr->getCudaMgr()->zeroDeviceMem(
              device_hll_buffer, count_distinct_desc.bitmapPaddedSizeBytes(), device_id);
          const auto& columns_for_device = columns_per_device[device_id];
          auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
              columns_for_device.join_columns, *allocator);
          auto join_column_types_gpu = transfer_vector_of_flat_objects_to_gpu(
              columns_for_device.join_column_types, *allocator);
          const auto key_handler =
              GenericKeyHandler(columns_for_device.join_columns.size(),
                                true,
                                join_columns_gpu,
                                join_column_types_gpu,
                                nullptr,
                                nullptr);
          const auto key_handler_gpu =
              transfer_flat_object_to_gpu(key_handler, *allocator);
          approximate_distinct_tuples_on_device(
              reinterpret_cast<uint8_t*>(device_hll_buffer),
              count_distinct_desc.bitmap_sz_bits,
              key_handler_gpu,
              columns_for_device.join_columns[0].num_elems);

          auto& host_hll_buffer = host_hll_buffers[device_id];
          allocator->copyFromDevice(&host_hll_buffer[0],
                                    device_hll_buffer,
                                    count_distinct_desc.bitmapPaddedSizeBytes());
        }));
  }
  for (auto& child : approximate_distinct_device_threads) {
    child.get();
  }
  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
  auto& result_hll_buffer = host_hll_buffers.front();
  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
  for (int device_id = 1; device_id < device_count_; ++device_id) {
    auto& host_hll_buffer = host_hll_buffers[device_id];
    hll_unify(hll_result,
              reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
              1 << count_distinct_desc.bitmap_sz_bits);
  }
  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
#else
  UNREACHABLE();
  return {0, 0};
#endif  // HAVE_CUDA
}

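// [Illustrative sketch, not part of the original file.] The estimator above
// relies on HyperLogLog: each worker fills a private register buffer, the
// buffers are merged with an element-wise max (what hll_unify does), and
// hll_size turns the merged registers into a cardinality estimate. Below is a
// self-contained toy version of that merge-then-estimate flow, independent of
// the engine's HyperLogLog.h (no small-range correction, for brevity).
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

static double toy_hll_estimate(std::vector<std::vector<uint8_t>>& per_worker_registers) {
  auto& merged = per_worker_registers.front();
  const size_t m = merged.size();  // number of registers (1 << bitmap_sz_bits)
  for (size_t w = 1; w < per_worker_registers.size(); ++w) {
    for (size_t i = 0; i < m; ++i) {  // element-wise max, i.e. the unify step
      merged[i] = std::max(merged[i], per_worker_registers[w][i]);
    }
  }
  double inverse_sum = 0;
  for (const auto rank : merged) {
    inverse_sum += std::pow(2.0, -static_cast<double>(rank));
  }
  const double alpha = 0.7213 / (1.0 + 1.079 / m);  // standard HLL bias constant
  return alpha * m * m / inverse_sum;  // raw harmonic-mean estimate
}
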
ColumnsForDevice BaselineJoinHashTable::fetchColumnsForDevice(
    const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments,
    const int device_id,
    DeviceAllocator* dev_buff_owner) {
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);

  std::vector<JoinColumn> join_columns;
  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
  std::vector<JoinColumnTypeInfo> join_column_types;
  std::vector<JoinBucketInfo> join_bucket_info;
  std::vector<std::shared_ptr<void>> malloc_owner;
  for (const auto& inner_outer_pair : inner_outer_pairs_) {
    const auto inner_col = inner_outer_pair.first;
    const auto inner_cd = get_column_descriptor_maybe(
        inner_col->get_column_id(), inner_col->get_table_id(), *catalog_);
    if (inner_cd && inner_cd->isVirtualCol) {
      throw FailedToJoinOnVirtualColumn();
    }
    join_columns.emplace_back(fetchJoinColumn(inner_col,
                                              fragments,
                                              effective_memory_level,
                                              device_id,
                                              chunks_owner,
                                              dev_buff_owner,
                                              malloc_owner,
                                              executor_,
                                              &column_cache_));
    const auto& ti = inner_col->get_type_info();
    join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
                                                      0,
                                                      0,
                                                      inline_fixed_encoding_null_val(ti),
                                                      isBitwiseEq(),
                                                      0,
                                                      get_join_column_type_kind(ti)});
  }
  return {join_columns, join_column_types, chunks_owner, join_bucket_info, malloc_owner};
}

void BaselineJoinHashTable::reifyForDevice(const ColumnsForDevice& columns_for_device,
                                           const HashType layout,
                                           const int device_id,
                                           const size_t entry_count,
                                           const size_t emitted_keys_count,
                                           const logger::ThreadId parent_thread_id) {
  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
  const auto err = initHashTableForDevice(columns_for_device.join_columns,
                                          columns_for_device.join_column_types,
                                          columns_for_device.join_buckets,
                                          layout,
                                          effective_memory_level,
                                          entry_count,
                                          emitted_keys_count,
                                          device_id);
  if (err) {
    throw HashJoinFail(
        std::string("Unrecognized error when initializing baseline hash table (") +
        std::to_string(err) + std::string(")"));
  }
}

size_t BaselineJoinHashTable::shardCount() const {
  if (memory_level_ != Data_Namespace::GPU_LEVEL) {
    return 0;
  }
  return BaselineJoinHashTable::getShardCountForCondition(
      condition_.get(), executor_, inner_outer_pairs_);
}

size_t BaselineJoinHashTable::getKeyComponentWidth() const {
  for (const auto& inner_outer_pair : inner_outer_pairs_) {
    const auto inner_col = inner_outer_pair.first;
    const auto& inner_col_ti = inner_col->get_type_info();
    if (inner_col_ti.get_logical_size() > 4) {
      CHECK_EQ(8, inner_col_ti.get_logical_size());
      return 8;
    }
  }
  return 4;
}

size_t BaselineJoinHashTable::getKeyComponentCount() const {
  return inner_outer_pairs_.size();
}

Data_Namespace::MemoryLevel BaselineJoinHashTable::getEffectiveMemoryLevel(
    const std::vector<InnerOuter>& inner_outer_pairs) const {
  for (const auto& inner_outer_pair : inner_outer_pairs) {
    if (needs_dictionary_translation(
            inner_outer_pair.first, inner_outer_pair.second, executor_)) {
      needs_dict_translation_ = true;
      return Data_Namespace::CPU_LEVEL;
    }
  }
  return memory_level_;
}

int BaselineJoinHashTable::initHashTableForDevice(
    const std::vector<JoinColumn>& join_columns,
    const std::vector<JoinColumnTypeInfo>& join_column_types,
    const std::vector<JoinBucketInfo>& join_bucket_info,
    const HashType layout,
    const Data_Namespace::MemoryLevel effective_memory_level,
    const size_t entry_count,
    const size_t emitted_keys_count,
    const int device_id) {
  auto timer = DEBUG_TIMER(__func__);
  const auto key_component_count = getKeyComponentCount();
  int err = 0;
  decltype(std::chrono::steady_clock::now()) ts1, ts2;
  ts1 = std::chrono::steady_clock::now();
  // the cached-property and query-hint logic is very similar to that of the
  // perfect join hash table, so comments on it are kept to a minimum here
  auto hash_table_property_cache = getHashtablePropertyCache();
  auto query_hint_delivered =
      query_hint_.isHintRegistered(QueryHint::kHashJoin) && query_hint_.hash_join;
  bool layout_hint_delivered = query_hint_delivered && query_hint_.hash_join->layout;
  bool need_to_update_cached_prop = false;
  auto hashtable_layout = layout_hint_delivered
                              ? hash_table_property_cache->translateLayoutType(
                                    query_hint_.hash_join->layout.value())
                              : layout;
  if (layout_hint_delivered) {
    VLOG(1) << "A user tries to set hash table layout as "
            << getHashTypeString(hashtable_layout);
  }

  if (!layout_hint_delivered) {
    auto cached_hashtable_property = hash_table_property_cache->getItemFromCache(
        hashtable_cache_key_,
        CacheItemType::HT_PROPERTY,
        DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
        std::nullopt);
    if (cached_hashtable_property) {
      if (cached_hashtable_property->hashing &&
          (*cached_hashtable_property->hashing == HashTableHashingType::PERFECT)) {
        // a previous query put a hash table prop with a perfect hashing scheme into
        // the cache; now the baseline join hash table builder is invoked, so we need
        // to update the cached prop
        need_to_update_cached_prop = true;
      }
      if (cached_hashtable_property->layout) {
        auto cached_layout_type = hash_table_property_cache->translateLayoutType(
            *cached_hashtable_property->layout);
        if (cached_layout_type == HashType::OneToOne &&
            hashtable_layout == HashType::OneToMany) {
          // if we are building a OneToMany layout hash table despite a OneToOne
          // layout hint, the OneToOne build has failed and we are retrying with a
          // OneToMany layout; we normally clear the delivered query hint when
          // rebuilding a hash table due to a layout issue, but since the layout is
          // explicitly controlled by the query hint, we add this logic for correctness
          VLOG(1) << "Skipping hash table layout hint: cannot apply OneToOne layout "
                     "hint when building OneToMany hash table";
        } else {
          hashtable_layout = cached_layout_type;
        }
      }
    }
  }

  auto allow_hashtable_recycling =
      HashTableRecycler::isSafeToCacheHashtable(table_id_to_node_map_,
                                                needs_dict_translation_,
                                                getInnerTableId());
  auto allow_put_hashtable_to_cache =
      !query_hint_delivered || (query_hint_delivered && query_hint_.hash_join->caching);
  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
    std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);

    const auto composite_key_info =
        HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);

    CHECK(!join_columns.empty());

    if (memory_level_ == Data_Namespace::CPU_LEVEL) {
      CHECK_EQ(device_id, size_t(0));
    }
    CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
    std::shared_ptr<HashTable> hash_table{nullptr};
    if (allow_hashtable_recycling) {
      hash_table = initHashTableOnCpuFromCache(hashtable_cache_key_,
                                               CacheItemType::BASELINE_HT,
                                               DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                                               hashtable_layout);
    }

    if (hash_table) {
      hash_tables_for_device_[device_id] = hash_table;
    } else {
      BaselineJoinHashTableBuilder builder;

      const auto key_handler =
          GenericKeyHandler(key_component_count,
                            true,
                            &join_columns[0],
                            &join_column_types[0],
                            &composite_key_info.sd_inner_proxy_per_key[0],
                            &composite_key_info.sd_outer_proxy_per_key[0]);
      err = builder.initHashTableOnCpu(&key_handler,
                                       composite_key_info,
                                       join_columns,
                                       join_column_types,
                                       join_bucket_info,
                                       entry_count,
                                       join_columns.front().num_elems,
                                       hashtable_layout,
                                       join_type_,
                                       getKeyComponentWidth(),
                                       getKeyComponentCount());
      hash_tables_for_device_[device_id] = builder.getHashTable();
      ts2 = std::chrono::steady_clock::now();
      auto hashtable_build_time =
          std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count();
      if (!err && allow_hashtable_recycling && hash_tables_for_device_[device_id]) {
        // add ht-related property to cache iff we have a valid hashtable
        HashTableProperty prop{
            HashTablePropertyRecycler::translateHashType(hashtable_layout),
            HashTableHashingType::BASELINE};
        if (layout_hint_delivered || need_to_update_cached_prop) {
          hash_table_property_cache->updateItemInCacheIfNecessary(
              hashtable_cache_key_,
              prop,
              CacheItemType::HT_PROPERTY,
              DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
        } else {
          hash_table_property_cache->putItemToCache(
              hashtable_cache_key_,
              prop,
              CacheItemType::HT_PROPERTY,
              DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
              0,
              0,
              std::nullopt);
        }

        if (allow_put_hashtable_to_cache) {
          hash_tables_for_device_[device_id]->setLayout(hashtable_layout);
          putHashTableOnCpuToCache(hashtable_cache_key_,
                                   CacheItemType::BASELINE_HT,
                                   hash_tables_for_device_[device_id],
                                   DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                                   hashtable_build_time);
        }
      }
    }
    // Transfer the hash table on the GPU if we've only built it on CPU
    // but the query runs on GPU (join on dictionary encoded columns).
    // Don't transfer the buffer if there was an error since we'll bail anyway.
    if (memory_level_ == Data_Namespace::GPU_LEVEL && !err) {
#ifdef HAVE_CUDA
      auto timer = DEBUG_TIMER("Copy CPU hash table to GPU");
      BaselineJoinHashTableBuilder builder;

      builder.allocateDeviceMemory(layout,
                                   getKeyComponentWidth(),
                                   getKeyComponentCount(),
                                   entry_count,
                                   emitted_keys_count,
                                   device_id,
                                   executor_);

      CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
      auto cpu_source_hash_table = hash_tables_for_device_[device_id];
      CHECK(cpu_source_hash_table);
      auto gpu_target_hash_table = builder.getHashTable();
      CHECK(gpu_target_hash_table);

      const auto gpu_buff = gpu_target_hash_table->getGpuBuffer();
      CHECK(gpu_buff);
      auto data_mgr = executor_->getDataMgr();
      auto allocator = data_mgr->createGpuAllocator(device_id);
      allocator->copyToDevice(
          gpu_buff,
          cpu_source_hash_table->getCpuBuffer(),
          cpu_source_hash_table->getHashTableBufferSize(ExecutorDeviceType::CPU));
      hash_tables_for_device_[device_id] = std::move(gpu_target_hash_table);
#else
      CHECK(false);
#endif
    }
  } else {
#ifdef HAVE_CUDA
    BaselineJoinHashTableBuilder builder;

    auto data_mgr = executor_->getDataMgr();
    CudaAllocator allocator(data_mgr, device_id);
    auto join_column_types_gpu =
        transfer_vector_of_flat_objects_to_gpu(join_column_types, allocator);
    auto join_columns_gpu =
        transfer_vector_of_flat_objects_to_gpu(join_columns, allocator);
    const auto key_handler = GenericKeyHandler(key_component_count,
                                               true,
                                               join_columns_gpu,
                                               join_column_types_gpu,
                                               nullptr,
                                               nullptr);

    err = builder.initHashTableOnGpu(&key_handler,
                                     join_columns,
                                     hashtable_layout,
                                     join_type_,
                                     getKeyComponentWidth(),
                                     getKeyComponentCount(),
                                     entry_count,
                                     emitted_keys_count,
                                     device_id,
                                     executor_);
    CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
    hash_tables_for_device_[device_id] = builder.getHashTable();
    if (!err && allow_hashtable_recycling && hash_tables_for_device_[device_id]) {
      // add ht-related property to cache iff we have a valid hashtable
      HashTableProperty prop{
          HashTablePropertyRecycler::translateHashType(hashtable_layout),
          HashTableHashingType::BASELINE};
      if (layout_hint_delivered || need_to_update_cached_prop) {
        hash_table_property_cache->updateItemInCacheIfNecessary(
            hashtable_cache_key_,
            prop,
            CacheItemType::HT_PROPERTY,
            DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
      } else {
        hash_table_property_cache->putItemToCache(hashtable_cache_key_,
                                                  prop,
                                                  CacheItemType::HT_PROPERTY,
                                                  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                                                  0,
                                                  0,
                                                  std::nullopt);
      }
    }
#else
    UNREACHABLE();
#endif
  }
  return err;
}

#define LL_CONTEXT executor_->cgen_state_->context_
#define LL_BUILDER executor_->cgen_state_->ir_builder_
#define LL_INT(v) executor_->cgen_state_->llInt(v)
#define LL_FP(v) executor_->cgen_state_->llFp(v)
#define ROW_FUNC executor_->cgen_state_->row_func_

llvm::Value* BaselineJoinHashTable::codegenSlot(const CompilationOptions& co,
                                                const size_t index) {
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  CHECK(getHashType() == HashType::OneToOne);
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  auto key_buff_lv = codegenKey(co);
  const auto hash_ptr = hashPtr(index);
  const auto key_ptr_lv =
      LL_BUILDER.CreatePointerCast(key_buff_lv, llvm::Type::getInt8PtrTy(LL_CONTEXT));
  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
  const auto hash_table = getHashTableForDevice(size_t(0));
  return executor_->cgen_state_->emitExternalCall(
      "baseline_hash_join_idx_" + std::to_string(key_component_width * 8),
      get_int_type(64, LL_CONTEXT),
      {hash_ptr, key_ptr_lv, key_size_lv, LL_INT(hash_table->getEntryCount())});
}

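// [Illustrative sketch, not part of the original file.] A toy version of the
// probe that the generated code delegates to ("baseline_hash_join_idx_*"):
// open addressing over fixed-width composite keys with linear probing. The
// hash mix and the table encoding here are simplified stand-ins; the real
// runtime also stops at an empty-key sentinel instead of scanning all slots.
#include <cstring>

static int64_t toy_baseline_probe(const int8_t* table,  // entry_count packed keys
                                  const int64_t* key,   // composite key to look up
                                  size_t key_component_count,
                                  size_t entry_count) {
  const size_t key_bytes = key_component_count * sizeof(int64_t);
  size_t h = 0;
  for (size_t i = 0; i < key_component_count; ++i) {  // simplistic FNV-style mix
    h = h * 1099511628211ULL + static_cast<size_t>(key[i]);
  }
  for (size_t probe = 0; probe < entry_count; ++probe) {  // linear probing
    const size_t slot = (h + probe) % entry_count;
    const int8_t* entry = table + slot * key_bytes;
    if (std::memcmp(entry, key, key_bytes) == 0) {
      return static_cast<int64_t>(slot);  // matching slot found
    }
  }
  return -1;  // no match
}
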
HashJoinMatchingSet BaselineJoinHashTable::codegenMatchingSet(
    const CompilationOptions& co,
    const size_t index) {
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  const auto hash_table = getHashTableForDevice(size_t(0));
  CHECK(hash_table);
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  auto key_buff_lv = codegenKey(co);
  CHECK(getHashType() == HashType::OneToMany);
  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
  const auto composite_dict_ptr_type =
      llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
  const auto composite_key_dict =
      hash_ptr->getType()->isPointerTy()
          ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
          : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
  const auto key_component_count = getKeyComponentCount();
  const auto key = executor_->cgen_state_->emitExternalCall(
      "get_composite_key_index_" + std::to_string(key_component_width * 8),
      get_int_type(64, LL_CONTEXT),
      {key_buff_lv,
       LL_INT(key_component_count),
       composite_key_dict,
       LL_INT(hash_table->getEntryCount())});
  auto one_to_many_ptr = hash_ptr;
  if (one_to_many_ptr->getType()->isPointerTy()) {
    one_to_many_ptr =
        LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
  } else {
    CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
  }
  const auto composite_key_dict_size = offsetBufferOff();
  one_to_many_ptr =
      LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
  return HashJoin::codegenMatchingSet(
      {one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(hash_table->getEntryCount() - 1)},
      false,
      false,
      false,
      getComponentBufferSize(),
      executor_);
}

size_t BaselineJoinHashTable::offsetBufferOff() const noexcept {
  return getKeyBufferSize();
}

size_t BaselineJoinHashTable::countBufferOff() const noexcept {
  if (layoutRequiresAdditionalBuffers(getHashType())) {
    return offsetBufferOff() + getComponentBufferSize();
  } else {
    return getKeyBufferSize();
  }
}

size_t BaselineJoinHashTable::payloadBufferOff() const noexcept {
  if (layoutRequiresAdditionalBuffers(getHashType())) {
    return countBufferOff() + getComponentBufferSize();
  } else {
    return getKeyBufferSize();
  }
}

size_t BaselineJoinHashTable::getKeyBufferSize() const noexcept {
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  const auto key_component_count = getKeyComponentCount();
  auto hash_table = getHashTableForDevice(size_t(0));
  CHECK(hash_table);
  if (layoutRequiresAdditionalBuffers(hash_table->getLayout())) {
    return hash_table->getEntryCount() * key_component_count * key_component_width;
  } else {
    return hash_table->getEntryCount() * (key_component_count + 1) * key_component_width;
  }
}

size_t BaselineJoinHashTable::getComponentBufferSize() const noexcept {
  const auto hash_table = getHashTableForDevice(size_t(0));
  return hash_table->getEntryCount() * sizeof(int32_t);
}

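// [Illustrative sketch, not part of the original file.] Worked offsets for the
// [keys][offsets][counts][payload] buffer order implied by the accessors above,
// under assumed sizes: entry_count = 1024, key_component_count = 2,
// key_component_width = 8, one-to-many layout.
namespace baseline_layout_example {
constexpr size_t entry_count = 1024;
constexpr size_t key_buffer_size = entry_count * 2 * 8;           // getKeyBufferSize()
constexpr size_t component_size = entry_count * sizeof(int32_t);  // getComponentBufferSize()
constexpr size_t offset_off = key_buffer_size;                    // offsetBufferOff() = 16384
constexpr size_t count_off = offset_off + component_size;         // countBufferOff() = 20480
constexpr size_t payload_off = count_off + component_size;        // payloadBufferOff() = 24576
static_assert(payload_off == 24576, "keys, offsets, and counts precede the payload");
}  // namespace baseline_layout_example
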
llvm::Value* BaselineJoinHashTable::codegenKey(const CompilationOptions& co) {
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
  llvm::Value* key_buff_lv{nullptr};
  switch (key_component_width) {
    case 4:
      key_buff_lv =
          LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
      break;
    case 8:
      key_buff_lv =
          LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
      break;
    default:
      CHECK(false);
  }

  CodeGenerator code_generator(executor_);
  for (size_t i = 0; i < getKeyComponentCount(); ++i) {
    const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(
        key_buff_lv->getType()->getScalarType()->getPointerElementType(),
        key_buff_lv,
        LL_INT(i));
    const auto& inner_outer_pair = inner_outer_pairs_[i];
    const auto outer_col = inner_outer_pair.second;
    const auto key_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col);
    const auto val_col_var =
        dynamic_cast<const Analyzer::ColumnVar*>(inner_outer_pair.first);
    if (key_col_var && val_col_var &&
        self_join_not_covered_by_left_deep_tree(
            key_col_var,
            val_col_var,
            get_max_rte_scan_table(executor_->cgen_state_->scan_idx_to_hash_pos_))) {
      throw std::runtime_error(
          "Query execution failed because the query contains an unsupported self-join "
          "pattern. We suspect the query requires multiple left-deep join trees due to "
          "the join condition of the self-join, which is not supported for now. Please "
          "consider rewriting the table order in the FROM clause.");
    }
    const auto col_lvs = code_generator.codegen(outer_col, true, co);
    CHECK_EQ(size_t(1), col_lvs.size());
    const auto col_lv = LL_BUILDER.CreateSExt(
        col_lvs.front(), get_int_type(key_component_width * 8, LL_CONTEXT));
    LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
  }
  return key_buff_lv;
}

llvm::Value* BaselineJoinHashTable::hashPtr(const size_t index) {
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
  const auto pi8_type = llvm::Type::getInt8PtrTy(LL_CONTEXT);
  return hash_ptr->getType()->isPointerTy()
             ? LL_BUILDER.CreatePointerCast(hash_ptr, pi8_type)
             : LL_BUILDER.CreateIntToPtr(hash_ptr, pi8_type);
}

#undef ROW_FUNC
#undef LL_INT
#undef LL_BUILDER
#undef LL_CONTEXT

int BaselineJoinHashTable::getInnerTableId() const noexcept {
  try {
    return getInnerTableId(inner_outer_pairs_);
  } catch (...) {
    CHECK(false);
  }
  return 0;
}

int BaselineJoinHashTable::getInnerTableRteIdx() const noexcept {
  CHECK(!inner_outer_pairs_.empty());
  const auto first_inner_col = inner_outer_pairs_.front().first;
  return first_inner_col->get_rte_idx();
}

HashType BaselineJoinHashTable::getHashType() const noexcept {
  auto hash_table = getHashTableForDevice(size_t(0));
  CHECK(hash_table);
  if (layout_override_) {
    return *layout_override_;
  } else {
    return hash_table->getLayout();
  }
}

int BaselineJoinHashTable::getInnerTableId(
    const std::vector<InnerOuter>& inner_outer_pairs) {
  CHECK(!inner_outer_pairs.empty());
  const auto first_inner_col = inner_outer_pairs.front().first;
  return first_inner_col->get_table_id();
}

std::shared_ptr<HashTable> BaselineJoinHashTable::initHashTableOnCpuFromCache(
    QueryPlanHash key,
    CacheItemType item_type,
    DeviceIdentifier device_identifier,
    HashType expected_layout) {
  auto timer = DEBUG_TIMER(__func__);
  VLOG(1) << "Checking CPU hash table cache.";
  CHECK(hash_table_cache_);
  HashtableCacheMetaInfo meta_info;
  meta_info.registered_query_hint = query_hint_;
  if (auto cached_hash_table = hash_table_cache_->getItemFromCache(
          key, item_type, device_identifier, meta_info)) {
    if (expected_layout != cached_hash_table->getLayout()) {
      auto translated_expected_layout =
          HashTablePropertyRecycler::translateHashType(expected_layout);
      auto translated_cached_layout =
          HashTablePropertyRecycler::translateHashType(cached_hash_table->getLayout());
      VLOG(1) << "Skipping hash table recycling: candidate cached hash table does not "
                 "have the expected layout (expected: "
              << HashTablePropertyRecycler::getLayoutString(translated_expected_layout)
              << ", actual: "
              << HashTablePropertyRecycler::getLayoutString(translated_cached_layout)
              << ")";
      return nullptr;
    }
    return cached_hash_table;
  }
  return nullptr;
}

void BaselineJoinHashTable::putHashTableOnCpuToCache(
    QueryPlanHash key,
    CacheItemType item_type,
    std::shared_ptr<HashTable> hashtable_ptr,
    DeviceIdentifier device_identifier,
    size_t hashtable_building_time) {
  CHECK(hash_table_cache_);
  CHECK(hashtable_ptr && !hashtable_ptr->getGpuBuffer());
  HashtableCacheMetaInfo meta_info;
  meta_info.registered_query_hint = query_hint_;
  hash_table_cache_->putItemToCache(
      key,
      hashtable_ptr,
      item_type,
      device_identifier,
      hashtable_ptr->getHashTableBufferSize(ExecutorDeviceType::CPU),
      hashtable_building_time,
      meta_info);
}

std::pair<std::optional<size_t>, size_t>
BaselineJoinHashTable::getApproximateTupleCountFromCache(
    QueryPlanHash key,
    CacheItemType item_type,
    DeviceIdentifier device_identifier) const {
  CHECK(hash_table_cache_);
  HashtableCacheMetaInfo meta_info;
  meta_info.registered_query_hint = query_hint_;
  if (hash_table_cache_->hasItemInCache(key, item_type, device_identifier, meta_info)) {
    auto hash_table_ptr =
        hash_table_cache_->getItemFromCache(key, item_type, device_identifier);
    if (hash_table_ptr) {
      return std::make_pair(hash_table_ptr->getEntryCount() / 2,
                            hash_table_ptr->getEmittedKeysCount());
    }
  }
  return std::make_pair(std::nullopt, 0);
}

bool BaselineJoinHashTable::isBitwiseEq() const {
  return condition_->get_optype() == kBW_EQ;
}