OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashtableRecycler Class Reference

#include <HashtableRecycler.h>

+ Inheritance diagram for HashtableRecycler:
+ Collaboration diagram for HashtableRecycler:

Public Member Functions

 HashtableRecycler (CacheItemType hashtable_type, int num_gpus)
 
void putItemToCache (QueryPlanHash key, std::shared_ptr< HashTable > item_ptr, CacheItemType item_type, DeviceIdentifier device_identifier, size_t item_size, size_t compute_time, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
 
void initCache () override
 
void clearCache () override
 
void markCachedItemAsDirty (size_t table_key, std::unordered_set< QueryPlanHash > &key_set, CacheItemType item_type, DeviceIdentifier device_identifier) override
 
std::string toString () const override
 
bool checkOverlapsHashtableBucketCompatability (const OverlapsHashTableMetaInfo &candidate_bucket_dim, const OverlapsHashTableMetaInfo &target_bucket_dim) const
 
std::tuple< QueryPlanHash,
std::shared_ptr< HashTable >
, std::optional
< HashtableCacheMetaInfo > > 
getCachedHashtableWithoutCacheKey (std::set< size_t > &visited, CacheItemType hash_table_type, DeviceIdentifier device_identifier)
 
void addQueryPlanDagForTableKeys (size_t hashed_query_plan_dag, const std::unordered_set< size_t > &table_keys)
 
std::optional
< std::unordered_set< size_t > > 
getMappedQueryPlanDagsWithTableKey (size_t table_key) const
 
void removeTableKeyInfoFromQueryPlanDagMap (size_t table_key)
 
- Public Member Functions inherited from DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >
 DataRecycler (const std::vector< CacheItemType > &item_types, size_t total_cache_size, size_t max_item_size, int num_gpus)
 
virtual ~DataRecycler ()=default
 
virtual std::shared_ptr
< HashTable > 
getItemFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt)=0
 
void markCachedItemAsDirtyImpl (QueryPlanHash key, CachedItemContainer &m) const
 
bool isCachedItemDirty (QueryPlanHash key, CachedItemContainer &m) const
 
std::shared_ptr
< CachedItemContainer > 
getCachedItemContainer (CacheItemType item_type, DeviceIdentifier device_identifier) const
 
std::optional< CachedItem
< std::shared_ptr< HashTable >
, HashtableCacheMetaInfo > > 
getCachedItemWithoutConsideringMetaInfo (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, CachedItemContainer &m, std::lock_guard< std::mutex > &lock)
 
size_t getCurrentNumCachedItems (CacheItemType item_type, DeviceIdentifier device_identifier) const
 
size_t getCurrentNumDirtyCachedItems (CacheItemType item_type, DeviceIdentifier device_identifier) const
 
size_t getCurrentNumCleanCachedItems (CacheItemType item_type, DeviceIdentifier device_identifier) const
 
size_t getCurrentCacheSizeForDevice (CacheItemType item_type, DeviceIdentifier device_identifier) const
 
std::shared_ptr< CacheItemMetric > getCachedItemMetric (CacheItemType item_type, DeviceIdentifier device_identifier, QueryPlanHash key) const
 
void setTotalCacheSize (CacheItemType item_type, size_t new_total_cache_size)
 
void setMaxCacheItemSize (CacheItemType item_type, size_t new_max_cache_item_size)
 

Static Public Member Functions

static HashtableAccessPathInfo getHashtableAccessPathInfo (const std::vector< InnerOuter > &inner_outer_pairs, const SQLOps op_type, const JoinType join_type, const HashTableBuildDagMap &hashtable_build_dag_map, Executor *executor)
 
static size_t getJoinColumnInfoHash (std::vector< const Analyzer::ColumnVar * > &inner_cols, std::vector< const Analyzer::ColumnVar * > &outer_cols, Executor *executor)
 
static bool isSafeToCacheHashtable (const TableIdToNodeMap &table_id_to_node_map, bool need_dict_translation, const int table_id)
 

Public Attributes

 g_hashtable_cache_total_bytes
 
 g_max_cacheable_hashtable_size_bytes
 
 num_gpus
 

Private Member Functions

bool hasItemInCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) const override
 
void removeItemFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
 
void cleanupCacheForInsertion (CacheItemType item_type, DeviceIdentifier device_identifier, size_t required_size, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
 

Private Attributes

std::unordered_map< size_t,
std::unordered_set< size_t > > 
table_key_to_query_plan_dag_map_
 

Additional Inherited Members

- Public Types inherited from DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >
using CachedItemContainer = std::vector< CachedItem< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >>
 
using PerDeviceCacheItemContainer = std::unordered_map< DeviceIdentifier, std::shared_ptr< CachedItemContainer >>
 
using PerTypeCacheItemContainer = std::unordered_map< CacheItemType, std::shared_ptr< PerDeviceCacheItemContainer >>
 
using PerTypeCacheMetricTracker = std::unordered_map< CacheItemType, CacheMetricTracker >
 
- Protected Member Functions inherited from DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >
void removeCachedItemFromBeginning (CacheItemType item_type, DeviceIdentifier device_identifier, int offset)
 
void sortCacheContainerByQueryMetric (CacheItemType item_type, DeviceIdentifier device_identifier)
 
std::mutex & getCacheLock () const
 
CacheMetricTracker & getMetricTracker (CacheItemType item_type)
 
CacheMetricTracker const & getMetricTracker (CacheItemType item_type) const
 
std::unordered_set
< CacheItemType > const & 
getCacheItemType () const
 
PerTypeCacheItemContainer const & getItemCache () const
 

Detailed Description

Definition at line 48 of file HashtableRecycler.h.

Constructor & Destructor Documentation

HashtableRecycler::HashtableRecycler ( CacheItemType  hashtable_type,
int  num_gpus 
)
inline

Definition at line 51 of file HashtableRecycler.h.

52  : DataRecycler({hashtable_type},
DataRecycler(const std::vector< CacheItemType > &item_types, size_t total_cache_size, size_t max_item_size, int num_gpus)
Definition: DataRecycler.h:410

Member Function Documentation

void HashtableRecycler::addQueryPlanDagForTableKeys ( size_t  hashed_query_plan_dag,
const std::unordered_set< size_t > &  table_keys 
)

Definition at line 433 of file HashtableRecycler.cpp.

References DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCacheLock(), and table_key_to_query_plan_dag_map_.

435  {
436  std::lock_guard<std::mutex> lock(getCacheLock());
437  for (auto table_key : table_keys) {
438  auto it = table_key_to_query_plan_dag_map_.find(table_key);
439  if (it != table_key_to_query_plan_dag_map_.end()) {
440  it->second.insert(hashed_query_plan_dag);
441  } else {
442  std::unordered_set<size_t> query_plan_dags{hashed_query_plan_dag};
443  table_key_to_query_plan_dag_map_.emplace(table_key, query_plan_dags);
444  }
445  }
446 }
std::unordered_map< size_t, std::unordered_set< size_t > > table_key_to_query_plan_dag_map_

+ Here is the call graph for this function:

bool HashtableRecycler::checkOverlapsHashtableBucketCompatability ( const OverlapsHashTableMetaInfo &  candidate_bucket_dim,
const OverlapsHashTableMetaInfo &  target_bucket_dim 
) const

Definition at line 293 of file HashtableRecycler.cpp.

References OverlapsHashTableMetaInfo::bucket_sizes, i, OverlapsHashTableMetaInfo::overlaps_bucket_threshold, and OverlapsHashTableMetaInfo::overlaps_max_table_size_bytes.

Referenced by hasItemInCache(), and putItemToCache().

295  {
296  if (candidate.bucket_sizes.size() != target.bucket_sizes.size()) {
297  return false;
298  }
299  for (size_t i = 0; i < candidate.bucket_sizes.size(); i++) {
300  if (std::abs(target.bucket_sizes[i] - candidate.bucket_sizes[i]) > 1e-4) {
301  return false;
302  }
303  }
304  auto threshold_check =
305  candidate.overlaps_bucket_threshold == target.overlaps_bucket_threshold;
306  auto hashtable_size_check =
307  candidate.overlaps_max_table_size_bytes == target.overlaps_max_table_size_bytes;
308  return threshold_check && hashtable_size_check;
309 }

+ Here is the caller graph for this function:

void HashtableRecycler::cleanupCacheForInsertion ( CacheItemType  item_type,
DeviceIdentifier  device_identifier,
size_t  required_size,
std::lock_guard< std::mutex > &  lock,
std::optional< HashtableCacheMetaInfo >  meta_info = std::nullopt 
)
override private virtual

Implements DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >.

Definition at line 201 of file HashtableRecycler.cpp.

References g_is_test_env, DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getMetricTracker(), REMOVE, DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::removeCachedItemFromBeginning(), and DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::sortCacheContainerByQueryMetric().

Referenced by putItemToCache().

206  {
207  // sort the vector based on the importance of the cached items (by # referenced, size
208  // and compute time) and then remove unimportant cached items
209  int elimination_target_offset = 0;
210  size_t removed_size = 0;
211  auto& metric_tracker = getMetricTracker(item_type);
212  auto actual_space_to_free = metric_tracker.getTotalCacheSize() / 2;
213  if (!g_is_test_env && required_size < actual_space_to_free) {
214  // remove enough items to avoid too frequent cache cleanup
215  // we do not apply thin to test code since test scenarios are designed to
216  // specific size of items and their caches
217  required_size = actual_space_to_free;
218  }
219  metric_tracker.sortCacheInfoByQueryMetric(device_identifier);
220  auto cached_item_metrics = metric_tracker.getCacheItemMetrics(device_identifier);
221  sortCacheContainerByQueryMetric(item_type, device_identifier);
222 
223  // collect targets to eliminate
224  for (auto& metric : cached_item_metrics) {
225  auto target_size = metric->getMemSize();
226  ++elimination_target_offset;
227  removed_size += target_size;
228  if (removed_size > required_size) {
229  break;
230  }
231  }
232 
233  // eliminate targets in 1) cache container and 2) their metrics
234  removeCachedItemFromBeginning(item_type, device_identifier, elimination_target_offset);
235  metric_tracker.removeMetricFromBeginning(device_identifier, elimination_target_offset);
236 
237  // update the current cache size after this cleanup
238  metric_tracker.updateCurrentCacheSize(
239  device_identifier, CacheUpdateAction::REMOVE, removed_size);
240 }
CacheMetricTracker & getMetricTracker(CacheItemType item_type)
Definition: DataRecycler.h:606
bool g_is_test_env
Definition: Execute.cpp:136
void sortCacheContainerByQueryMetric(CacheItemType item_type, DeviceIdentifier device_identifier)
Definition: DataRecycler.h:584
void removeCachedItemFromBeginning(CacheItemType item_type, DeviceIdentifier device_identifier, int offset)
Definition: DataRecycler.h:573

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void HashtableRecycler::clearCache ( )
override virtual

Implements DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >.

Definition at line 242 of file HashtableRecycler.cpp.

References CacheMetricTracker::clearCacheMetricTracker(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCacheItemType(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCacheLock(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getItemCache(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getMetricTracker(), and table_key_to_query_plan_dag_map_.

242  {
243  std::lock_guard<std::mutex> lock(getCacheLock());
244  for (auto& item_type : getCacheItemType()) {
246  auto item_cache = getItemCache().find(item_type)->second;
247  for (auto& kv : *item_cache) {
248  kv.second->clear();
249  }
250  }
252 }
CacheMetricTracker & getMetricTracker(CacheItemType item_type)
Definition: DataRecycler.h:606
void clearCacheMetricTracker()
Definition: DataRecycler.h:274
std::unordered_map< size_t, std::unordered_set< size_t > > table_key_to_query_plan_dag_map_
std::unordered_set< CacheItemType > const & getCacheItemType() const
Definition: DataRecycler.h:616

+ Here is the call graph for this function:

std::tuple< QueryPlanHash, std::shared_ptr< HashTable >, std::optional< HashtableCacheMetaInfo > > HashtableRecycler::getCachedHashtableWithoutCacheKey ( std::set< size_t > &  visited,
CacheItemType  hash_table_type,
DeviceIdentifier  device_identifier 
)

Definition at line 420 of file HashtableRecycler.cpp.

References EMPTY_HASHED_PLAN_DAG_KEY, DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCachedItemContainer(), and DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCacheLock().

422  {
423  std::lock_guard<std::mutex> lock(getCacheLock());
424  auto hashtable_cache = getCachedItemContainer(hash_table_type, device_identifier);
425  for (auto& ht : *hashtable_cache) {
426  if (!visited.count(ht.key)) {
427  return std::make_tuple(ht.key, ht.cached_item, ht.meta_info);
428  }
429  }
430  return std::make_tuple(EMPTY_HASHED_PLAN_DAG_KEY, nullptr, std::nullopt);
431 }
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
std::shared_ptr< CachedItemContainer > getCachedItemContainer(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:480

+ Here is the call graph for this function:

HashtableAccessPathInfo HashtableRecycler::getHashtableAccessPathInfo ( const std::vector< InnerOuter > &  inner_outer_pairs,
const SQLOps  op_type,
const JoinType  join_type,
const HashTableBuildDagMap &  hashtable_build_dag_map,
Executor *  executor 
)
static

Definition at line 367 of file HashtableRecycler.cpp.

References EMPTY_HASHED_PLAN_DAG_KEY, getJoinColumnInfoHash(), HashtableAccessPathInfo::hashed_query_plan_dag, kDirect, HashtableAccessPathInfo::meta_info, and HashtableAccessPathInfo::table_keys.

Referenced by BaselineJoinHashTable::getInstance(), RangeJoinHashTable::getInstance(), PerfectJoinHashTable::getInstance(), and OverlapsJoinHashTable::getInstance().

372  {
373  std::vector<const Analyzer::ColumnVar*> inner_cols_vec, outer_cols_vec;
374  size_t join_qual_info = EMPTY_HASHED_PLAN_DAG_KEY;
375  for (auto& join_col_pair : inner_outer_pairs) {
376  inner_cols_vec.push_back(join_col_pair.first);
377  // extract inner join col's id
378  // b/c when the inner col comes from a subquery's resulset,
379  // table id / rte_index can be different even if we have the same
380  // subquery's semantic, i.e., project col A from table T
381  boost::hash_combine(join_qual_info,
382  executor->getQueryPlanDagCache().getJoinColumnsInfoHash(
383  join_col_pair.first, JoinColumnSide::kDirect, true));
384  boost::hash_combine(join_qual_info, op_type);
385  boost::hash_combine(join_qual_info, join_type);
386  auto outer_col_var = dynamic_cast<const Analyzer::ColumnVar*>(join_col_pair.second);
387  boost::hash_combine(join_qual_info, join_col_pair.first->get_type_info().toString());
388  if (outer_col_var) {
389  outer_cols_vec.push_back(outer_col_var);
390  if (join_col_pair.first->get_type_info().is_dict_encoded_string()) {
391  // add comp param for dict encoded string
392  boost::hash_combine(join_qual_info,
393  executor->getQueryPlanDagCache().getJoinColumnsInfoHash(
394  outer_col_var, JoinColumnSide::kDirect, true));
395  boost::hash_combine(join_qual_info, outer_col_var->get_type_info().toString());
396  }
397  }
398  }
399  auto join_cols_info = getJoinColumnInfoHash(inner_cols_vec, outer_cols_vec, executor);
400  HashtableAccessPathInfo access_path_info;
401  auto it = hashtable_build_dag_map.find(join_cols_info);
402  if (it != hashtable_build_dag_map.end()) {
403  size_t hashtable_access_path = EMPTY_HASHED_PLAN_DAG_KEY;
404  boost::hash_combine(hashtable_access_path, it->second.inner_cols_access_path);
405  boost::hash_combine(hashtable_access_path, join_qual_info);
406  if (inner_cols_vec.front()->get_type_info().is_dict_encoded_string()) {
407  boost::hash_combine(hashtable_access_path, it->second.outer_cols_access_path);
408  }
409  HashtableCacheMetaInfo meta_info;
410  access_path_info.hashed_query_plan_dag = hashtable_access_path;
411  access_path_info.meta_info = meta_info;
412  access_path_info.table_keys = it->second.inputTableKeys;
413  }
414  return access_path_info;
415 }
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
static size_t getJoinColumnInfoHash(std::vector< const Analyzer::ColumnVar * > &inner_cols, std::vector< const Analyzer::ColumnVar * > &outer_cols, Executor *executor)
QueryPlanHash hashed_query_plan_dag
std::unordered_set< size_t > table_keys
HashtableCacheMetaInfo meta_info

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t HashtableRecycler::getJoinColumnInfoHash ( std::vector< const Analyzer::ColumnVar * > &  inner_cols,
std::vector< const Analyzer::ColumnVar * > &  outer_cols,
Executor *  executor 
)
static

Definition at line 311 of file HashtableRecycler.cpp.

References EMPTY_HASHED_PLAN_DAG_KEY.

Referenced by getHashtableAccessPathInfo().

314  {
315  auto hashed_join_col_info = EMPTY_HASHED_PLAN_DAG_KEY;
316  boost::hash_combine(
317  hashed_join_col_info,
318  executor->getQueryPlanDagCache().translateColVarsToInfoHash(inner_cols, false));
319  boost::hash_combine(
320  hashed_join_col_info,
321  executor->getQueryPlanDagCache().translateColVarsToInfoHash(outer_cols, false));
322  return hashed_join_col_info;
323 }
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY

+ Here is the caller graph for this function:

std::optional< std::unordered_set< size_t > > HashtableRecycler::getMappedQueryPlanDagsWithTableKey ( size_t  table_key) const

Definition at line 449 of file HashtableRecycler.cpp.

References DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCacheLock(), and table_key_to_query_plan_dag_map_.

449  {
450  std::lock_guard<std::mutex> lock(getCacheLock());
451  auto it = table_key_to_query_plan_dag_map_.find(table_key);
452  return it != table_key_to_query_plan_dag_map_.end() ? std::make_optional(it->second)
453  : std::nullopt;
454 }
std::unordered_map< size_t, std::unordered_set< size_t > > table_key_to_query_plan_dag_map_

+ Here is the call graph for this function:

bool HashtableRecycler::hasItemInCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier,
std::lock_guard< std::mutex > &  lock,
std::optional< HashtableCacheMetaInfo >  meta_info = std::nullopt 
) const
override private virtual

Implements DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >.

Definition at line 21 of file HashtableRecycler.cpp.

References CHECK, checkOverlapsHashtableBucketCompatability(), EMPTY_HASHED_PLAN_DAG_KEY, g_enable_data_recycler, g_use_hashtable_cache, DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCachedItemContainer(), and OVERLAPS_HT.

Referenced by putItemToCache().

26  {
29  return false;
30  }
31  auto hashtable_cache = getCachedItemContainer(item_type, device_identifier);
32  // hashtable cache of the *any* device type should be properly initialized
33  CHECK(hashtable_cache);
34  auto candidate_ht_it = std::find_if(
35  hashtable_cache->begin(), hashtable_cache->end(), [&key](const auto& cached_item) {
36  return cached_item.key == key;
37  });
38  if (candidate_ht_it != hashtable_cache->end()) {
39  if (item_type == OVERLAPS_HT) {
40  CHECK(candidate_ht_it->meta_info && candidate_ht_it->meta_info->overlaps_meta_info);
41  CHECK(meta_info && meta_info->overlaps_meta_info);
43  *candidate_ht_it->meta_info->overlaps_meta_info,
44  *meta_info->overlaps_meta_info)) {
45  return true;
46  }
47  } else {
48  return true;
49  }
50  }
51  return false;
52 }
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
std::shared_ptr< CachedItemContainer > getCachedItemContainer(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:480
bool g_enable_data_recycler
Definition: Execute.cpp:141
bool checkOverlapsHashtableBucketCompatability(const OverlapsHashTableMetaInfo &candidate_bucket_dim, const OverlapsHashTableMetaInfo &target_bucket_dim) const
#define CHECK(condition)
Definition: Logger.h:211
bool g_use_hashtable_cache
Definition: Execute.cpp:142

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void HashtableRecycler::initCache ( )
inline override virtual

bool HashtableRecycler::isSafeToCacheHashtable ( const TableIdToNodeMap &  table_id_to_node_map,
bool  need_dict_translation,
const int  table_id 
)
static

Definition at line 325 of file HashtableRecycler.cpp.

Referenced by BaselineJoinHashTable::getApproximateTupleCountFromCache(), PerfectJoinHashTable::initHashTableForDevice(), and BaselineJoinHashTable::initHashTableForDevice().

328  {
329  // if hashtable is built from subquery's resultset we need to check
330  // 1) whether resulset rows can have inconsistency, e.g., rows can randomly be
331  // permutated per execution and 2) whether it needs dictionary translation for hashtable
332  // building to recycle the hashtable safely
333  auto getNodeByTableId =
334  [&table_id_to_node_map](const int table_id) -> const RelAlgNode* {
335  auto it = table_id_to_node_map.find(table_id);
336  if (it != table_id_to_node_map.end()) {
337  return it->second;
338  }
339  return nullptr;
340  };
341  bool found_sort_node = false;
342  bool found_project_node = false;
343  if (table_id < 0) {
344  auto origin_table_id = table_id * -1;
345  auto inner_node = getNodeByTableId(origin_table_id);
346  if (!inner_node) {
347  // we have to keep the node info of temporary resultset
348  // so in this case we are not safe to recycle the hashtable
349  return false;
350  }
351  // it is not safe to recycle the hashtable when
352  // this resultset may have resultset ordering inconsistency and/or
353  // need dictionary translation for hashtable building
354  auto sort_node = dynamic_cast<const RelSort*>(inner_node);
355  if (sort_node) {
356  found_sort_node = true;
357  } else {
358  auto project_node = dynamic_cast<const RelProject*>(inner_node);
359  if (project_node) {
360  found_project_node = true;
361  }
362  }
363  }
364  return !(found_sort_node || (found_project_node && need_dict_translation));
365 }

+ Here is the caller graph for this function:

void HashtableRecycler::markCachedItemAsDirty ( size_t  table_key,
std::unordered_set< QueryPlanHash > &  key_set,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
)
override virtual

Implements DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >.

Definition at line 254 of file HashtableRecycler.cpp.

References g_enable_data_recycler, g_use_hashtable_cache, DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCachedItemContainer(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCacheLock(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::markCachedItemAsDirtyImpl(), and removeTableKeyInfoFromQueryPlanDagMap().

257  {
258  if (!g_enable_data_recycler || !g_use_hashtable_cache || key_set.empty()) {
259  return;
260  }
261  std::lock_guard<std::mutex> lock(getCacheLock());
262  auto hashtable_cache = getCachedItemContainer(item_type, device_identifier);
263  for (auto key : key_set) {
264  markCachedItemAsDirtyImpl(key, *hashtable_cache);
265  }
266  // after marking all cached hashtable having the given "table_key" as its one of input,
267  // we remove the mapping between the table_key -> hashed_query_plan_dag
268  // since we do not need to care about "already marked" item in the cache
270 }
std::shared_ptr< CachedItemContainer > getCachedItemContainer(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:480
void markCachedItemAsDirtyImpl(QueryPlanHash key, CachedItemContainer &m) const
Definition: DataRecycler.h:456
bool g_enable_data_recycler
Definition: Execute.cpp:141
void removeTableKeyInfoFromQueryPlanDagMap(size_t table_key)
bool g_use_hashtable_cache
Definition: Execute.cpp:142

+ Here is the call graph for this function:

void HashtableRecycler::putItemToCache ( QueryPlanHash  key,
std::shared_ptr< HashTable >  item_ptr,
CacheItemType  item_type,
DeviceIdentifier  device_identifier,
size_t  item_size,
size_t  compute_time,
std::optional< HashtableCacheMetaInfo >  meta_info = std::nullopt 
)
override virtual

Implements DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >.

Definition at line 93 of file HashtableRecycler.cpp.

References ADD, AVAILABLE_AFTER_CLEANUP, CHECK, CHECK_EQ, checkOverlapsHashtableBucketCompatability(), cleanupCacheForInsertion(), EMPTY_HASHED_PLAN_DAG_KEY, g_enable_data_recycler, g_use_hashtable_cache, DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCachedItemContainer(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCacheLock(), DataRecyclerUtil::getDeviceIdentifierString(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getMetricTracker(), hasItemInCache(), OVERLAPS_HT, removeItemFromCache(), DataRecyclerUtil::toStringCacheItemType(), UNAVAILABLE, and VLOG.

99  {
101  key == EMPTY_HASHED_PLAN_DAG_KEY) {
102  return;
103  }
104  std::lock_guard<std::mutex> lock(getCacheLock());
105  auto has_cached_ht = hasItemInCache(key, item_type, device_identifier, lock, meta_info);
106  if (has_cached_ht) {
107  // check to see whether the cached one is in a dirty status
108  auto hashtable_cache = getCachedItemContainer(item_type, device_identifier);
109  auto candidate_it =
110  std::find_if(hashtable_cache->begin(),
111  hashtable_cache->end(),
112  [&key](const auto& cached_item) { return cached_item.key == key; });
113  bool found_candidate = false;
114  if (candidate_it != hashtable_cache->end()) {
115  if (item_type == OVERLAPS_HT) {
116  // we have to check hashtable metainfo for overlaps join hashtable
117  CHECK(candidate_it->meta_info && candidate_it->meta_info->overlaps_meta_info);
118  CHECK(meta_info && meta_info->overlaps_meta_info);
120  *candidate_it->meta_info->overlaps_meta_info,
121  *meta_info->overlaps_meta_info)) {
122  found_candidate = true;
123  }
124  } else {
125  found_candidate = true;
126  }
127  if (found_candidate && candidate_it->isDirty()) {
128  // remove the dirty item from the cache and make a room for the new one
130  key, item_type, device_identifier, lock, candidate_it->meta_info);
131  has_cached_ht = false;
132  }
133  }
134  }
135 
136  if (!has_cached_ht) {
137  // check cache's space availability
138  auto& metric_tracker = getMetricTracker(item_type);
139  auto cache_status = metric_tracker.canAddItem(device_identifier, item_size);
140  if (cache_status == CacheAvailability::UNAVAILABLE) {
141  // hashtable is too large
142  return;
143  } else if (cache_status == CacheAvailability::AVAILABLE_AFTER_CLEANUP) {
144  // we need to cleanup some cached hashtables to make a room to insert this hashtable
145  // here we try to cache the new one anyway since we don't know the importance of
146  // this hashtable yet and if it is not that frequently reused it is removed
147  // in a near future
148  auto required_size = metric_tracker.calculateRequiredSpaceForItemAddition(
149  device_identifier, item_size);
150  cleanupCacheForInsertion(item_type, device_identifier, required_size, lock);
151  }
152  // put hashtable's metric to metric tracker
153  auto new_cache_metric_ptr = metric_tracker.putNewCacheItemMetric(
154  key, device_identifier, item_size, compute_time);
155  CHECK_EQ(item_size, new_cache_metric_ptr->getMemSize());
156  metric_tracker.updateCurrentCacheSize(
157  device_identifier, CacheUpdateAction::ADD, item_size);
158  // put hashtable to cache
159  VLOG(1) << "[" << DataRecyclerUtil::toStringCacheItemType(item_type) << ", "
160  << DataRecyclerUtil::getDeviceIdentifierString(device_identifier)
161  << "] Put item to cache";
162  auto hashtable_cache = getCachedItemContainer(item_type, device_identifier);
163  hashtable_cache->emplace_back(key, item_ptr, new_cache_metric_ptr, meta_info);
164  }
165  // we have a cached hashtable in a clean status
166  return;
167 }
#define CHECK_EQ(x, y)
Definition: Logger.h:219
bool hasItemInCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) const override
static std::string getDeviceIdentifierString(DeviceIdentifier device_identifier)
Definition: DataRecycler.h:135
static std::string_view toStringCacheItemType(CacheItemType item_type)
Definition: DataRecycler.h:128
CacheMetricTracker & getMetricTracker(CacheItemType item_type)
Definition: DataRecycler.h:606
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
std::shared_ptr< CachedItemContainer > getCachedItemContainer(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:480
bool g_enable_data_recycler
Definition: Execute.cpp:141
void cleanupCacheForInsertion(CacheItemType item_type, DeviceIdentifier device_identifier, size_t required_size, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
void removeItemFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
bool checkOverlapsHashtableBucketCompatability(const OverlapsHashTableMetaInfo &candidate_bucket_dim, const OverlapsHashTableMetaInfo &target_bucket_dim) const
#define CHECK(condition)
Definition: Logger.h:211
bool g_use_hashtable_cache
Definition: Execute.cpp:142
#define VLOG(n)
Definition: Logger.h:305

+ Here is the call graph for this function:

void HashtableRecycler::removeItemFromCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier,
std::lock_guard< std::mutex > &  lock,
std::optional< HashtableCacheMetaInfo >  meta_info = std::nullopt 
)
override private virtual

Implements DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >.

Definition at line 169 of file HashtableRecycler.cpp.

References CHECK, EMPTY_HASHED_PLAN_DAG_KEY, g_enable_data_recycler, g_use_hashtable_cache, DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCachedItemContainer(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getMetricTracker(), and REMOVE.

Referenced by putItemToCache().

174  {
176  key == EMPTY_HASHED_PLAN_DAG_KEY) {
177  return;
178  }
179  auto& cache_metrics = getMetricTracker(item_type);
180  // remove cached item from the cache
181  auto cache_metric = cache_metrics.getCacheItemMetric(key, device_identifier);
182  CHECK(cache_metric);
183  auto hashtable_size = cache_metric->getMemSize();
184  auto hashtable_container = getCachedItemContainer(item_type, device_identifier);
185  auto filter = [key](auto const& item) { return item.key == key; };
186  auto itr =
187  std::find_if(hashtable_container->cbegin(), hashtable_container->cend(), filter);
188  if (itr == hashtable_container->cend()) {
189  return;
190  } else {
191  hashtable_container->erase(itr);
192  }
193  // remove cache metric
194  cache_metrics.removeCacheItemMetric(key, device_identifier);
195  // update current cache size
196  cache_metrics.updateCurrentCacheSize(
197  device_identifier, CacheUpdateAction::REMOVE, hashtable_size);
198  return;
199 }
CacheMetricTracker & getMetricTracker(CacheItemType item_type)
Definition: DataRecycler.h:606
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
std::shared_ptr< CachedItemContainer > getCachedItemContainer(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:480
bool g_enable_data_recycler
Definition: Execute.cpp:141
#define CHECK(condition)
Definition: Logger.h:211
bool g_use_hashtable_cache
Definition: Execute.cpp:142

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void HashtableRecycler::removeTableKeyInfoFromQueryPlanDagMap ( size_t  table_key)

Definition at line 456 of file HashtableRecycler.cpp.

References table_key_to_query_plan_dag_map_.

Referenced by markCachedItemAsDirty().

456  {
457  // this function is called when marking cached item for the given table_key as dirty
458  // and when we do that we already acquire the cache lock so we skip to lock in this func
459  table_key_to_query_plan_dag_map_.erase(table_key);
460 }
std::unordered_map< size_t, std::unordered_set< size_t > > table_key_to_query_plan_dag_map_

+ Here is the caller graph for this function:

std::string HashtableRecycler::toString ( ) const
override virtual

Implements DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >.

Definition at line 272 of file HashtableRecycler.cpp.

References DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getCacheItemType(), DataRecyclerUtil::getDeviceIdentifierString(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getItemCache(), DataRecycler< std::shared_ptr< HashTable >, HashtableCacheMetaInfo >::getMetricTracker(), and DataRecyclerUtil::toStringCacheItemType().

272  {
273  std::ostringstream oss;
274  oss << "A current status of the Hashtable Recycler:\n";
275  for (auto& item_type : getCacheItemType()) {
276  oss << "\t" << DataRecyclerUtil::toStringCacheItemType(item_type);
277  auto& metric_tracker = getMetricTracker(item_type);
278  oss << "\n\t# cached hashtables:\n";
279  auto item_cache = getItemCache().find(item_type)->second;
280  for (auto& cache_container : *item_cache) {
281  oss << "\t\tDevice"
282  << DataRecyclerUtil::getDeviceIdentifierString(cache_container.first)
283  << ", # hashtables: " << cache_container.second->size() << "\n";
284  for (auto& ht : *cache_container.second) {
285  oss << "\t\t\tHT] " << ht.item_metric->toString() << "\n";
286  }
287  }
288  oss << "\t" << metric_tracker.toString() << "\n";
289  }
290  return oss.str();
291 }
static std::string getDeviceIdentifierString(DeviceIdentifier device_identifier)
Definition: DataRecycler.h:135
static std::string_view toStringCacheItemType(CacheItemType item_type)
Definition: DataRecycler.h:128
CacheMetricTracker & getMetricTracker(CacheItemType item_type)
Definition: DataRecycler.h:606
std::unordered_set< CacheItemType > const & getCacheItemType() const
Definition: DataRecycler.h:616

+ Here is the call graph for this function:

Member Data Documentation

HashtableRecycler::g_hashtable_cache_total_bytes

Definition at line 52 of file HashtableRecycler.h.

HashtableRecycler::g_max_cacheable_hashtable_size_bytes

Definition at line 52 of file HashtableRecycler.h.

HashtableRecycler::num_gpus
Initial value:
{}
std::shared_ptr<HashTable> getItemFromCache(
CacheItemType item_type,
DeviceIdentifier device_identifier,
std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) override

Definition at line 55 of file HashtableRecycler.h.

std::unordered_map<size_t, std::unordered_set<size_t> > HashtableRecycler::table_key_to_query_plan_dag_map_
private

The documentation for this class was generated from the following files: