OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DataRecycler.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "Analyzer/Analyzer.h"
25 #include "QueryEngine/ResultSet.h"
27 #include "Shared/misc.h"
28 
29 #include <boost/functional/hash.hpp>
30 
31 #include <unordered_map>
32 
// Placeholder metadata type for recyclers whose cached items carry no extra info
// (used as the META_INFO_TYPE template argument of DataRecycler/CachedItem).
struct EMPTY_META_INFO {};
34 
// Item type that we try to recycle
enum CacheItemType {
  PERFECT_HT = 0,             // Perfect hashtable
  BASELINE_HT,                // Baseline hashtable
  OVERLAPS_HT,                // Overlaps hashtable
  HT_HASHING_SCHEME,          // Hashtable layout
  BASELINE_HT_APPROX_CARD,    // Approximated cardinality for baseline hashtable
  OVERLAPS_AUTO_TUNER_PARAM,  // Hashtable auto tuner's params for overlaps join
  // TODO (yoonmin): support the following items for recycling
  // ROW_RS,            Row-wise resultset
  // COUNTALL_CARD_EST, Cardinality of query result
  // NDV_CARD_EST,      # Non-distinct value
  // FILTER_SEL         Selectivity of (push-downed) filter node
  NUM_CACHE_ITEM_TYPE  // sentinel: # of supported item types (see cache_item_type_str)
};
50 
// given item to be cached, it represents whether the item can be cached when considering
// various size limitation
enum CacheAvailability {
  AVAILABLE,                // item can be cached as is
  AVAILABLE_AFTER_CLEANUP,  // item can be cached after removing already cached items
  UNAVAILABLE               // item cannot be cached due to size limitation
};
// whether a cache-size bookkeeping update adds bytes to, or removes bytes from,
// the per-device running total (see CacheMetricTracker::updateCurrentCacheSize)
enum CacheUpdateAction { ADD, REMOVE };
// the order of enum values affects how we remove cached items when
// new item wants to be cached but there is not enough space to keep them
// regarding `REF_COUNT`, it represents how many times a cached item is referenced during
// its lifetime to numerically estimate the usefulness of this cached item
// (not to measure exact # reference count at time T as std::shared_ptr does)
enum CacheMetricType { REF_COUNT = 0, MEM_SIZE, COMPUTE_TIME, NUM_METRIC_TYPE };
68 // per query plan DAG metric
70  public:
71  CacheItemMetric(QueryPlanHash query_plan_hash, size_t compute_time, size_t mem_size)
72  : query_plan_hash_(query_plan_hash), metrics_({0, mem_size, compute_time}) {}
73 
75 
77 
78  size_t getRefCount() const { return metrics_[CacheMetricType::REF_COUNT]; }
79 
81 
82  size_t getMemSize() const { return metrics_[CacheMetricType::MEM_SIZE]; }
83 
84  const std::array<size_t, CacheMetricType::NUM_METRIC_TYPE>& getMetrics() const {
85  return metrics_;
86  }
87 
88  void setComputeTime(size_t compute_time) {
90  }
91 
92  void setMemSize(const size_t mem_size) {
94  }
95 
96  std::string toString() const {
97  std::ostringstream oss;
98  oss << "Query plan hash: " << query_plan_hash_
99  << ", compute_time: " << metrics_[CacheMetricType::COMPUTE_TIME]
100  << ", mem_size: " << metrics_[CacheMetricType::MEM_SIZE]
101  << ", ref_count: " << metrics_[CacheMetricType::REF_COUNT];
102  return oss.str();
103  }
104 
105  private:
107  std::array<size_t, CacheMetricType::NUM_METRIC_TYPE> metrics_;
108 };
109 
// 0 = CPU, 1 ~ N : GPU-1 ~ GPU-N
using DeviceIdentifier = size_t;
// per-device total of currently cached bytes
using CacheSizeMap = std::unordered_map<DeviceIdentifier, size_t>;
// per-device list of metrics, one CacheItemMetric per cached item
using CacheMetricInfoMap =
    std::unordered_map<DeviceIdentifier, std::vector<std::shared_ptr<CacheItemMetric>>>;
115 
117  public:
118  // need to add more constants if necessary: ROW_RS, COUNTALL_CARD_EST, NDV_CARD_EST,
119  // FILTER_SEL, ...
120  static constexpr auto cache_item_type_str =
121  shared::string_view_array("Perfect Join Hashtable",
122  "Baseline Join Hashtable",
123  "Overlaps Join Hashtable",
124  "Hashing Scheme for Join Hashtable",
125  "Baseline Join Hashtable's Approximated Cardinality",
126  "Overlaps Join Hashtable's Auto Tuner's Parameters");
127  static std::string_view toStringCacheItemType(CacheItemType item_type) {
128  static_assert(cache_item_type_str.size() == NUM_CACHE_ITEM_TYPE);
129  return cache_item_type_str[item_type];
130  }
131 
133 
134  static std::string getDeviceIdentifierString(DeviceIdentifier device_identifier) {
135  std::string device_type = device_identifier == CPU_DEVICE_IDENTIFIER ? "CPU" : "GPU-";
136  return device_identifier != CPU_DEVICE_IDENTIFIER
137  ? device_type.append(std::to_string(device_identifier))
138  : device_type;
139  }
140 };
141 
142 // contain information regarding 1) per-cache item metric: perfect ht-1, perfect ht-2,
143 // baseline ht-1, ... and 2) per-type size in current: perfect-ht cache size, baseline-ht
144 // cache size, overlaps-ht cache size, ...
146  public:
148  size_t total_cache_size,
149  size_t max_cache_item_size,
150  int num_gpus = 0)
151  : item_type_(cache_item_type)
152  , total_cache_size_(total_cache_size)
153  , max_cache_item_size_(max_cache_item_size) {
154  // initialize cache metrics for each device: CPU, GPU0, GPU1, ...
155  // Currently we only consider maintaining our cache in CPU-memory
156  for (int gpu_device_identifier = num_gpus; gpu_device_identifier >= 1;
157  --gpu_device_identifier) {
158  cache_metrics_.emplace(gpu_device_identifier,
159  std::vector<std::shared_ptr<CacheItemMetric>>());
160  current_cache_size_in_bytes_.emplace(gpu_device_identifier, 0);
161  }
163  std::vector<std::shared_ptr<CacheItemMetric>>());
165 
166  if (total_cache_size_ < 1024 * 1024 * 256) {
167  LOG(INFO) << "The total cache size of "
168  << DataRecyclerUtil::toStringCacheItemType(cache_item_type)
169  << " is set too low, so we suggest raising it larger than 256MB";
170  }
171 
172  if (max_cache_item_size < 1024 * 1024 * 10) {
173  LOG(INFO)
174  << "The maximum item size of "
175  << DataRecyclerUtil::toStringCacheItemType(cache_item_type)
176  << " that can be cached is set too low, we suggest raising it larger than 10MB";
177  }
178  if (max_cache_item_size > total_cache_size_) {
179  LOG(INFO) << "The maximum item size of "
180  << DataRecyclerUtil::toStringCacheItemType(cache_item_type)
181  << " is set larger than its total cache size, so we force to set the "
182  "maximum item size as equal to the total cache size";
183  max_cache_item_size = total_cache_size_;
184  }
185  }
186 
187  static inline CacheMetricInfoMap::mapped_type::const_iterator getCacheItemMetricItr(
188  QueryPlanHash key,
189  CacheMetricInfoMap::mapped_type const& metrics) {
190  auto same_hash = [key](auto itr) { return itr->getQueryPlanHash() == key; };
191  return std::find_if(metrics.cbegin(), metrics.cend(), same_hash);
192  }
193 
194  static inline std::shared_ptr<CacheItemMetric> getCacheItemMetricImpl(
195  QueryPlanHash key,
196  CacheMetricInfoMap::mapped_type const& metrics) {
197  auto itr = getCacheItemMetricItr(key, metrics);
198  return itr == metrics.cend() ? nullptr : *itr;
199  }
200 
201  std::vector<std::shared_ptr<CacheItemMetric>>& getCacheItemMetrics(
202  DeviceIdentifier device_identifier) {
203  auto itr = cache_metrics_.find(device_identifier);
204  CHECK(itr != cache_metrics_.end());
205  return itr->second;
206  }
207 
208  std::shared_ptr<CacheItemMetric> getCacheItemMetric(
209  QueryPlanHash key,
210  DeviceIdentifier device_identifier) const {
211  auto itr = cache_metrics_.find(device_identifier);
212  return itr == cache_metrics_.cend() ? nullptr
213  : getCacheItemMetricImpl(key, itr->second);
214  }
215 
216  void setCurrentCacheSize(DeviceIdentifier device_identifier, size_t bytes) {
217  if (bytes > total_cache_size_) {
218  return;
219  }
220  auto itr = current_cache_size_in_bytes_.find(device_identifier);
221  CHECK(itr != current_cache_size_in_bytes_.end());
222  itr->second = bytes;
223  }
224 
225  std::optional<size_t> getCurrentCacheSize(DeviceIdentifier key) const {
226  auto same_hash = [key](auto itr) { return itr.first == key; };
227  auto itr = std::find_if(current_cache_size_in_bytes_.cbegin(),
229  same_hash);
230  return itr == current_cache_size_in_bytes_.cend() ? std::nullopt
231  : std::make_optional(itr->second);
232  }
233 
234  std::shared_ptr<CacheItemMetric> putNewCacheItemMetric(
235  QueryPlanHash key,
236  DeviceIdentifier device_identifier,
237  size_t mem_size,
238  size_t compute_time) {
239  auto itr = cache_metrics_.find(device_identifier);
240  CHECK(itr != cache_metrics_.end());
241  if (auto cached_metric = getCacheItemMetricImpl(key, itr->second)) {
242  return cached_metric;
243  }
244  auto cache_metric = std::make_shared<CacheItemMetric>(key, compute_time, mem_size);
245  // we add the item to cache after we create it during query runtime
246  // so it is used at least once
247  cache_metric->incRefCount();
248  return itr->second.emplace_back(std::move(cache_metric));
249  }
250 
251  void removeCacheItemMetric(QueryPlanHash key, DeviceIdentifier device_identifier) {
252  auto& cache_metrics = getCacheItemMetrics(device_identifier);
253  auto itr = getCacheItemMetricItr(key, cache_metrics);
254  if (itr != cache_metrics.cend()) {
255  cache_metrics.erase(itr);
256  }
257  }
258 
259  void removeMetricFromBeginning(DeviceIdentifier device_identifier, int offset) {
260  auto metrics = getCacheItemMetrics(device_identifier);
261  metrics.erase(metrics.begin(), metrics.begin() + offset);
262  }
263 
265  size_t item_size) const {
266  auto it = current_cache_size_in_bytes_.find(device_identifier);
267  CHECK(it != current_cache_size_in_bytes_.end());
268  auto rem = total_cache_size_ - it->second;
269  CHECK_GT(item_size, rem);
270  return item_size - rem;
271  }
272 
274  for (auto& kv : current_cache_size_in_bytes_) {
275  auto cache_item_metrics = getCacheItemMetrics(kv.first);
276  VLOG(1) << "Clear cache of " << DataRecyclerUtil::toStringCacheItemType(item_type_)
277  << " from device [" << kv.first
278  << "] (# cached items: " << cache_item_metrics.size() << ", " << kv.second
279  << " bytes)";
281  CHECK_EQ(getCurrentCacheSize(kv.first).value(), 0u);
282  }
283  for (auto& kv : cache_metrics_) {
284  kv.second.clear();
285  }
286  }
287 
289  size_t item_size) const {
290  if (item_size > max_cache_item_size_) {
292  }
293  auto current_cache_size = getCurrentCacheSize(device_identifier);
294  CHECK(current_cache_size.has_value());
295  if (*current_cache_size > total_cache_size_) {
297  }
298  auto cache_size_after_addition = *current_cache_size + item_size;
299  if (cache_size_after_addition > total_cache_size_) {
301  }
303  }
304 
305  void updateCurrentCacheSize(DeviceIdentifier device_identifier,
307  size_t size) {
308  auto current_cache_size = getCurrentCacheSize(device_identifier);
309  CHECK(current_cache_size.has_value());
310  if (action == CacheUpdateAction::ADD) {
311  setCurrentCacheSize(device_identifier, current_cache_size.value() + size);
312  } else {
314  CHECK_LE(size, *current_cache_size);
315  setCurrentCacheSize(device_identifier, current_cache_size.value() - size);
316  }
317  }
318 
320  auto& metric_cache = getCacheItemMetrics(device_identifier);
321  std::sort(metric_cache.begin(),
322  metric_cache.end(),
323  [](const std::shared_ptr<CacheItemMetric>& left,
324  const std::shared_ptr<CacheItemMetric>& right) {
325  auto& elem1_metrics = left->getMetrics();
326  auto& elem2_metrics = right->getMetrics();
327  for (size_t i = 0; i < CacheMetricType::NUM_METRIC_TYPE; ++i) {
328  if (elem1_metrics[i] != elem2_metrics[i]) {
329  return elem1_metrics[i] < elem2_metrics[i];
330  }
331  }
332  return false;
333  });
334  }
335 
336  std::string toString() const {
337  std::ostringstream oss;
338  oss << "Current memory consumption of caches for each device:\n";
339  for (auto& kv : current_cache_size_in_bytes_) {
340  oss << "\t\tDevice " << kv.first << " : " << kv.second << " bytes\n";
341  }
342  return oss.str();
343  }
344 
345  size_t getTotalCacheSize() const { return total_cache_size_; }
346  size_t getMaxCacheItemSize() const { return max_cache_item_size_; }
347  void setTotalCacheSize(size_t new_total_cache_size) {
348  if (new_total_cache_size > 0) {
349  total_cache_size_ = new_total_cache_size;
350  }
351  }
352  void setMaxCacheItemSize(size_t new_max_cache_item_size) {
353  if (new_max_cache_item_size > 0) {
354  max_cache_item_size_ = new_max_cache_item_size;
355  }
356  }
357 
358  private:
362  // metadata of cached item that belongs to a cache of a specific device
363  // 1) ref_count: how many times this cached item is recycled
364  // 2) memory_usage: the size of cached item in bytes
365  // 3) compute_time: an elapsed time to generate this cached item
367 
368  // the total amount of currently cached data per device
370 };
371 
372 template <typename CACHED_ITEM_TYPE, typename META_INFO_TYPE>
373 struct CachedItem {
375  CACHED_ITEM_TYPE item,
376  std::shared_ptr<CacheItemMetric> item_metric_ptr,
377  std::optional<META_INFO_TYPE> metadata = std::nullopt)
378  : key(hashed_plan)
379  , cached_item(item)
380  , item_metric(item_metric_ptr)
381  , meta_info(metadata) {}
383  CACHED_ITEM_TYPE cached_item;
384  std::shared_ptr<CacheItemMetric> item_metric;
385  std::optional<META_INFO_TYPE> meta_info;
386 };
387 
388 // A main class of data recycler
389 // note that some tests which directly accesses APIs for update/modify/delete
390 // (meta)data may need to disable data recycler explicitly before running test suites
391 // to make test scenarios as expected
392 // i.e., UpdelStorageTest that calls fragmenter's updateColumn API
393 template <typename CACHED_ITEM_TYPE, typename META_INFO_TYPE>
395  public:
396  using CachedItemContainer = std::vector<CachedItem<CACHED_ITEM_TYPE, META_INFO_TYPE>>;
398  std::unordered_map<DeviceIdentifier, std::shared_ptr<CachedItemContainer>>;
400  std::unordered_map<CacheItemType, std::shared_ptr<PerDeviceCacheItemContainer>>;
401  using PerTypeCacheMetricTracker = std::unordered_map<CacheItemType, CacheMetricTracker>;
402 
403  DataRecycler(const std::vector<CacheItemType>& item_types,
404  size_t total_cache_size,
405  size_t max_item_size,
406  int num_gpus) {
407  for (auto& item_type : item_types) {
408  cache_item_types_.insert(item_type);
409  metric_tracker_.emplace(
410  item_type,
411  CacheMetricTracker(item_type, total_cache_size, max_item_size, num_gpus));
412  auto item_container = std::make_shared<PerDeviceCacheItemContainer>();
413  for (int gpu_device_identifier = num_gpus; gpu_device_identifier >= 1;
414  --gpu_device_identifier) {
415  item_container->emplace(gpu_device_identifier,
416  std::make_shared<CachedItemContainer>());
417  }
418  item_container->emplace(DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
419  std::make_shared<CachedItemContainer>());
420  cached_items_container_.emplace(item_type, item_container);
421  }
422  }
423 
424  virtual ~DataRecycler() = default;
425 
426  virtual CACHED_ITEM_TYPE getItemFromCache(
427  QueryPlanHash key,
428  CacheItemType item_type,
429  DeviceIdentifier device_identifier,
430  std::optional<META_INFO_TYPE> meta_info = std::nullopt) const = 0;
431 
432  virtual void putItemToCache(QueryPlanHash key,
433  CACHED_ITEM_TYPE item_ptr,
434  CacheItemType item_type,
435  DeviceIdentifier device_identifier,
436  size_t item_size,
437  size_t compute_time,
438  std::optional<META_INFO_TYPE> meta_info = std::nullopt) = 0;
439 
440  virtual void initCache() = 0;
441 
442  virtual void clearCache() = 0;
443 
444  virtual std::string toString() const = 0;
445 
446  std::shared_ptr<CachedItemContainer> getCachedItemContainer(
447  CacheItemType item_type,
448  DeviceIdentifier device_identifier) const {
449  auto item_type_container_itr = cached_items_container_.find(item_type);
450  if (item_type_container_itr != cached_items_container_.end()) {
451  auto device_type_container_itr =
452  item_type_container_itr->second->find(device_identifier);
453  return device_type_container_itr != item_type_container_itr->second->end()
454  ? device_type_container_itr->second
455  : nullptr;
456  }
457  return nullptr;
458  }
459 
460  std::optional<CachedItem<CACHED_ITEM_TYPE, META_INFO_TYPE>> getCachedItem(
461  QueryPlanHash key,
462  CachedItemContainer& m) const {
463  for (auto& candidate : m) {
464  if (candidate.key == key) {
465  return candidate;
466  }
467  }
468  return std::nullopt;
469  }
470 
472  DeviceIdentifier device_identifier) const {
473  std::lock_guard<std::mutex> lock(cache_lock_);
474  auto container = getCachedItemContainer(item_type, device_identifier);
475  return container ? container->size() : 0;
476  }
477 
479  DeviceIdentifier device_identifier) const {
480  std::lock_guard<std::mutex> lock(cache_lock_);
481  auto metric_tracker = getMetricTracker(item_type);
482  auto current_size_opt = metric_tracker.getCurrentCacheSize(device_identifier);
483  return current_size_opt ? current_size_opt.value() : 0;
484  }
485 
486  std::shared_ptr<CacheItemMetric> getCachedItemMetric(CacheItemType item_type,
487  DeviceIdentifier device_identifier,
488  QueryPlanHash key) const {
489  std::lock_guard<std::mutex> lock(cache_lock_);
490  auto cache_metric_tracker = getMetricTracker(item_type);
491  return cache_metric_tracker.getCacheItemMetric(key, device_identifier);
492  }
493 
494  void setTotalCacheSize(CacheItemType item_type, size_t new_total_cache_size) {
495  if (new_total_cache_size > 0) {
496  std::lock_guard<std::mutex> lock(cache_lock_);
497  getMetricTracker(item_type).setTotalCacheSize(new_total_cache_size);
498  }
499  }
500 
501  void setMaxCacheItemSize(CacheItemType item_type, size_t new_max_cache_item_size) {
502  if (new_max_cache_item_size > 0) {
503  std::lock_guard<std::mutex> lock(cache_lock_);
504  getMetricTracker(item_type).setMaxCacheItemSize(new_max_cache_item_size);
505  }
506  }
507 
508  std::function<void()> getCacheInvalidator() {
509  return [this]() -> void { clearCache(); };
510  }
511 
512  protected:
514  DeviceIdentifier device_identifier,
515  int offset) {
516  // it removes cached items located from `idx 0` to `offset`
517  // so, call this function after sorting the cached items container vec
518  // and we should call this function under the proper locking scheme
519  auto container = getCachedItemContainer(item_type, device_identifier);
520  CHECK(container);
521  container->erase(container->begin(), container->begin() + offset);
522  }
523 
525  DeviceIdentifier device_identifier) {
526  // should call this function under the proper locking scheme
527  auto container = getCachedItemContainer(item_type, device_identifier);
528  CHECK(container);
529  std::sort(container->begin(),
530  container->end(),
533  auto& left_metrics = left.item_metric->getMetrics();
534  auto& right_metrics = right.item_metric->getMetrics();
535  for (size_t i = 0; i < CacheMetricType::NUM_METRIC_TYPE; ++i) {
536  if (left_metrics[i] != right_metrics[i]) {
537  return left_metrics[i] < right_metrics[i];
538  }
539  }
540  return false;
541  });
542  }
543 
544  std::mutex& getCacheLock() const { return cache_lock_; }
545 
547  auto metric_iter = metric_tracker_.find(item_type);
548  CHECK(metric_iter != metric_tracker_.end());
549  return metric_iter->second;
550  }
551 
553  return const_cast<DataRecycler*>(this)->getMetricTracker(item_type);
554  }
555 
556  std::unordered_set<CacheItemType> const& getCacheItemType() const {
557  return cache_item_types_;
558  }
559 
562  }
563 
564  private:
565  // internally called under the proper locking scheme
566  virtual bool hasItemInCache(
567  QueryPlanHash key,
568  CacheItemType item_type,
569  DeviceIdentifier device_identifier,
570  std::lock_guard<std::mutex>& lock,
571  std::optional<META_INFO_TYPE> meta_info = std::nullopt) const = 0;
572 
573  // internally called under the proper locking scheme
574  virtual void removeItemFromCache(
575  QueryPlanHash key,
576  CacheItemType item_type,
577  DeviceIdentifier device_identifier,
578  std::lock_guard<std::mutex>& lock,
579  std::optional<META_INFO_TYPE> meta_info = std::nullopt) = 0;
580 
581  // internally called under the proper locking scheme
582  virtual void cleanupCacheForInsertion(
583  CacheItemType item_type,
584  DeviceIdentifier device_identifier,
585  size_t required_size,
586  std::lock_guard<std::mutex>& lock,
587  std::optional<META_INFO_TYPE> meta_info = std::nullopt) = 0;
588 
589  // a set of cache item type that this recycler supports
590  std::unordered_set<CacheItemType> cache_item_types_;
591 
592  // cache metric tracker
594 
595  // per-device cached item containers for each cached item type
597 
598  mutable std::mutex cache_lock_;
599 };
Defines data structures for the semantic analysis phase of query processing.
CACHED_ITEM_TYPE cached_item
Definition: DataRecycler.h:383
std::mutex & getCacheLock() const
Definition: DataRecycler.h:544
std::unordered_map< CacheItemType, std::shared_ptr< PerDeviceCacheItemContainer >> PerTypeCacheItemContainer
Definition: DataRecycler.h:400
#define CHECK_EQ(x, y)
Definition: Logger.h:217
CacheUpdateAction
Definition: DataRecycler.h:59
size_t DeviceIdentifier
Definition: DataRecycler.h:111
static std::string getDeviceIdentifierString(DeviceIdentifier device_identifier)
Definition: DataRecycler.h:134
virtual std::string toString() const =0
size_t calculateRequiredSpaceForItemAddition(DeviceIdentifier device_identifier, size_t item_size) const
Definition: DataRecycler.h:264
std::function< void()> getCacheInvalidator()
Definition: DataRecycler.h:508
std::shared_ptr< CacheItemMetric > putNewCacheItemMetric(QueryPlanHash key, DeviceIdentifier device_identifier, size_t mem_size, size_t compute_time)
Definition: DataRecycler.h:234
static std::string_view toStringCacheItemType(CacheItemType item_type)
Definition: DataRecycler.h:127
size_t getCurrentNumCachedItems(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:471
CacheMetricTracker & getMetricTracker(CacheItemType item_type)
Definition: DataRecycler.h:546
CacheItemType item_type_
Definition: DataRecycler.h:359
const QueryPlanHash query_plan_hash_
Definition: DataRecycler.h:106
const std::array< size_t, CacheMetricType::NUM_METRIC_TYPE > & getMetrics() const
Definition: DataRecycler.h:84
std::unordered_map< DeviceIdentifier, size_t > CacheSizeMap
Definition: DataRecycler.h:112
DataRecycler(const std::vector< CacheItemType > &item_types, size_t total_cache_size, size_t max_item_size, int num_gpus)
Definition: DataRecycler.h:403
#define LOG(tag)
Definition: Logger.h:203
std::vector< CachedItem< std::optional< HashType >, EMPTY_META_INFO >> CachedItemContainer
Definition: DataRecycler.h:396
CacheMetricTracker const & getMetricTracker(CacheItemType item_type) const
Definition: DataRecycler.h:552
CacheItemMetric(QueryPlanHash query_plan_hash, size_t compute_time, size_t mem_size)
Definition: DataRecycler.h:71
std::unordered_set< CacheItemType > cache_item_types_
Definition: DataRecycler.h:590
size_t getMemSize() const
Definition: DataRecycler.h:82
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
QueryPlanHash key
Definition: DataRecycler.h:382
std::optional< size_t > getCurrentCacheSize(DeviceIdentifier key) const
Definition: DataRecycler.h:225
std::shared_ptr< CachedItemContainer > getCachedItemContainer(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:446
QueryPlanHash getQueryPlanHash() const
Definition: DataRecycler.h:74
void setMaxCacheItemSize(size_t new_max_cache_item_size)
Definition: DataRecycler.h:352
void setMaxCacheItemSize(CacheItemType item_type, size_t new_max_cache_item_size)
Definition: DataRecycler.h:501
CacheAvailability canAddItem(DeviceIdentifier device_identifier, size_t item_size) const
Definition: DataRecycler.h:288
#define CHECK_GT(x, y)
Definition: Logger.h:221
void clearCacheMetricTracker()
Definition: DataRecycler.h:273
std::string to_string(char const *&&v)
PerTypeCacheItemContainer const & getItemCache() const
Definition: DataRecycler.h:560
CacheAvailability
Definition: DataRecycler.h:53
PerTypeCacheMetricTracker metric_tracker_
Definition: DataRecycler.h:593
size_t getCurrentCacheSizeForDevice(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:478
std::mutex cache_lock_
Definition: DataRecycler.h:598
void setMemSize(const size_t mem_size)
Definition: DataRecycler.h:92
void setCurrentCacheSize(DeviceIdentifier device_identifier, size_t bytes)
Definition: DataRecycler.h:216
size_t getMaxCacheItemSize() const
Definition: DataRecycler.h:346
std::vector< std::shared_ptr< CacheItemMetric > > & getCacheItemMetrics(DeviceIdentifier device_identifier)
Definition: DataRecycler.h:201
void removeCacheItemMetric(QueryPlanHash key, DeviceIdentifier device_identifier)
Definition: DataRecycler.h:251
CacheItemType
Definition: DataRecycler.h:36
std::optional< META_INFO_TYPE > meta_info
Definition: DataRecycler.h:385
virtual void initCache()=0
size_t getTotalCacheSize() const
Definition: DataRecycler.h:345
std::shared_ptr< CacheItemMetric > getCacheItemMetric(QueryPlanHash key, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:208
std::string toString() const
Definition: DataRecycler.h:336
PerTypeCacheItemContainer cached_items_container_
Definition: DataRecycler.h:596
size_t max_cache_item_size_
Definition: DataRecycler.h:361
CacheSizeMap current_cache_size_in_bytes_
Definition: DataRecycler.h:369
void updateCurrentCacheSize(DeviceIdentifier device_identifier, CacheUpdateAction action, size_t size)
Definition: DataRecycler.h:305
virtual CACHED_ITEM_TYPE getItemFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::optional< META_INFO_TYPE > meta_info=std::nullopt) const =0
virtual void putItemToCache(QueryPlanHash key, CACHED_ITEM_TYPE item_ptr, CacheItemType item_type, DeviceIdentifier device_identifier, size_t item_size, size_t compute_time, std::optional< META_INFO_TYPE > meta_info=std::nullopt)=0
std::unordered_map< CacheItemType, CacheMetricTracker > PerTypeCacheMetricTracker
Definition: DataRecycler.h:401
std::shared_ptr< CacheItemMetric > getCachedItemMetric(CacheItemType item_type, DeviceIdentifier device_identifier, QueryPlanHash key) const
Definition: DataRecycler.h:486
void setComputeTime(size_t compute_time)
Definition: DataRecycler.h:88
void setTotalCacheSize(size_t new_total_cache_size)
Definition: DataRecycler.h:347
void setTotalCacheSize(CacheItemType item_type, size_t new_total_cache_size)
Definition: DataRecycler.h:494
constexpr std::array< std::string_view, sizeof...(STR)> string_view_array(STR &&...str)
Definition: misc.h:284
static std::shared_ptr< CacheItemMetric > getCacheItemMetricImpl(QueryPlanHash key, CacheMetricInfoMap::mapped_type const &metrics)
Definition: DataRecycler.h:194
static constexpr auto cache_item_type_str
Definition: DataRecycler.h:120
std::unordered_map< DeviceIdentifier, std::shared_ptr< CachedItemContainer >> PerDeviceCacheItemContainer
Definition: DataRecycler.h:398
CacheMetricType
Definition: DataRecycler.h:66
void sortCacheContainerByQueryMetric(CacheItemType item_type, DeviceIdentifier device_identifier)
Definition: DataRecycler.h:524
#define CHECK_LE(x, y)
Definition: Logger.h:220
void sortCacheInfoByQueryMetric(DeviceIdentifier device_identifier)
Definition: DataRecycler.h:319
size_t getComputeTime() const
Definition: DataRecycler.h:80
std::unordered_set< CacheItemType > const & getCacheItemType() const
Definition: DataRecycler.h:556
CacheMetricTracker(CacheItemType cache_item_type, size_t total_cache_size, size_t max_cache_item_size, int num_gpus=0)
Definition: DataRecycler.h:147
virtual ~DataRecycler()=default
size_t QueryPlanHash
size_t getRefCount() const
Definition: DataRecycler.h:78
std::unordered_map< DeviceIdentifier, std::vector< std::shared_ptr< CacheItemMetric >>> CacheMetricInfoMap
Definition: DataRecycler.h:114
static CacheMetricInfoMap::mapped_type::const_iterator getCacheItemMetricItr(QueryPlanHash key, CacheMetricInfoMap::mapped_type const &metrics)
Definition: DataRecycler.h:187
CachedItem(QueryPlanHash hashed_plan, CACHED_ITEM_TYPE item, std::shared_ptr< CacheItemMetric > item_metric_ptr, std::optional< META_INFO_TYPE > metadata=std::nullopt)
Definition: DataRecycler.h:374
virtual bool hasItemInCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< META_INFO_TYPE > meta_info=std::nullopt) const =0
CacheMetricInfoMap cache_metrics_
Definition: DataRecycler.h:366
#define CHECK(condition)
Definition: Logger.h:209
virtual void clearCache()=0
void incRefCount()
Definition: DataRecycler.h:76
std::optional< CachedItem< CACHED_ITEM_TYPE, META_INFO_TYPE > > getCachedItem(QueryPlanHash key, CachedItemContainer &m) const
Definition: DataRecycler.h:460
virtual void removeItemFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< META_INFO_TYPE > meta_info=std::nullopt)=0
Basic constructors and methods of the row set interface.
std::string toString() const
Definition: DataRecycler.h:96
void removeMetricFromBeginning(DeviceIdentifier device_identifier, int offset)
Definition: DataRecycler.h:259
Execution unit for relational algebra. It&#39;s a low-level description of any relational algebra operati...
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:132
void removeCachedItemFromBeginning(CacheItemType item_type, DeviceIdentifier device_identifier, int offset)
Definition: DataRecycler.h:513
virtual void cleanupCacheForInsertion(CacheItemType item_type, DeviceIdentifier device_identifier, size_t required_size, std::lock_guard< std::mutex > &lock, std::optional< META_INFO_TYPE > meta_info=std::nullopt)=0
std::shared_ptr< CacheItemMetric > item_metric
Definition: DataRecycler.h:384
#define VLOG(n)
Definition: Logger.h:303
std::array< size_t, CacheMetricType::NUM_METRIC_TYPE > metrics_
Definition: DataRecycler.h:107