OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableOptimizer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "TableOptimizer.h"
18 
19 #include "Analyzer/Analyzer.h"
20 #include "LockMgr/LockMgr.h"
21 #include "Logger/Logger.h"
22 #include "QueryEngine/Execute.h"
23 #include "Shared/misc.h"
24 #include "Shared/scope.h"
25 
26 // By default, when rows are deleted, vacuum fragments with at least 10% deleted rows
28 
30  Executor* executor,
32  : td_(td), executor_(executor), cat_(cat) {
33  CHECK(td);
34 }
35 namespace {
36 
37 template <typename T>
39  const auto stv = boost::get<ScalarTargetValue>(&tv);
40  CHECK(stv);
41  const auto val_ptr = boost::get<T>(stv);
42  CHECK(val_ptr);
43  return *val_ptr;
44 }
45 
47  const std::vector<TargetValue>& row,
48  const SQLTypeInfo& ti,
49  const bool has_nulls) {
50  switch (ti.get_type()) {
51  case kBOOLEAN:
52  case kTINYINT:
53  case kSMALLINT:
54  case kINT:
55  case kBIGINT:
56  case kNUMERIC:
57  case kDECIMAL:
58  case kTIME:
59  case kTIMESTAMP:
60  case kDATE: {
61  int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
62  int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
63  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
64  break;
65  }
66  case kFLOAT: {
67  float min_val = read_scalar_target_value<float>(row[0]);
68  float max_val = read_scalar_target_value<float>(row[1]);
69  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
70  break;
71  }
72  case kDOUBLE: {
73  double min_val = read_scalar_target_value<double>(row[0]);
74  double max_val = read_scalar_target_value<double>(row[1]);
75  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
76  break;
77  }
78  case kVARCHAR:
79  case kCHAR:
80  case kTEXT:
81  if (ti.get_compression() == kENCODING_DICT) {
82  int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
83  int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
84  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
85  }
86  break;
87  default: {
88  return false; // skip column
89  }
90  }
91  return true;
92 }
93 
95  const std::shared_ptr<const InputColDescriptor> input_col_desc,
96  const std::vector<Analyzer::Expr*>& target_exprs) {
97  return RelAlgExecutionUnit{{input_col_desc->getScanDesc()},
98  {input_col_desc},
99  {},
100  {},
101  {},
102  {},
103  target_exprs,
104  {},
105  nullptr,
106  SortInfo(),
107  0};
108 }
109 
111  return CompilationOptions{device_type, false, ExecutorOptLevel::Default, false};
112 }
113 
115  return ExecutionOptions{false,
116  false,
117  false,
118  false,
119  false,
120  false,
121  false,
122  false,
123  false,
124  0,
125  false,
126  false,
127  0,
128  false,
129  false};
130 }
131 
132 } // namespace
133 
// NOTE(review): this is the body of what appears to be
// TableOptimizer::recomputeMetadata() const. Listing lines 134 (the signature),
// 136, 154 and 161 are missing from this extract. As a result, the statement
// that takes the write lock announced below and the definition of `stats`
// used in the column loop are not visible here.
135  auto timer = DEBUG_TIMER(__func__);
137 
138  LOG(INFO) << "Recomputing metadata for " << td_->tableName;
139 
140  CHECK_GE(td_->tableId, 0);
141 
// Collect the descriptors to process: every physical table when the table is
// sharded, otherwise just td_ itself.
142  std::vector<const TableDescriptor*> table_descriptors;
143  if (td_->nShards > 0) {
144  const auto physical_tds = cat_.getPhysicalTablesDescriptors(td_);
145  table_descriptors.insert(
146  table_descriptors.begin(), physical_tds.begin(), physical_tds.end());
147  } else {
148  table_descriptors.push_back(td_);
149  }
150 
151  auto& data_mgr = cat_.getDataMgr();
152 
153  // acquire write lock on table data
155 
156  for (const auto td : table_descriptors) {
// Give the executor a fresh RowSetMemoryOwner for this table. The guard's
// lambda sets the pointer back to nullptr (presumably when the guard goes out
// of scope at the end of the iteration).
157  ScopeGuard row_set_holder = [this] { executor_->row_set_mem_owner_ = nullptr; };
158  executor_->row_set_mem_owner_ = std::make_shared<RowSetMemoryOwner>(
159  ROW_SET_SIZE, executor_->executor_id_, /*num_threads=*/1);
160  const auto table_id = td->tableId;
162 
163  // TODO(adb): Support geo
// System, virtual and physical columns are all excluded (three false flags).
164  auto col_descs = cat_.getAllColumnMetadataForTable(table_id, false, false, false);
165  for (const auto& cd : col_descs) {
// No explicit memory level and an empty fragment index set are passed.
166  recomputeColumnMetadata(td, cd, stats.visible_row_count_per_fragment, {}, {});
167  }
// Checkpoint this table, then drop the executor's cached metadata.
168  data_mgr.checkpoint(cat_.getCurrentDB().dbId, table_id);
169  executor_->clearMetaInfoCache();
170  }
171 
// Clear CPU-level memory, and GPU-level memory too when GPUs are present.
172  data_mgr.clearMemory(Data_Namespace::MemoryLevel::CPU_LEVEL);
173  if (data_mgr.gpusPresent()) {
174  data_mgr.clearMemory(Data_Namespace::MemoryLevel::GPU_LEVEL);
175  }
176 }
177 
179  const TableUpdateMetadata& table_update_metadata) const {
// NOTE(review): the first signature line (listing line 178, presumably
// TableOptimizer::recomputeMetadataUnlocked) and listing lines 190, 194 and 197
// are missing from this extract. The definition of `stats`, the callee name of
// the call inside the inner loop and one of its arguments are therefore not
// visible here.
180  auto timer = DEBUG_TIMER(__func__);
// Group the columns that need a metadata refresh by the id of their table.
181  std::map<int, std::list<const ColumnDescriptor*>> columns_by_table_id;
182  auto& columns_for_update = table_update_metadata.columns_for_metadata_update;
183  for (const auto& entry : columns_for_update) {
184  auto column_descriptor = entry.first;
185  columns_by_table_id[column_descriptor->tableId].emplace_back(column_descriptor);
186  }
187 
188  for (const auto& [table_id, columns] : columns_by_table_id) {
189  auto td = cat_.getMetadataForTable(table_id);
191  for (const auto cd : columns) {
// Translate the fragment ids recorded for this column into fragment indexes.
192  CHECK(columns_for_update.find(cd) != columns_for_update.end());
193  auto fragment_indexes = getFragmentIndexes(td, columns_for_update.find(cd)->second);
195  cd,
196  stats.visible_row_count_per_fragment,
198  fragment_indexes);
199  }
200  }
201 }
202 
203 // Special case handle $deleted column if it exists
204 // whilst handling the delete column also capture
205 // the number of non deleted rows per fragment
207  const TableDescriptor* td,
208  const std::set<size_t>& fragment_indexes) const {
209  if (!td->hasDeletedCol) {
210  return {};
211  }
212 
213  auto stats = getDeletedColumnStats(td, fragment_indexes);
214  auto* fragmenter = td->fragmenter.get();
215  CHECK(fragmenter);
216  auto cd = cat_.getDeletedColumn(td);
217  fragmenter->updateChunkStats(cd, stats.chunk_stats_per_fragment, {});
218  fragmenter->setNumRows(stats.total_row_count);
219  return stats;
220 }
221 
223  const TableDescriptor* td,
224  const std::set<size_t>& fragment_indexes) const {
// NOTE(review): the first signature line (listing line 222, presumably
// DeletedColumnStats TableOptimizer::getDeletedColumnStats) and listing line
// 243, which must define the `co` passed to executeWorkUnitPerFragment below,
// are missing from this extract.
//
// Runs a COUNT aggregate over the deleted column once per fragment and derives
// from it the total row count, synthetic min/max chunk stats for the deleted
// column, and the count-derived row count for each fragment. Returns a
// default-constructed result when the table has no deleted column.
225  if (!td->hasDeletedCol) {
226  return {};
227  }
228 
229  auto cd = cat_.getDeletedColumn(td);
230  const auto column_id = cd->columnId;
231 
// Build the single-column input descriptor and the COUNT(<deleted column>)
// target expression.
232  const auto input_col_desc = std::make_shared<const InputColDescriptor>(
233  column_id, td->tableId, cat_.getDatabaseId(), 0);
234  const auto col_expr = makeExpr<Analyzer::ColumnVar>(
235  cd->columnType, shared::ColumnKey{cat_.getDatabaseId(), td->tableId, column_id}, 0);
236  const auto count_expr =
237  makeExpr<Analyzer::AggExpr>(cd->columnType, kCOUNT, col_expr, false, nullptr);
238 
239  const auto ra_exe_unit = build_ra_exe_unit(input_col_desc, {count_expr.get()});
240  const auto table_infos = get_table_infos(ra_exe_unit, executor_);
241  CHECK_EQ(table_infos.size(), size_t(1));
242 
244  const auto eo = get_execution_options();
245 
// The callback captures deleted_column_stats by reference. It is handed to
// executeWorkUnitPerFragment below, before this function returns.
246  DeletedColumnStats deleted_column_stats;
247  Executor::PerFragmentCallBack compute_deleted_callback =
248  [&deleted_column_stats, cd](
249  ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
250  // count number of tuples in $deleted as total number of tuples in table.
251  if (cd->isDeletedCol) {
252  deleted_column_stats.total_row_count += fragment_info.getPhysicalNumTuples();
253  }
254  if (fragment_info.getPhysicalNumTuples() == 0) {
255  // TODO(adb): Should not happen, but just to be safe...
256  LOG(WARNING) << "Skipping completely empty fragment for column "
257  << cd->columnName;
258  return;
259  }
260 
// The query has a single target, so the result row holds one value: the count.
261  const auto row = results->getNextRow(false, false);
262  CHECK_EQ(row.size(), size_t(1));
263 
264  const auto& ti = cd->columnType;
265 
266  auto chunk_metadata = std::make_shared<ChunkMetadata>();
267  chunk_metadata->sqlType = get_logical_type_info(ti);
268 
269  const auto count_val = read_scalar_target_value<int64_t>(row[0]);
270 
271  // min element 0 max element 1
272  std::vector<TargetValue> fakerow;
273 
// Per the branch comments below: a count equal to the physical tuple count
// means nothing was deleted, and a count of zero means everything was.
274  auto num_tuples = static_cast<size_t>(count_val);
275 
276  // calculate min
277  if (num_tuples == fragment_info.getPhysicalNumTuples()) {
278  // nothing deleted
279  // min = false;
280  // max = false;
281  fakerow.emplace_back(TargetValue{int64_t(0)});
282  fakerow.emplace_back(TargetValue{int64_t(0)});
283  } else {
284  if (num_tuples == 0) {
285  // everything marked as delete
286  // min = true
287  // max = true
288  fakerow.emplace_back(TargetValue{int64_t(1)});
289  fakerow.emplace_back(TargetValue{int64_t(1)});
290  } else {
291  // some deleted
292  // min = false
293  // max = true;
294  fakerow.emplace_back(TargetValue{int64_t(0)});
295  fakerow.emplace_back(TargetValue{int64_t(1)});
296  }
297  }
298 
299  // place the manufactured min and max in a fake row to use common infra
300  if (!set_metadata_from_results(*chunk_metadata, fakerow, ti, false)) {
301  LOG(WARNING) << "Unable to process new metadata values for column "
302  << cd->columnName;
303  return;
304  }
305 
// Record the chunk stats and the count-derived row count under the fragment id.
306  deleted_column_stats.chunk_stats_per_fragment.emplace(
307  std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
308  deleted_column_stats.visible_row_count_per_fragment.emplace(
309  std::make_pair(fragment_info.fragmentId, num_tuples));
310  };
311 
312  executor_->executeWorkUnitPerFragment(ra_exe_unit,
313  table_infos[0],
314  co,
315  eo,
316  cat_,
317  compute_deleted_callback,
318  fragment_indexes);
319  return deleted_column_stats;
320 }
321 
323  const TableDescriptor* td,
324  const ColumnDescriptor* cd,
325  const std::unordered_map</*fragment_id*/ int, size_t>& tuple_count_map,
326  std::optional<Data_Namespace::MemoryLevel> memory_level,
327  const std::set<size_t>& fragment_indexes) const {
// NOTE(review): the first signature line (listing line 322, presumably
// void TableOptimizer::recomputeColumnMetadata) and listing line 357, which
// must define the `co` passed to executeWorkUnitPerFragment below, are missing
// from this extract.
//
// Runs MIN/MAX/COUNT over one column per fragment and passes the resulting
// chunk stats to the table's fragmenter. Variable-length columns are skipped.
// tuple_count_map supplies a per-fragment row count to compare COUNT against
// when deciding has_nulls. Fragments absent from the map fall back to the
// physical tuple count.
328  const auto ti = cd->columnType;
329  if (ti.is_varlen()) {
330  LOG(INFO) << "Skipping varlen column " << cd->columnName;
331  return;
332  }
333 
334  const auto column_id = cd->columnId;
335  const auto input_col_desc = std::make_shared<const InputColDescriptor>(
336  column_id, td->tableId, cat_.getDatabaseId(), 0);
337  const auto col_expr = makeExpr<Analyzer::ColumnVar>(
338  cd->columnType, shared::ColumnKey{cat_.getDatabaseId(), td->tableId, column_id}, 0);
339  auto max_expr =
340  makeExpr<Analyzer::AggExpr>(cd->columnType, kMAX, col_expr, false, nullptr);
341  auto min_expr =
342  makeExpr<Analyzer::AggExpr>(cd->columnType, kMIN, col_expr, false, nullptr);
343  auto count_expr =
344  makeExpr<Analyzer::AggExpr>(cd->columnType, kCOUNT, col_expr, false, nullptr);
345 
// For string columns, MIN/MAX are taken over KeyForStringExpr(column) with
// result type kINT instead of over the column itself.
346  if (ti.is_string()) {
347  const SQLTypeInfo fun_ti(kINT);
348  const auto fun_expr = makeExpr<Analyzer::KeyForStringExpr>(col_expr);
349  max_expr = makeExpr<Analyzer::AggExpr>(fun_ti, kMAX, fun_expr, false, nullptr);
350  min_expr = makeExpr<Analyzer::AggExpr>(fun_ti, kMIN, fun_expr, false, nullptr);
351  }
// Target order is min, max, count. The callback reads row[2] as the count and
// set_metadata_from_results reads row[0] / row[1] as min / max.
352  const auto ra_exe_unit = build_ra_exe_unit(
353  input_col_desc, {min_expr.get(), max_expr.get(), count_expr.get()});
354  const auto table_infos = get_table_infos(ra_exe_unit, executor_);
355  CHECK_EQ(table_infos.size(), size_t(1));
356 
358  const auto eo = get_execution_options();
359 
360  std::unordered_map</*fragment_id*/ int, ChunkStats> stats_map;
361 
362  Executor::PerFragmentCallBack compute_metadata_callback =
363  [&stats_map, &tuple_count_map, cd](
364  ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
365  if (fragment_info.getPhysicalNumTuples() == 0) {
366  // TODO(adb): Should not happen, but just to be safe...
367  LOG(WARNING) << "Skipping completely empty fragment for column "
368  << cd->columnName;
369  return;
370  }
371 
372  const auto row = results->getNextRow(false, false);
373  CHECK_EQ(row.size(), size_t(3));
374 
375  const auto& ti = cd->columnType;
376 
377  auto chunk_metadata = std::make_shared<ChunkMetadata>();
378  chunk_metadata->sqlType = get_logical_type_info(ti);
379 
380  const auto count_val = read_scalar_target_value<int64_t>(row[2]);
381  if (count_val == 0) {
382  // Assume chunk of all nulls, bail
383  return;
384  }
385 
// has_nulls is true whenever COUNT(column) differs from the expected row count
// for the fragment.
386  bool has_nulls = true; // default to wide
387  auto tuple_count_itr = tuple_count_map.find(fragment_info.fragmentId);
388  if (tuple_count_itr != tuple_count_map.end()) {
389  has_nulls = !(static_cast<size_t>(count_val) == tuple_count_itr->second);
390  } else {
391  // no deleted column calc so use raw physical count
392  has_nulls =
393  !(static_cast<size_t>(count_val) == fragment_info.getPhysicalNumTuples());
394  }
395 
396  if (!set_metadata_from_results(*chunk_metadata, row, ti, has_nulls)) {
397  LOG(WARNING) << "Unable to process new metadata values for column "
398  << cd->columnName;
399  return;
400  }
401 
402  stats_map.emplace(
403  std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
404  };
405 
406  executor_->executeWorkUnitPerFragment(ra_exe_unit,
407  table_infos[0],
408  co,
409  eo,
410  cat_,
411  compute_metadata_callback,
412  fragment_indexes);
413 
// Hand the collected per-fragment stats to the fragmenter.
414  auto* fragmenter = td->fragmenter.get();
415  CHECK(fragmenter);
416  fragmenter->updateChunkStats(cd, stats_map, memory_level);
417 }
418 
419 // Returns the corresponding indexes for the given fragment ids in the list of fragments
420 // returned by `getFragmentsForQuery()`
422  const TableDescriptor* td,
423  const std::set<int>& fragment_ids) const {
424  CHECK(td->fragmenter);
425  auto table_info = td->fragmenter->getFragmentsForQuery();
426  std::set<size_t> fragment_indexes;
427  for (size_t i = 0; i < table_info.fragments.size(); i++) {
428  if (shared::contains(fragment_ids, table_info.fragments[i].fragmentId)) {
429  fragment_indexes.emplace(i);
430  }
431  }
432  return fragment_indexes;
433 }
434 
// NOTE(review): this is the body of what appears to be
// TableOptimizer::vacuumDeletedRows() const. Listing lines 435 (the signature),
// 440 (the initializer of `table_lock`) and 455 (the start of the call whose
// last argument is `shard->tableId`) are missing from this extract.
436  auto timer = DEBUG_TIMER(__func__);
437  const auto table_id = td_->tableId;
438  const auto db_id = cat_.getDatabaseId();
439  const auto table_lock =
// Snapshot the table epochs before vacuuming so the catch block can restore
// them if anything throws.
441  const auto table_epochs = cat_.getTableEpochs(db_id, table_id);
442  const auto shards = cat_.getPhysicalTablesDescriptors(td_);
443  try {
444  for (const auto shard : shards) {
445  vacuumFragments(shard);
446  }
447  cat_.checkpoint(table_id);
448  } catch (...) {
// Put the saved epochs back, then rethrow the original exception.
449  cat_.setTableEpochsLogExceptions(db_id, table_epochs);
450  throw;
451  }
452 
// After a successful vacuum, drop each shard's fragmenter from the catalog.
453  for (auto shard : shards) {
454  cat_.removeFragmenterForTable(shard->tableId);
456  shard->tableId);
457  }
458 }
459 
460 namespace {
// Builds the set of chunk keys, for one fragment of a table, for which
// data_mgr.isBufferOnDevice(...) reports false.
// NOTE(review): listing lines 471, 476 and 481, which hold the arguments of the
// three isBufferOnDevice calls, are missing from this extract. The memory level
// and device being tested are not visible here; the function name suggests CPU.
461 std::set<ChunkKey> get_uncached_cpu_chunk_keys(const Catalog_Namespace::Catalog& catalog,
462  int32_t table_id,
463  int32_t fragment_id) {
464  auto& data_mgr = catalog.getDataMgr();
465  std::set<ChunkKey> uncached_cpu_chunk_keys;
// System and virtual columns are excluded; physical columns are included.
466  for (auto cd : catalog.getAllColumnMetadataForTable(table_id, false, false, true)) {
467  ChunkKey chunk_key{catalog.getDatabaseId(), table_id, cd->columnId, fragment_id};
// Columns for which is_varlen_indeed() is true are checked under two keys: a
// fifth key component of 1 and then of 2.
468  if (cd->columnType.is_varlen_indeed()) {
469  chunk_key.emplace_back(1);
470  if (!data_mgr.isBufferOnDevice(
472  uncached_cpu_chunk_keys.emplace(chunk_key);
473  }
474  chunk_key.back() = 2;
475  if (!data_mgr.isBufferOnDevice(
477  uncached_cpu_chunk_keys.emplace(chunk_key);
478  }
479  } else {
480  if (!data_mgr.isBufferOnDevice(
482  uncached_cpu_chunk_keys.emplace(chunk_key);
483  }
484  }
485  }
486  return uncached_cpu_chunk_keys;
487 }
488 
490  const std::set<ChunkKey>& cpu_chunks_to_delete) {
// NOTE(review): the first signature line (listing line 489, presumably
// void delete_cpu_chunks(const Catalog_Namespace::Catalog& catalog,) and the
// loop body (listing line 493) are missing from this extract. The visible code
// only iterates over the given chunk keys.
491  auto& data_mgr = catalog.getDataMgr();
492  for (const auto& chunk_key : cpu_chunks_to_delete) {
494  }
495 }
496 } // namespace
497 
499  const std::set<int>& fragment_ids) const {
// NOTE(review): the first signature line (listing line 498, presumably
// void TableOptimizer::vacuumFragments(const TableDescriptor* td,) and listing
// line 522 are missing from this extract. Nothing visible assigns
// updel_roll.memoryLevel, which is read below, so line 522 is the likely place
// it is set.
500  // Nothing to do if the table has no deleted column (it does not support delete).
501  const ColumnDescriptor* cd = cat_.getDeletedColumn(td);
502  if (nullptr == cd) {
503  return;
504  }
505  // vacuum chunks which show sign of deleted rows in metadata
506  ChunkKey chunk_key_prefix = {cat_.getDatabaseId(), td->tableId, cd->columnId};
507  ChunkMetadataVector chunk_metadata_vec;
508  auto& data_mgr = cat_.getDataMgr();
509  data_mgr.getChunkMetadataVecForKeyPrefix(chunk_metadata_vec, chunk_key_prefix);
510  for (auto& [chunk_key, chunk_metadata] : chunk_metadata_vec) {
511  auto fragment_id = chunk_key[CHUNK_KEY_FRAGMENT_IDX];
512  // If delete has occurred, only vacuum fragments that are in the fragment_ids set.
513  // Empty fragment_ids set implies all fragments.
// A max stat of 1 on the deleted column is the signal that delete has occurred.
514  if (chunk_metadata->chunkStats.max.tinyintval == 1 &&
515  (fragment_ids.empty() || shared::contains(fragment_ids, fragment_id))) {
// Collect these keys before compacting; they are passed to delete_cpu_chunks
// once the fragment has been compacted.
516  auto cpu_chunks_to_delete =
517  get_uncached_cpu_chunk_keys(cat_, td->tableId, fragment_id);
518 
519  UpdelRoll updel_roll;
520  updel_roll.catalog = &cat_;
521  updel_roll.logicalTableId = cat_.getLogicalTableId(td->tableId);
523  updel_roll.table_descriptor = td;
524  CHECK_EQ(cd->columnId, chunk_key[CHUNK_KEY_COLUMN_IDX]);
// Fetch the deleted-column chunk and compact the fragment using the vacuum
// offsets the fragmenter derives from it.
525  const auto chunk = Chunk_NS::Chunk::getChunk(cd,
526  &cat_.getDataMgr(),
527  chunk_key,
528  updel_roll.memoryLevel,
529  0,
530  chunk_metadata->numBytes,
531  chunk_metadata->numElements);
532  td->fragmenter->compactRows(&cat_,
533  td,
534  fragment_id,
535  td->fragmenter->getVacuumOffsets(chunk),
536  updel_roll.memoryLevel,
537  updel_roll);
538  updel_roll.stageUpdate();
539 
540  delete_cpu_chunks(cat_, cpu_chunks_to_delete);
541  }
542  }
543  td->fragmenter->resetSizesFromFragments();
544 }
545 
547  const TableUpdateMetadata& table_update_metadata) const {
// NOTE(review): the first signature line (listing line 546, presumably
// void TableOptimizer::vacuumFragmentsAboveMinSelectivity) and listing lines
// 548, 563, 592, 600 and 608 are missing from this extract. Not visible here:
// the condition guarding the early return below, the start of the statement
// ending in `executor_->execute_mutex_);`, the initializer of `table_lock`,
// the last statement of the try block, and the body of the final else branch.
549  return;
550  }
551  auto timer = DEBUG_TIMER(__func__);
// Pass 1: for each table with deleted rows, select the fragments whose deleted
// fraction reaches g_vacuum_min_selectivity.
552  std::map<const TableDescriptor*, std::set<int32_t>> fragments_to_vacuum;
553  for (const auto& [table_id, fragment_ids] :
554  table_update_metadata.fragments_with_deleted_rows) {
555  auto td = cat_.getMetadataForTable(table_id);
556  // Skip automatic vacuuming for tables with uncapped epoch
557  if (td->maxRollbackEpochs == -1) {
558  continue;
559  }
560 
561  DeletedColumnStats deleted_column_stats;
562  {
564  executor_->execute_mutex_);
// Install a fresh RowSetMemoryOwner for the stats query. The guard's lambda
// sets the pointer back to nullptr (presumably when this inner scope exits).
565  ScopeGuard row_set_holder = [this] { executor_->row_set_mem_owner_ = nullptr; };
566  executor_->row_set_mem_owner_ = std::make_shared<RowSetMemoryOwner>(
567  ROW_SET_SIZE, executor_->executor_id_, /*num_threads=*/1);
568  deleted_column_stats =
569  getDeletedColumnStats(td, getFragmentIndexes(td, fragment_ids));
570  executor_->clearMetaInfoCache();
571  }
572 
573  std::set<int32_t> filtered_fragment_ids;
574  for (const auto [fragment_id, visible_row_count] :
575  deleted_column_stats.visible_row_count_per_fragment) {
// deleted fraction = (physical rows - visible rows) / physical rows
576  auto total_row_count =
577  td->fragmenter->getFragmentInfo(fragment_id)->getPhysicalNumTuples();
578  float deleted_row_count = total_row_count - visible_row_count;
579  if ((deleted_row_count / total_row_count) >= g_vacuum_min_selectivity) {
580  filtered_fragment_ids.emplace(fragment_id);
581  }
582  }
583 
584  if (!filtered_fragment_ids.empty()) {
585  fragments_to_vacuum[td] = filtered_fragment_ids;
586  }
587  }
588 
// Pass 2: vacuum the selected fragments. The table epochs are saved first so
// the catch block can restore them if anything throws.
589  if (!fragments_to_vacuum.empty()) {
590  const auto db_id = cat_.getDatabaseId();
591  const auto table_lock =
593  const auto table_epochs = cat_.getTableEpochs(db_id, td_->tableId);
594  try {
595  for (const auto& [td, fragment_ids] : fragments_to_vacuum) {
596  vacuumFragments(td, fragment_ids);
597  VLOG(1) << "Auto-vacuumed fragments: " << shared::printContainer(fragment_ids)
598  << ", table id: " << td->tableId;
599  }
601  } catch (...) {
602  cat_.setTableEpochsLogExceptions(db_id, table_epochs);
603  throw;
604  }
605  } else {
606  // Checkpoint, even when no data update occurs, in order to ensure that epochs are
607  // uniformly incremented in distributed mode.
609  }
610 }
bool contains(const T &container, const U &element)
Definition: misc.h:195
Defines data structures for the semantic analysis phase of query processing.
Data_Namespace::MemoryLevel memoryLevel
Definition: UpdelRoll.h:55
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< int > ChunkKey
Definition: types.h:36
DeletedColumnStats getDeletedColumnStats(const TableDescriptor *td, const std::set< size_t > &fragment_indexes) const
void vacuumFragmentsAboveMinSelectivity(const TableUpdateMetadata &table_update_metadata) const
std::string cat(Ts &&...args)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143
Definition: sqltypes.h:76
const TableDescriptor * table_descriptor
Definition: UpdelRoll.h:58
std::string tableName
const ColumnDescriptor * getDeletedColumn(const TableDescriptor *td) const
Definition: Catalog.cpp:3897
void recomputeMetadataUnlocked(const TableUpdateMetadata &table_update_metadata) const
Recomputes column chunk metadata for the given set of fragments. The caller of this method is expecte...
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266
#define LOG(tag)
Definition: Logger.h:285
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:225
CompilationOptions get_compilation_options(const ExecutorDeviceType &device_type)
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
void fillChunkStats(const T min, const T max, const bool has_nulls)
Definition: ChunkMetadata.h:51
TableToFragmentIds fragments_with_deleted_rows
Definition: Execute.h:340
void delete_cpu_chunks(const Catalog_Namespace::Catalog &catalog, const std::set< ChunkKey > &cpu_chunks_to_delete)
#define CHECK_GE(x, y)
Definition: Logger.h:306
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1470
std::shared_ptr< ResultSet > ResultSetPtr
dictionary stats
Definition: report.py:116
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
static constexpr size_t ROW_SET_SIZE
ExecutorDeviceType
const TableDescriptor * td_
Definition: sqldefs.h:75
Executor * executor_
T read_scalar_target_value(const TargetValue &tv)
void stageUpdate()
const Catalog_Namespace::Catalog * catalog
Definition: UpdelRoll.h:53
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:265
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:496
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:86
executor_(executor)
std::unique_lock< T > unique_lock
std::unordered_map< int, size_t > visible_row_count_per_fragment
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
int getDatabaseId() const
Definition: Catalog.h:326
void vacuumDeletedRows() const
Compacts fragments to remove deleted rows. When a row is deleted, a boolean deleted system column is ...
int getLogicalTableId(const int physicalTableId) const
Definition: Catalog.cpp:5008
specifies the content in-memory of a row in the column metadata table
std::unordered_map< int, ChunkStats > chunk_stats_per_fragment
void checkpointWithAutoRollback(const int logical_table_id) const
Definition: Catalog.cpp:5030
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logical_table_desc, bool populate_fragmenter=true) const
Definition: Catalog.cpp:4869
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
void recomputeColumnMetadata(const TableDescriptor *td, const ColumnDescriptor *cd, const std::unordered_map< int, size_t > &tuple_count_map, std::optional< Data_Namespace::MemoryLevel > memory_level, const std::set< size_t > &fragment_indexes) const
DeletedColumnStats recomputeDeletedColumnMetadata(const TableDescriptor *td, const std::set< size_t > &fragment_indexes={}) const
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
Definition: DataMgr.cpp:649
void checkpoint(const int logicalTableId) const
Definition: Catalog.cpp:5022
ColumnToFragmentsMap columns_for_metadata_update
Definition: Execute.h:339
void deleteChunksWithPrefix(const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:522
Definition: sqltypes.h:79
Definition: sqltypes.h:80
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2172
Definition: sqldefs.h:78
Data_Namespace::MemoryLevel persistenceLevel
float g_vacuum_min_selectivity
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:4260
void compactDataFiles(const int32_t db_id, const int32_t tb_id)
Definition: sqltypes.h:68
int logicalTableId
Definition: UpdelRoll.h:54
bool set_metadata_from_results(ChunkMetadata &chunk_metadata, const std::vector< TargetValue > &row, const SQLTypeInfo &ti, const bool has_nulls)
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
Definition: Execute.h:890
std::set< ChunkKey > get_uncached_cpu_chunk_keys(const Catalog_Namespace::Catalog &catalog, int32_t table_id, int32_t fragment_id)
#define CHECK(condition)
Definition: Logger.h:291
std::vector< InputTableInfo > get_table_infos(const std::vector< InputDescriptor > &input_descs, Executor *executor)
#define DEBUG_TIMER(name)
Definition: Logger.h:412
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
TableOptimizer(const TableDescriptor *td, Executor *executor, const Catalog_Namespace::Catalog &cat)
void setTableEpochsLogExceptions(const int32_t db_id, const std::vector< TableEpochInfo > &table_epochs) const
Definition: Catalog.cpp:3885
Definition: sqltypes.h:72
SQLTypeInfo columnType
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:107
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40
Definition: sqldefs.h:76
RelAlgExecutionUnit build_ra_exe_unit(const std::shared_ptr< const InputColDescriptor > input_col_desc, const std::vector< Analyzer::Expr * > &target_exprs)
std::string columnName
#define VLOG(n)
Definition: Logger.h:388
const Catalog_Namespace::Catalog & cat_
std::vector< TableEpochInfo > getTableEpochs(const int32_t db_id, const int32_t table_id) const
Definition: Catalog.cpp:3821
void recomputeMetadata() const
Recomputes per-chunk metadata for each fragment in the table. Updates and deletes can cause chunk met...
std::set< size_t > getFragmentIndexes(const TableDescriptor *td, const std::set< int > &fragment_ids) const
void vacuumFragments(const TableDescriptor *td, const std::set< int > &fragment_ids={}) const