OmniSciDB  4201147b46
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableOptimizer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "TableOptimizer.h"
18 
19 #include "Analyzer/Analyzer.h"
20 #include "LockMgr/LockMgr.h"
21 #include "Logger/Logger.h"
22 #include "QueryEngine/Execute.h"
23 #include "Shared/misc.h"
24 #include "Shared/scope.h"
25 
26 // By default, when rows are deleted, vacuum fragments with at least 10% deleted rows
28 
30  Executor* executor,
32  : td_(td), executor_(executor), cat_(cat) {
33  CHECK(td);
34 }
35 namespace {
36 
37 template <typename T>
39  const auto stv = boost::get<ScalarTargetValue>(&tv);
40  CHECK(stv);
41  const auto val_ptr = boost::get<T>(stv);
42  CHECK(val_ptr);
43  return *val_ptr;
44 }
45 
47  const std::vector<TargetValue>& row,
48  const SQLTypeInfo& ti,
49  const bool has_nulls) {
50  switch (ti.get_type()) {
51  case kBOOLEAN:
52  case kTINYINT:
53  case kSMALLINT:
54  case kINT:
55  case kBIGINT:
56  case kNUMERIC:
57  case kDECIMAL:
58  case kTIME:
59  case kTIMESTAMP:
60  case kDATE: {
61  int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
62  int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
63  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
64  break;
65  }
66  case kFLOAT: {
67  float min_val = read_scalar_target_value<float>(row[0]);
68  float max_val = read_scalar_target_value<float>(row[1]);
69  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
70  break;
71  }
72  case kDOUBLE: {
73  double min_val = read_scalar_target_value<double>(row[0]);
74  double max_val = read_scalar_target_value<double>(row[1]);
75  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
76  break;
77  }
78  case kVARCHAR:
79  case kCHAR:
80  case kTEXT:
81  if (ti.get_compression() == kENCODING_DICT) {
82  int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
83  int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
84  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
85  }
86  break;
87  default: {
88  return false; // skip column
89  }
90  }
91  return true;
92 }
93 
95  const std::shared_ptr<const InputColDescriptor> input_col_desc,
96  const std::vector<Analyzer::Expr*>& target_exprs) {
97  return RelAlgExecutionUnit{{input_col_desc->getScanDesc()},
98  {input_col_desc},
99  {},
100  {},
101  {},
102  {},
103  target_exprs,
104  nullptr,
105  SortInfo{{}, SortAlgorithm::Default, 0, 0, false},
106  0};
107 }
108 
110  return CompilationOptions{device_type, false, ExecutorOptLevel::Default, false};
111 }
112 
114  return ExecutionOptions{false,
115  false,
116  false,
117  false,
118  false,
119  false,
120  false,
121  false,
122  false,
123  0,
124  false,
125  false,
126  0,
127  false};
128 }
129 
130 } // namespace
131 
133  auto timer = DEBUG_TIMER(__func__);
135 
136  LOG(INFO) << "Recomputing metadata for " << td_->tableName;
137 
138  CHECK_GE(td_->tableId, 0);
139 
140  std::vector<const TableDescriptor*> table_descriptors;
141  if (td_->nShards > 0) {
142  const auto physical_tds = cat_.getPhysicalTablesDescriptors(td_);
143  table_descriptors.insert(
144  table_descriptors.begin(), physical_tds.begin(), physical_tds.end());
145  } else {
146  table_descriptors.push_back(td_);
147  }
148 
149  auto& data_mgr = cat_.getDataMgr();
150 
151  // acquire write lock on table data
153 
154  for (const auto td : table_descriptors) {
155  ScopeGuard row_set_holder = [this] { executor_->row_set_mem_owner_ = nullptr; };
156  executor_->row_set_mem_owner_ =
157  std::make_shared<RowSetMemoryOwner>(ROW_SET_SIZE, /*num_threads=*/1);
158  executor_->catalog_ = &cat_;
159  const auto table_id = td->tableId;
160  auto stats = recomputeDeletedColumnMetadata(td);
161 
162  // TODO(adb): Support geo
163  auto col_descs = cat_.getAllColumnMetadataForTable(table_id, false, false, false);
164  for (const auto& cd : col_descs) {
165  recomputeColumnMetadata(td, cd, stats.visible_row_count_per_fragment, {}, {});
166  }
167  data_mgr.checkpoint(cat_.getCurrentDB().dbId, table_id);
168  executor_->clearMetaInfoCache();
169  }
170 
171  data_mgr.clearMemory(Data_Namespace::MemoryLevel::CPU_LEVEL);
172  if (data_mgr.gpusPresent()) {
173  data_mgr.clearMemory(Data_Namespace::MemoryLevel::GPU_LEVEL);
174  }
175 }
176 
178  const TableUpdateMetadata& table_update_metadata) const {
179  auto timer = DEBUG_TIMER(__func__);
180  std::map<int, std::list<const ColumnDescriptor*>> columns_by_table_id;
181  auto& columns_for_update = table_update_metadata.columns_for_metadata_update;
182  for (const auto& entry : columns_for_update) {
183  auto column_descriptor = entry.first;
184  columns_by_table_id[column_descriptor->tableId].emplace_back(column_descriptor);
185  }
186 
187  for (const auto& [table_id, columns] : columns_by_table_id) {
188  auto td = cat_.getMetadataForTable(table_id);
189  auto stats = recomputeDeletedColumnMetadata(td);
190  for (const auto cd : columns) {
191  CHECK(columns_for_update.find(cd) != columns_for_update.end());
192  auto fragment_indexes = getFragmentIndexes(td, columns_for_update.find(cd)->second);
194  cd,
195  stats.visible_row_count_per_fragment,
197  fragment_indexes);
198  }
199  }
200 }
201 
202 // Special-case handling of the $deleted column, if it exists.
203 // While handling the deleted column, also capture
204 // the number of non-deleted rows per fragment.
206  const TableDescriptor* td,
207  const std::set<size_t>& fragment_indexes) const {
208  if (!td->hasDeletedCol) {
209  return {};
210  }
211 
212  auto stats = getDeletedColumnStats(td, fragment_indexes);
213  auto* fragmenter = td->fragmenter.get();
214  CHECK(fragmenter);
215  auto cd = cat_.getDeletedColumn(td);
216  fragmenter->updateChunkStats(cd, stats.chunk_stats_per_fragment, {});
217  fragmenter->setNumRows(stats.total_row_count);
218  return stats;
219 }
220 
222  const TableDescriptor* td,
223  const std::set<size_t>& fragment_indexes) const {
224  if (!td->hasDeletedCol) {
225  return {};
226  }
227 
228  auto cd = cat_.getDeletedColumn(td);
229  const auto column_id = cd->columnId;
230 
231  const auto input_col_desc =
232  std::make_shared<const InputColDescriptor>(column_id, td->tableId, 0);
233  const auto col_expr =
234  makeExpr<Analyzer::ColumnVar>(cd->columnType, td->tableId, column_id, 0);
235  const auto count_expr =
236  makeExpr<Analyzer::AggExpr>(cd->columnType, kCOUNT, col_expr, false, nullptr);
237 
238  const auto ra_exe_unit = build_ra_exe_unit(input_col_desc, {count_expr.get()});
239  const auto table_infos = get_table_infos(ra_exe_unit, executor_);
240  CHECK_EQ(table_infos.size(), size_t(1));
241 
243  const auto eo = get_execution_options();
244 
245  DeletedColumnStats deleted_column_stats;
246  Executor::PerFragmentCallBack compute_deleted_callback =
247  [&deleted_column_stats, cd](
248  ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
249  // count number of tuples in $deleted as total number of tuples in table.
250  if (cd->isDeletedCol) {
251  deleted_column_stats.total_row_count += fragment_info.getPhysicalNumTuples();
252  }
253  if (fragment_info.getPhysicalNumTuples() == 0) {
254  // TODO(adb): Should not happen, but just to be safe...
255  LOG(WARNING) << "Skipping completely empty fragment for column "
256  << cd->columnName;
257  return;
258  }
259 
260  const auto row = results->getNextRow(false, false);
261  CHECK_EQ(row.size(), size_t(1));
262 
263  const auto& ti = cd->columnType;
264 
265  auto chunk_metadata = std::make_shared<ChunkMetadata>();
266  chunk_metadata->sqlType = get_logical_type_info(ti);
267 
268  const auto count_val = read_scalar_target_value<int64_t>(row[0]);
269 
270  // min element 0 max element 1
271  std::vector<TargetValue> fakerow;
272 
273  auto num_tuples = static_cast<size_t>(count_val);
274 
275  // calculate min
276  if (num_tuples == fragment_info.getPhysicalNumTuples()) {
277  // nothing deleted
278  // min = false;
279  // max = false;
280  fakerow.emplace_back(TargetValue{int64_t(0)});
281  fakerow.emplace_back(TargetValue{int64_t(0)});
282  } else {
283  if (num_tuples == 0) {
284  // everything marked as deleted
285  // min = true
286  // max = true
287  fakerow.emplace_back(TargetValue{int64_t(1)});
288  fakerow.emplace_back(TargetValue{int64_t(1)});
289  } else {
290  // some deleted
291  // min = false
292  // max = true;
293  fakerow.emplace_back(TargetValue{int64_t(0)});
294  fakerow.emplace_back(TargetValue{int64_t(1)});
295  }
296  }
297 
298  // place the manufactured min and max in a fake row to use common infra
299  if (!set_metadata_from_results(*chunk_metadata, fakerow, ti, false)) {
300  LOG(WARNING) << "Unable to process new metadata values for column "
301  << cd->columnName;
302  return;
303  }
304 
305  deleted_column_stats.chunk_stats_per_fragment.emplace(
306  std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
307  deleted_column_stats.visible_row_count_per_fragment.emplace(
308  std::make_pair(fragment_info.fragmentId, num_tuples));
309  };
310 
311  executor_->executeWorkUnitPerFragment(ra_exe_unit,
312  table_infos[0],
313  co,
314  eo,
315  cat_,
316  compute_deleted_callback,
317  fragment_indexes);
318  return deleted_column_stats;
319 }
320 
322  const TableDescriptor* td,
323  const ColumnDescriptor* cd,
324  const std::unordered_map</*fragment_id*/ int, size_t>& tuple_count_map,
325  std::optional<Data_Namespace::MemoryLevel> memory_level,
326  const std::set<size_t>& fragment_indexes) const {
327  const auto ti = cd->columnType;
328  if (ti.is_varlen()) {
329  LOG(INFO) << "Skipping varlen column " << cd->columnName;
330  return;
331  }
332 
333  const auto column_id = cd->columnId;
334  const auto input_col_desc =
335  std::make_shared<const InputColDescriptor>(column_id, td->tableId, 0);
336  const auto col_expr =
337  makeExpr<Analyzer::ColumnVar>(cd->columnType, td->tableId, column_id, 0);
338  auto max_expr =
339  makeExpr<Analyzer::AggExpr>(cd->columnType, kMAX, col_expr, false, nullptr);
340  auto min_expr =
341  makeExpr<Analyzer::AggExpr>(cd->columnType, kMIN, col_expr, false, nullptr);
342  auto count_expr =
343  makeExpr<Analyzer::AggExpr>(cd->columnType, kCOUNT, col_expr, false, nullptr);
344 
345  if (ti.is_string()) {
346  const SQLTypeInfo fun_ti(kINT);
347  const auto fun_expr = makeExpr<Analyzer::KeyForStringExpr>(col_expr);
348  max_expr = makeExpr<Analyzer::AggExpr>(fun_ti, kMAX, fun_expr, false, nullptr);
349  min_expr = makeExpr<Analyzer::AggExpr>(fun_ti, kMIN, fun_expr, false, nullptr);
350  }
351  const auto ra_exe_unit = build_ra_exe_unit(
352  input_col_desc, {min_expr.get(), max_expr.get(), count_expr.get()});
353  const auto table_infos = get_table_infos(ra_exe_unit, executor_);
354  CHECK_EQ(table_infos.size(), size_t(1));
355 
357  const auto eo = get_execution_options();
358 
359  std::unordered_map</*fragment_id*/ int, ChunkStats> stats_map;
360 
361  Executor::PerFragmentCallBack compute_metadata_callback =
362  [&stats_map, &tuple_count_map, cd](
363  ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
364  if (fragment_info.getPhysicalNumTuples() == 0) {
365  // TODO(adb): Should not happen, but just to be safe...
366  LOG(WARNING) << "Skipping completely empty fragment for column "
367  << cd->columnName;
368  return;
369  }
370 
371  const auto row = results->getNextRow(false, false);
372  CHECK_EQ(row.size(), size_t(3));
373 
374  const auto& ti = cd->columnType;
375 
376  auto chunk_metadata = std::make_shared<ChunkMetadata>();
377  chunk_metadata->sqlType = get_logical_type_info(ti);
378 
379  const auto count_val = read_scalar_target_value<int64_t>(row[2]);
380  if (count_val == 0) {
381  // Assume chunk of all nulls, bail
382  return;
383  }
384 
385  bool has_nulls = true; // default to wide
386  auto tuple_count_itr = tuple_count_map.find(fragment_info.fragmentId);
387  if (tuple_count_itr != tuple_count_map.end()) {
388  has_nulls = !(static_cast<size_t>(count_val) == tuple_count_itr->second);
389  } else {
390  // no deleted column calc so use raw physical count
391  has_nulls =
392  !(static_cast<size_t>(count_val) == fragment_info.getPhysicalNumTuples());
393  }
394 
395  if (!set_metadata_from_results(*chunk_metadata, row, ti, has_nulls)) {
396  LOG(WARNING) << "Unable to process new metadata values for column "
397  << cd->columnName;
398  return;
399  }
400 
401  stats_map.emplace(
402  std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
403  };
404 
405  executor_->executeWorkUnitPerFragment(ra_exe_unit,
406  table_infos[0],
407  co,
408  eo,
409  cat_,
410  compute_metadata_callback,
411  fragment_indexes);
412 
413  auto* fragmenter = td->fragmenter.get();
414  CHECK(fragmenter);
415  fragmenter->updateChunkStats(cd, stats_map, memory_level);
416 }
417 
418 // Returns the corresponding indexes for the given fragment ids in the list of fragments
419 // returned by `getFragmentsForQuery()`
421  const TableDescriptor* td,
422  const std::set<int>& fragment_ids) const {
423  CHECK(td->fragmenter);
424  auto table_info = td->fragmenter->getFragmentsForQuery();
425  std::set<size_t> fragment_indexes;
426  for (size_t i = 0; i < table_info.fragments.size(); i++) {
427  if (shared::contains(fragment_ids, table_info.fragments[i].fragmentId)) {
428  fragment_indexes.emplace(i);
429  }
430  }
431  return fragment_indexes;
432 }
433 
435  auto timer = DEBUG_TIMER(__func__);
436  const auto table_id = td_->tableId;
437  const auto db_id = cat_.getDatabaseId();
438  const auto table_lock =
440  const auto table_epochs = cat_.getTableEpochs(db_id, table_id);
441  const auto shards = cat_.getPhysicalTablesDescriptors(td_);
442  try {
443  for (const auto shard : shards) {
444  vacuumFragments(shard);
445  }
446  cat_.checkpoint(table_id);
447  } catch (...) {
448  cat_.setTableEpochsLogExceptions(db_id, table_epochs);
449  throw;
450  }
451 
452  for (auto shard : shards) {
453  cat_.removeFragmenterForTable(shard->tableId);
455  shard->tableId);
456  }
457 }
458 
460  const std::set<int>& fragment_ids) const {
460  // If the table has no deleted column (deletes are not supported), there is nothing to do.
462  const ColumnDescriptor* cd = cat_.getDeletedColumn(td);
463  if (nullptr == cd) {
464  return;
465  }
466  // vacuum chunks whose metadata shows signs of deleted rows
467  ChunkKey chunk_key_prefix = {cat_.getDatabaseId(), td->tableId, cd->columnId};
468  ChunkMetadataVector chunk_metadata_vec;
469  cat_.getDataMgr().getChunkMetadataVecForKeyPrefix(chunk_metadata_vec, chunk_key_prefix);
470  for (auto& [chunk_key, chunk_metadata] : chunk_metadata_vec) {
471  auto fragment_id = chunk_key[CHUNK_KEY_FRAGMENT_IDX];
472  // If delete has occurred, only vacuum fragments that are in the fragment_ids set.
473  // Empty fragment_ids set implies all fragments.
474  if (chunk_metadata->chunkStats.max.tinyintval == 1 &&
475  (fragment_ids.empty() || shared::contains(fragment_ids, fragment_id))) {
476  UpdelRoll updel_roll;
477  updel_roll.catalog = &cat_;
478  updel_roll.logicalTableId = cat_.getLogicalTableId(td->tableId);
480  updel_roll.table_descriptor = td;
481  CHECK_EQ(cd->columnId, chunk_key[CHUNK_KEY_COLUMN_IDX]);
482  const auto chunk = Chunk_NS::Chunk::getChunk(cd,
483  &cat_.getDataMgr(),
484  chunk_key,
485  updel_roll.memoryLevel,
486  0,
487  chunk_metadata->numBytes,
488  chunk_metadata->numElements);
489  td->fragmenter->compactRows(&cat_,
490  td,
491  fragment_id,
492  td->fragmenter->getVacuumOffsets(chunk),
493  updel_roll.memoryLevel,
494  updel_roll);
495  updel_roll.stageUpdate();
496  }
497  }
498  td->fragmenter->resetSizesFromFragments();
499 }
500 
502  const TableUpdateMetadata& table_update_metadata) const {
504  return;
505  }
506  auto timer = DEBUG_TIMER(__func__);
507  std::map<const TableDescriptor*, std::set<int32_t>> fragments_to_vacuum;
508  for (const auto& [table_id, fragment_ids] :
509  table_update_metadata.fragments_with_deleted_rows) {
510  auto td = cat_.getMetadataForTable(table_id);
511  // Skip automatic vacuuming for tables with uncapped epoch
512  if (td->maxRollbackEpochs == -1) {
513  continue;
514  }
515 
516  DeletedColumnStats deleted_column_stats;
517  {
519  executor_->execute_mutex_);
520  ScopeGuard row_set_holder = [this] { executor_->row_set_mem_owner_ = nullptr; };
521  executor_->row_set_mem_owner_ =
522  std::make_shared<RowSetMemoryOwner>(ROW_SET_SIZE, /*num_threads=*/1);
523  deleted_column_stats =
524  getDeletedColumnStats(td, getFragmentIndexes(td, fragment_ids));
525  executor_->clearMetaInfoCache();
526  }
527 
528  std::set<int32_t> filtered_fragment_ids;
529  for (const auto [fragment_id, visible_row_count] :
530  deleted_column_stats.visible_row_count_per_fragment) {
531  auto total_row_count =
532  td->fragmenter->getFragmentInfo(fragment_id)->getPhysicalNumTuples();
533  float deleted_row_count = total_row_count - visible_row_count;
534  if ((deleted_row_count / total_row_count) >= g_vacuum_min_selectivity) {
535  filtered_fragment_ids.emplace(fragment_id);
536  }
537  }
538 
539  if (!filtered_fragment_ids.empty()) {
540  fragments_to_vacuum[td] = filtered_fragment_ids;
541  }
542  }
543 
544  if (!fragments_to_vacuum.empty()) {
545  const auto db_id = cat_.getDatabaseId();
546  const auto table_lock =
548  const auto table_epochs = cat_.getTableEpochs(db_id, td_->tableId);
549  try {
550  for (const auto& [td, fragment_ids] : fragments_to_vacuum) {
551  vacuumFragments(td, fragment_ids);
552  VLOG(1) << "Auto-vacuumed fragments: " << shared::printContainer(fragment_ids)
553  << ", table id: " << td->tableId;
554  }
556  } catch (...) {
557  cat_.setTableEpochsLogExceptions(db_id, table_epochs);
558  throw;
559  }
560  } else {
561  // Checkpoint, even when no data update occurs, in order to ensure that epochs are
562  // uniformly incremented in distributed mode.
564  }
565 }
bool contains(const T &container, const U &element)
Definition: misc.h:196
Defines data structures for the semantic analysis phase of query processing.
Data_Namespace::MemoryLevel memoryLevel
Definition: UpdelRoll.h:55
#define CHECK_EQ(x, y)
Definition: Logger.h:230
std::vector< int > ChunkKey
Definition: types.h:36
DeletedColumnStats getDeletedColumnStats(const TableDescriptor *td, const std::set< size_t > &fragment_indexes) const
void vacuumFragmentsAboveMinSelectivity(const TableUpdateMetadata &table_update_metadata) const
std::string cat(Ts &&...args)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:113
Definition: sqltypes.h:49
const TableDescriptor * table_descriptor
Definition: UpdelRoll.h:58
std::string tableName
const ColumnDescriptor * getDeletedColumn(const TableDescriptor *td) const
Definition: Catalog.cpp:3316
void recomputeMetadataUnlocked(const TableUpdateMetadata &table_update_metadata) const
Recomputes column chunk metadata for the given set of fragments. The caller of this method is expecte...
ExecutorDeviceType
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:228
#define LOG(tag)
Definition: Logger.h:216
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:155
CompilationOptions get_compilation_options(const ExecutorDeviceType &device_type)
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
void fillChunkStats(const T min, const T max, const bool has_nulls)
Definition: ChunkMetadata.h:74
TableToFragmentIds fragments_with_deleted_rows
Definition: Execute.h:322
#define CHECK_GE(x, y)
Definition: Logger.h:235
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1087
std::shared_ptr< ResultSet > ResultSetPtr
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
static constexpr size_t ROW_SET_SIZE
const TableDescriptor * td_
Definition: sqldefs.h:74
Executor * executor_
T read_scalar_target_value(const TargetValue &tv)
void stageUpdate()
const Catalog_Namespace::Catalog * catalog
Definition: UpdelRoll.h:53
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:227
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:469
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:86
std::unique_lock< T > unique_lock
std::unordered_map< int, size_t > visible_row_count_per_fragment
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
int getDatabaseId() const
Definition: Catalog.h:283
void vacuumDeletedRows() const
Compacts fragments to remove deleted rows. When a row is deleted, a boolean deleted system column is ...
int getLogicalTableId(const int physicalTableId) const
Definition: Catalog.cpp:4402
specifies the content in-memory of a row in the column metadata table
std::unordered_map< int, ChunkStats > chunk_stats_per_fragment
void checkpointWithAutoRollback(const int logical_table_id) const
Definition: Catalog.cpp:4424
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logical_table_desc, bool populate_fragmenter=true) const
Definition: Catalog.cpp:4263
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
void recomputeColumnMetadata(const TableDescriptor *td, const ColumnDescriptor *cd, const std::unordered_map< int, size_t > &tuple_count_map, std::optional< Data_Namespace::MemoryLevel > memory_level, const std::set< size_t > &fragment_indexes) const
DeletedColumnStats recomputeDeletedColumnMetadata(const TableDescriptor *td, const std::set< size_t > &fragment_indexes={}) const
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
Definition: DataMgr.cpp:609
void checkpoint(const int logicalTableId) const
Definition: Catalog.cpp:4416
ColumnToFragmentsMap columns_for_metadata_update
Definition: Execute.h:321
Definition: sqltypes.h:52
Definition: sqltypes.h:53
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1939
Definition: sqldefs.h:77
Data_Namespace::MemoryLevel persistenceLevel
float g_vacuum_min_selectivity
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:3627
void compactDataFiles(const int32_t db_id, const int32_t tb_id)
Definition: sqltypes.h:41
int logicalTableId
Definition: UpdelRoll.h:54
bool set_metadata_from_results(ChunkMetadata &chunk_metadata, const std::vector< TargetValue > &row, const SQLTypeInfo &ti, const bool has_nulls)
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
Definition: Execute.h:697
#define CHECK(condition)
Definition: Logger.h:222
std::vector< InputTableInfo > get_table_infos(const std::vector< InputDescriptor > &input_descs, Executor *executor)
#define DEBUG_TIMER(name)
Definition: Logger.h:369
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:165
TableOptimizer(const TableDescriptor *td, Executor *executor, const Catalog_Namespace::Catalog &cat)
void setTableEpochsLogExceptions(const int32_t db_id, const std::vector< TableEpochInfo > &table_epochs) const
Definition: Catalog.cpp:3304
Definition: sqltypes.h:45
SQLTypeInfo columnType
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:108
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40
Definition: sqldefs.h:75
RelAlgExecutionUnit build_ra_exe_unit(const std::shared_ptr< const InputColDescriptor > input_col_desc, const std::vector< Analyzer::Expr * > &target_exprs)
std::string columnName
#define VLOG(n)
Definition: Logger.h:316
const Catalog_Namespace::Catalog & cat_
std::vector< TableEpochInfo > getTableEpochs(const int32_t db_id, const int32_t table_id) const
Definition: Catalog.cpp:3240
void recomputeMetadata() const
Recomputes per-chunk metadata for each fragment in the table. Updates and deletes can cause chunk met...
std::set< size_t > getFragmentIndexes(const TableDescriptor *td, const std::set< int > &fragment_ids) const
void vacuumFragments(const TableDescriptor *td, const std::set< int > &fragment_ids={}) const