OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableOptimizer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "TableOptimizer.h"
18 
19 #include "Analyzer/Analyzer.h"
20 #include "LockMgr/LockMgr.h"
21 #include "Logger/Logger.h"
22 #include "QueryEngine/Execute.h"
23 #include "Shared/misc.h"
24 #include "Shared/scope.h"
25 
26 // By default, when rows are deleted, vacuum fragments with at least 10% deleted rows
28 
30  Executor* executor,
32  : td_(td), executor_(executor), cat_(cat) {
33  CHECK(td);
34 }
35 namespace {
36 
37 template <typename T>
39  const auto stv = boost::get<ScalarTargetValue>(&tv);
40  CHECK(stv);
41  const auto val_ptr = boost::get<T>(stv);
42  CHECK(val_ptr);
43  return *val_ptr;
44 }
45 
47  const std::vector<TargetValue>& row,
48  const SQLTypeInfo& ti,
49  const bool has_nulls) {
50  switch (ti.get_type()) {
51  case kBOOLEAN:
52  case kTINYINT:
53  case kSMALLINT:
54  case kINT:
55  case kBIGINT:
56  case kNUMERIC:
57  case kDECIMAL:
58  case kTIME:
59  case kTIMESTAMP:
60  case kDATE: {
61  int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
62  int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
63  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
64  break;
65  }
66  case kFLOAT: {
67  float min_val = read_scalar_target_value<float>(row[0]);
68  float max_val = read_scalar_target_value<float>(row[1]);
69  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
70  break;
71  }
72  case kDOUBLE: {
73  double min_val = read_scalar_target_value<double>(row[0]);
74  double max_val = read_scalar_target_value<double>(row[1]);
75  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
76  break;
77  }
78  case kVARCHAR:
79  case kCHAR:
80  case kTEXT:
81  if (ti.get_compression() == kENCODING_DICT) {
82  int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
83  int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
84  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
85  }
86  break;
87  default: {
88  return false; // skip column
89  }
90  }
91  return true;
92 }
93 
95  const std::shared_ptr<const InputColDescriptor> input_col_desc,
96  const std::vector<Analyzer::Expr*>& target_exprs) {
97  return RelAlgExecutionUnit{{input_col_desc->getScanDesc()},
98  {input_col_desc},
99  {},
100  {},
101  {},
102  {},
103  target_exprs,
104  {},
105  nullptr,
106  SortInfo{{}, SortAlgorithm::Default, 0, 0, false},
107  0};
108 }
109 
111  return CompilationOptions{device_type, false, ExecutorOptLevel::Default, false};
112 }
113 
115  return ExecutionOptions{false,
116  false,
117  false,
118  false,
119  false,
120  false,
121  false,
122  false,
123  false,
124  0,
125  false,
126  false,
127  0,
128  false,
129  false};
130 }
131 
132 } // namespace
133 
135  auto timer = DEBUG_TIMER(__func__);
137 
138  LOG(INFO) << "Recomputing metadata for " << td_->tableName;
139 
140  CHECK_GE(td_->tableId, 0);
141 
142  std::vector<const TableDescriptor*> table_descriptors;
143  if (td_->nShards > 0) {
144  const auto physical_tds = cat_.getPhysicalTablesDescriptors(td_);
145  table_descriptors.insert(
146  table_descriptors.begin(), physical_tds.begin(), physical_tds.end());
147  } else {
148  table_descriptors.push_back(td_);
149  }
150 
151  auto& data_mgr = cat_.getDataMgr();
152 
153  // acquire write lock on table data
155 
156  for (const auto td : table_descriptors) {
157  ScopeGuard row_set_holder = [this] { executor_->row_set_mem_owner_ = nullptr; };
158  executor_->row_set_mem_owner_ =
159  std::make_shared<RowSetMemoryOwner>(ROW_SET_SIZE, /*num_threads=*/1);
160  executor_->catalog_ = &cat_;
161  const auto table_id = td->tableId;
162  auto stats = recomputeDeletedColumnMetadata(td);
163 
164  // TODO(adb): Support geo
165  auto col_descs = cat_.getAllColumnMetadataForTable(table_id, false, false, false);
166  for (const auto& cd : col_descs) {
167  recomputeColumnMetadata(td, cd, stats.visible_row_count_per_fragment, {}, {});
168  }
169  data_mgr.checkpoint(cat_.getCurrentDB().dbId, table_id);
170  executor_->clearMetaInfoCache();
171  }
172 
173  data_mgr.clearMemory(Data_Namespace::MemoryLevel::CPU_LEVEL);
174  if (data_mgr.gpusPresent()) {
175  data_mgr.clearMemory(Data_Namespace::MemoryLevel::GPU_LEVEL);
176  }
177 }
178 
180  const TableUpdateMetadata& table_update_metadata) const {
181  auto timer = DEBUG_TIMER(__func__);
182  std::map<int, std::list<const ColumnDescriptor*>> columns_by_table_id;
183  auto& columns_for_update = table_update_metadata.columns_for_metadata_update;
184  for (const auto& entry : columns_for_update) {
185  auto column_descriptor = entry.first;
186  columns_by_table_id[column_descriptor->tableId].emplace_back(column_descriptor);
187  }
188 
189  for (const auto& [table_id, columns] : columns_by_table_id) {
190  auto td = cat_.getMetadataForTable(table_id);
191  auto stats = recomputeDeletedColumnMetadata(td);
192  for (const auto cd : columns) {
193  CHECK(columns_for_update.find(cd) != columns_for_update.end());
194  auto fragment_indexes = getFragmentIndexes(td, columns_for_update.find(cd)->second);
196  cd,
197  stats.visible_row_count_per_fragment,
199  fragment_indexes);
200  }
201  }
202 }
203 
204 // Special-case handling for the $deleted column, if it exists:
205 // while handling the deleted column, also capture
206 // the number of non-deleted rows per fragment.
208  const TableDescriptor* td,
209  const std::set<size_t>& fragment_indexes) const {
210  if (!td->hasDeletedCol) {
211  return {};
212  }
213 
214  auto stats = getDeletedColumnStats(td, fragment_indexes);
215  auto* fragmenter = td->fragmenter.get();
216  CHECK(fragmenter);
217  auto cd = cat_.getDeletedColumn(td);
218  fragmenter->updateChunkStats(cd, stats.chunk_stats_per_fragment, {});
219  fragmenter->setNumRows(stats.total_row_count);
220  return stats;
221 }
222 
224  const TableDescriptor* td,
225  const std::set<size_t>& fragment_indexes) const {
226  if (!td->hasDeletedCol) {
227  return {};
228  }
229 
230  auto cd = cat_.getDeletedColumn(td);
231  const auto column_id = cd->columnId;
232 
233  const auto input_col_desc =
234  std::make_shared<const InputColDescriptor>(column_id, td->tableId, 0);
235  const auto col_expr =
236  makeExpr<Analyzer::ColumnVar>(cd->columnType, td->tableId, column_id, 0);
237  const auto count_expr =
238  makeExpr<Analyzer::AggExpr>(cd->columnType, kCOUNT, col_expr, false, nullptr);
239 
240  const auto ra_exe_unit = build_ra_exe_unit(input_col_desc, {count_expr.get()});
241  const auto table_infos = get_table_infos(ra_exe_unit, executor_);
242  CHECK_EQ(table_infos.size(), size_t(1));
243 
245  const auto eo = get_execution_options();
246 
247  DeletedColumnStats deleted_column_stats;
248  Executor::PerFragmentCallBack compute_deleted_callback =
249  [&deleted_column_stats, cd](
250  ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
251  // count number of tuples in $deleted as total number of tuples in table.
252  if (cd->isDeletedCol) {
253  deleted_column_stats.total_row_count += fragment_info.getPhysicalNumTuples();
254  }
255  if (fragment_info.getPhysicalNumTuples() == 0) {
256  // TODO(adb): Should not happen, but just to be safe...
257  LOG(WARNING) << "Skipping completely empty fragment for column "
258  << cd->columnName;
259  return;
260  }
261 
262  const auto row = results->getNextRow(false, false);
263  CHECK_EQ(row.size(), size_t(1));
264 
265  const auto& ti = cd->columnType;
266 
267  auto chunk_metadata = std::make_shared<ChunkMetadata>();
268  chunk_metadata->sqlType = get_logical_type_info(ti);
269 
270  const auto count_val = read_scalar_target_value<int64_t>(row[0]);
271 
272  // min element 0 max element 1
273  std::vector<TargetValue> fakerow;
274 
275  auto num_tuples = static_cast<size_t>(count_val);
276 
277  // calculate min
278  if (num_tuples == fragment_info.getPhysicalNumTuples()) {
279  // nothing deleted
280  // min = false;
281  // max = false;
282  fakerow.emplace_back(TargetValue{int64_t(0)});
283  fakerow.emplace_back(TargetValue{int64_t(0)});
284  } else {
285  if (num_tuples == 0) {
286  // everything marked as delete
287  // min = true
288  // max = true
289  fakerow.emplace_back(TargetValue{int64_t(1)});
290  fakerow.emplace_back(TargetValue{int64_t(1)});
291  } else {
292  // some deleted
293  // min = false
294  // max = true;
295  fakerow.emplace_back(TargetValue{int64_t(0)});
296  fakerow.emplace_back(TargetValue{int64_t(1)});
297  }
298  }
299 
300  // place manufacture min and max in fake row to use common infra
301  if (!set_metadata_from_results(*chunk_metadata, fakerow, ti, false)) {
302  LOG(WARNING) << "Unable to process new metadata values for column "
303  << cd->columnName;
304  return;
305  }
306 
307  deleted_column_stats.chunk_stats_per_fragment.emplace(
308  std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
309  deleted_column_stats.visible_row_count_per_fragment.emplace(
310  std::make_pair(fragment_info.fragmentId, num_tuples));
311  };
312 
313  executor_->executeWorkUnitPerFragment(ra_exe_unit,
314  table_infos[0],
315  co,
316  eo,
317  cat_,
318  compute_deleted_callback,
319  fragment_indexes);
320  return deleted_column_stats;
321 }
322 
324  const TableDescriptor* td,
325  const ColumnDescriptor* cd,
326  const std::unordered_map</*fragment_id*/ int, size_t>& tuple_count_map,
327  std::optional<Data_Namespace::MemoryLevel> memory_level,
328  const std::set<size_t>& fragment_indexes) const {
329  const auto ti = cd->columnType;
330  if (ti.is_varlen()) {
331  LOG(INFO) << "Skipping varlen column " << cd->columnName;
332  return;
333  }
334 
335  const auto column_id = cd->columnId;
336  const auto input_col_desc =
337  std::make_shared<const InputColDescriptor>(column_id, td->tableId, 0);
338  const auto col_expr =
339  makeExpr<Analyzer::ColumnVar>(cd->columnType, td->tableId, column_id, 0);
340  auto max_expr =
341  makeExpr<Analyzer::AggExpr>(cd->columnType, kMAX, col_expr, false, nullptr);
342  auto min_expr =
343  makeExpr<Analyzer::AggExpr>(cd->columnType, kMIN, col_expr, false, nullptr);
344  auto count_expr =
345  makeExpr<Analyzer::AggExpr>(cd->columnType, kCOUNT, col_expr, false, nullptr);
346 
347  if (ti.is_string()) {
348  const SQLTypeInfo fun_ti(kINT);
349  const auto fun_expr = makeExpr<Analyzer::KeyForStringExpr>(col_expr);
350  max_expr = makeExpr<Analyzer::AggExpr>(fun_ti, kMAX, fun_expr, false, nullptr);
351  min_expr = makeExpr<Analyzer::AggExpr>(fun_ti, kMIN, fun_expr, false, nullptr);
352  }
353  const auto ra_exe_unit = build_ra_exe_unit(
354  input_col_desc, {min_expr.get(), max_expr.get(), count_expr.get()});
355  const auto table_infos = get_table_infos(ra_exe_unit, executor_);
356  CHECK_EQ(table_infos.size(), size_t(1));
357 
359  const auto eo = get_execution_options();
360 
361  std::unordered_map</*fragment_id*/ int, ChunkStats> stats_map;
362 
363  Executor::PerFragmentCallBack compute_metadata_callback =
364  [&stats_map, &tuple_count_map, cd](
365  ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
366  if (fragment_info.getPhysicalNumTuples() == 0) {
367  // TODO(adb): Should not happen, but just to be safe...
368  LOG(WARNING) << "Skipping completely empty fragment for column "
369  << cd->columnName;
370  return;
371  }
372 
373  const auto row = results->getNextRow(false, false);
374  CHECK_EQ(row.size(), size_t(3));
375 
376  const auto& ti = cd->columnType;
377 
378  auto chunk_metadata = std::make_shared<ChunkMetadata>();
379  chunk_metadata->sqlType = get_logical_type_info(ti);
380 
381  const auto count_val = read_scalar_target_value<int64_t>(row[2]);
382  if (count_val == 0) {
383  // Assume chunk of all nulls, bail
384  return;
385  }
386 
387  bool has_nulls = true; // default to wide
388  auto tuple_count_itr = tuple_count_map.find(fragment_info.fragmentId);
389  if (tuple_count_itr != tuple_count_map.end()) {
390  has_nulls = !(static_cast<size_t>(count_val) == tuple_count_itr->second);
391  } else {
392  // no deleted column calc so use raw physical count
393  has_nulls =
394  !(static_cast<size_t>(count_val) == fragment_info.getPhysicalNumTuples());
395  }
396 
397  if (!set_metadata_from_results(*chunk_metadata, row, ti, has_nulls)) {
398  LOG(WARNING) << "Unable to process new metadata values for column "
399  << cd->columnName;
400  return;
401  }
402 
403  stats_map.emplace(
404  std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
405  };
406 
407  executor_->executeWorkUnitPerFragment(ra_exe_unit,
408  table_infos[0],
409  co,
410  eo,
411  cat_,
412  compute_metadata_callback,
413  fragment_indexes);
414 
415  auto* fragmenter = td->fragmenter.get();
416  CHECK(fragmenter);
417  fragmenter->updateChunkStats(cd, stats_map, memory_level);
418 }
419 
420 // Returns the corresponding indexes for the given fragment ids in the list of fragments
421 // returned by `getFragmentsForQuery()`
423  const TableDescriptor* td,
424  const std::set<int>& fragment_ids) const {
425  CHECK(td->fragmenter);
426  auto table_info = td->fragmenter->getFragmentsForQuery();
427  std::set<size_t> fragment_indexes;
428  for (size_t i = 0; i < table_info.fragments.size(); i++) {
429  if (shared::contains(fragment_ids, table_info.fragments[i].fragmentId)) {
430  fragment_indexes.emplace(i);
431  }
432  }
433  return fragment_indexes;
434 }
435 
437  auto timer = DEBUG_TIMER(__func__);
438  const auto table_id = td_->tableId;
439  const auto db_id = cat_.getDatabaseId();
440  const auto table_lock =
442  const auto table_epochs = cat_.getTableEpochs(db_id, table_id);
443  const auto shards = cat_.getPhysicalTablesDescriptors(td_);
444  try {
445  for (const auto shard : shards) {
446  vacuumFragments(shard);
447  }
448  cat_.checkpoint(table_id);
449  } catch (...) {
450  cat_.setTableEpochsLogExceptions(db_id, table_epochs);
451  throw;
452  }
453 
454  for (auto shard : shards) {
455  cat_.removeFragmenterForTable(shard->tableId);
457  shard->tableId);
458  }
459 }
460 
462  const std::set<int>& fragment_ids) const {
463  // "if not a table that supports delete return, nothing more to do"
464  const ColumnDescriptor* cd = cat_.getDeletedColumn(td);
465  if (nullptr == cd) {
466  return;
467  }
468  // vacuum chunks which show sign of deleted rows in metadata
469  ChunkKey chunk_key_prefix = {cat_.getDatabaseId(), td->tableId, cd->columnId};
470  ChunkMetadataVector chunk_metadata_vec;
471  cat_.getDataMgr().getChunkMetadataVecForKeyPrefix(chunk_metadata_vec, chunk_key_prefix);
472  for (auto& [chunk_key, chunk_metadata] : chunk_metadata_vec) {
473  auto fragment_id = chunk_key[CHUNK_KEY_FRAGMENT_IDX];
474  // If delete has occurred, only vacuum fragments that are in the fragment_ids set.
475  // Empty fragment_ids set implies all fragments.
476  if (chunk_metadata->chunkStats.max.tinyintval == 1 &&
477  (fragment_ids.empty() || shared::contains(fragment_ids, fragment_id))) {
478  UpdelRoll updel_roll;
479  updel_roll.catalog = &cat_;
480  updel_roll.logicalTableId = cat_.getLogicalTableId(td->tableId);
482  updel_roll.table_descriptor = td;
483  CHECK_EQ(cd->columnId, chunk_key[CHUNK_KEY_COLUMN_IDX]);
484  const auto chunk = Chunk_NS::Chunk::getChunk(cd,
485  &cat_.getDataMgr(),
486  chunk_key,
487  updel_roll.memoryLevel,
488  0,
489  chunk_metadata->numBytes,
490  chunk_metadata->numElements);
491  td->fragmenter->compactRows(&cat_,
492  td,
493  fragment_id,
494  td->fragmenter->getVacuumOffsets(chunk),
495  updel_roll.memoryLevel,
496  updel_roll);
497  updel_roll.stageUpdate();
498  }
499  }
500  td->fragmenter->resetSizesFromFragments();
501 }
502 
504  const TableUpdateMetadata& table_update_metadata) const {
506  return;
507  }
508  auto timer = DEBUG_TIMER(__func__);
509  std::map<const TableDescriptor*, std::set<int32_t>> fragments_to_vacuum;
510  for (const auto& [table_id, fragment_ids] :
511  table_update_metadata.fragments_with_deleted_rows) {
512  auto td = cat_.getMetadataForTable(table_id);
513  // Skip automatic vacuuming for tables with uncapped epoch
514  if (td->maxRollbackEpochs == -1) {
515  continue;
516  }
517 
518  DeletedColumnStats deleted_column_stats;
519  {
521  executor_->execute_mutex_);
522  ScopeGuard row_set_holder = [this] { executor_->row_set_mem_owner_ = nullptr; };
523  executor_->row_set_mem_owner_ =
524  std::make_shared<RowSetMemoryOwner>(ROW_SET_SIZE, /*num_threads=*/1);
525  deleted_column_stats =
526  getDeletedColumnStats(td, getFragmentIndexes(td, fragment_ids));
527  executor_->clearMetaInfoCache();
528  }
529 
530  std::set<int32_t> filtered_fragment_ids;
531  for (const auto [fragment_id, visible_row_count] :
532  deleted_column_stats.visible_row_count_per_fragment) {
533  auto total_row_count =
534  td->fragmenter->getFragmentInfo(fragment_id)->getPhysicalNumTuples();
535  float deleted_row_count = total_row_count - visible_row_count;
536  if ((deleted_row_count / total_row_count) >= g_vacuum_min_selectivity) {
537  filtered_fragment_ids.emplace(fragment_id);
538  }
539  }
540 
541  if (!filtered_fragment_ids.empty()) {
542  fragments_to_vacuum[td] = filtered_fragment_ids;
543  }
544  }
545 
546  if (!fragments_to_vacuum.empty()) {
547  const auto db_id = cat_.getDatabaseId();
548  const auto table_lock =
550  const auto table_epochs = cat_.getTableEpochs(db_id, td_->tableId);
551  try {
552  for (const auto& [td, fragment_ids] : fragments_to_vacuum) {
553  vacuumFragments(td, fragment_ids);
554  VLOG(1) << "Auto-vacuumed fragments: " << shared::printContainer(fragment_ids)
555  << ", table id: " << td->tableId;
556  }
558  } catch (...) {
559  cat_.setTableEpochsLogExceptions(db_id, table_epochs);
560  throw;
561  }
562  } else {
563  // Checkpoint, even when no data update occurs, in order to ensure that epochs are
564  // uniformly incremented in distributed mode.
566  }
567 }
bool contains(const T &container, const U &element)
Definition: misc.h:195
Defines data structures for the semantic analysis phase of query processing.
Data_Namespace::MemoryLevel memoryLevel
Definition: UpdelRoll.h:55
#define CHECK_EQ(x, y)
Definition: Logger.h:297
std::vector< int > ChunkKey
Definition: types.h:36
DeletedColumnStats getDeletedColumnStats(const TableDescriptor *td, const std::set< size_t > &fragment_indexes) const
void vacuumFragmentsAboveMinSelectivity(const TableUpdateMetadata &table_update_metadata) const
static WriteLock getWriteLockForTable(Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:225
std::string cat(Ts &&...args)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
Definition: sqltypes.h:64
const TableDescriptor * table_descriptor
Definition: UpdelRoll.h:58
std::string tableName
const ColumnDescriptor * getDeletedColumn(const TableDescriptor *td) const
Definition: Catalog.cpp:3687
void recomputeMetadataUnlocked(const TableUpdateMetadata &table_update_metadata) const
Recomputes column chunk metadata for the given set of fragments. The caller of this method is expecte...
ExecutorDeviceType
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:249
#define LOG(tag)
Definition: Logger.h:283
CompilationOptions get_compilation_options(const ExecutorDeviceType &device_type)
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
void fillChunkStats(const T min, const T max, const bool has_nulls)
Definition: ChunkMetadata.h:51
TableToFragmentIds fragments_with_deleted_rows
Definition: Execute.h:322
#define CHECK_GE(x, y)
Definition: Logger.h:302
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1206
std::shared_ptr< ResultSet > ResultSetPtr
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
static constexpr size_t ROW_SET_SIZE
const TableDescriptor * td_
Definition: sqldefs.h:75
Executor * executor_
Catalog_Namespace::Catalog & cat_
T read_scalar_target_value(const TargetValue &tv)
void stageUpdate()
const Catalog_Namespace::Catalog * catalog
Definition: UpdelRoll.h:53
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:248
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:466
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:86
std::unique_lock< T > unique_lock
std::unordered_map< int, size_t > visible_row_count_per_fragment
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
int getDatabaseId() const
Definition: Catalog.h:304
void vacuumDeletedRows() const
Compacts fragments to remove deleted rows. When a row is deleted, a boolean deleted system column is ...
int getLogicalTableId(const int physicalTableId) const
Definition: Catalog.cpp:4819
specifies the content in-memory of a row in the column metadata table
std::unordered_map< int, ChunkStats > chunk_stats_per_fragment
void checkpointWithAutoRollback(const int logical_table_id) const
Definition: Catalog.cpp:4841
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logical_table_desc, bool populate_fragmenter=true) const
Definition: Catalog.cpp:4680
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
void recomputeColumnMetadata(const TableDescriptor *td, const ColumnDescriptor *cd, const std::unordered_map< int, size_t > &tuple_count_map, std::optional< Data_Namespace::MemoryLevel > memory_level, const std::set< size_t > &fragment_indexes) const
DeletedColumnStats recomputeDeletedColumnMetadata(const TableDescriptor *td, const std::set< size_t > &fragment_indexes={}) const
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
Definition: DataMgr.cpp:606
void checkpoint(const int logicalTableId) const
Definition: Catalog.cpp:4833
ColumnToFragmentsMap columns_for_metadata_update
Definition: Execute.h:321
Definition: sqltypes.h:67
Definition: sqltypes.h:68
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:388
TableOptimizer(const TableDescriptor *td, Executor *executor, Catalog_Namespace::Catalog &cat)
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2254
Definition: sqldefs.h:78
Data_Namespace::MemoryLevel persistenceLevel
float g_vacuum_min_selectivity
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:4044
void compactDataFiles(const int32_t db_id, const int32_t tb_id)
Definition: sqltypes.h:56
int logicalTableId
Definition: UpdelRoll.h:54
bool set_metadata_from_results(ChunkMetadata &chunk_metadata, const std::vector< TargetValue > &row, const SQLTypeInfo &ti, const bool has_nulls)
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
Definition: Execute.h:785
#define CHECK(condition)
Definition: Logger.h:289
std::vector< InputTableInfo > get_table_infos(const std::vector< InputDescriptor > &input_descs, Executor *executor)
#define DEBUG_TIMER(name)
Definition: Logger.h:407
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
void setTableEpochsLogExceptions(const int32_t db_id, const std::vector< TableEpochInfo > &table_epochs) const
Definition: Catalog.cpp:3675
Definition: sqltypes.h:60
SQLTypeInfo columnType
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:107
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40
Definition: sqldefs.h:76
RelAlgExecutionUnit build_ra_exe_unit(const std::shared_ptr< const InputColDescriptor > input_col_desc, const std::vector< Analyzer::Expr * > &target_exprs)
std::string columnName
#define VLOG(n)
Definition: Logger.h:383
std::vector< TableEpochInfo > getTableEpochs(const int32_t db_id, const int32_t table_id) const
Definition: Catalog.cpp:3611
void recomputeMetadata() const
Recomputes per-chunk metadata for each fragment in the table. Updates and deletes can cause chunk met...
std::set< size_t > getFragmentIndexes(const TableDescriptor *td, const std::set< int > &fragment_ids) const
void vacuumFragments(const TableDescriptor *td, const std::set< int > &fragment_ids={}) const