OmniSciDB  c0231cc57d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableOptimizer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "TableOptimizer.h"
18 
19 #include "Analyzer/Analyzer.h"
20 #include "LockMgr/LockMgr.h"
21 #include "Logger/Logger.h"
22 #include "QueryEngine/Execute.h"
23 #include "Shared/misc.h"
24 #include "Shared/scope.h"
25 
26 // By default, when rows are deleted, vacuum fragments with at least 10% deleted rows
28 
30  Executor* executor,
32  : td_(td), executor_(executor), cat_(cat) {
33  CHECK(td);
34 }
35 namespace {
36 
37 template <typename T>
39  const auto stv = boost::get<ScalarTargetValue>(&tv);
40  CHECK(stv);
41  const auto val_ptr = boost::get<T>(stv);
42  CHECK(val_ptr);
43  return *val_ptr;
44 }
45 
47  const std::vector<TargetValue>& row,
48  const SQLTypeInfo& ti,
49  const bool has_nulls) {
50  switch (ti.get_type()) {
51  case kBOOLEAN:
52  case kTINYINT:
53  case kSMALLINT:
54  case kINT:
55  case kBIGINT:
56  case kNUMERIC:
57  case kDECIMAL:
58  case kTIME:
59  case kTIMESTAMP:
60  case kDATE: {
61  int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
62  int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
63  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
64  break;
65  }
66  case kFLOAT: {
67  float min_val = read_scalar_target_value<float>(row[0]);
68  float max_val = read_scalar_target_value<float>(row[1]);
69  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
70  break;
71  }
72  case kDOUBLE: {
73  double min_val = read_scalar_target_value<double>(row[0]);
74  double max_val = read_scalar_target_value<double>(row[1]);
75  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
76  break;
77  }
78  case kVARCHAR:
79  case kCHAR:
80  case kTEXT:
81  if (ti.get_compression() == kENCODING_DICT) {
82  int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
83  int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
84  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
85  }
86  break;
87  default: {
88  return false; // skip column
89  }
90  }
91  return true;
92 }
93 
95  const std::shared_ptr<const InputColDescriptor> input_col_desc,
96  const std::vector<Analyzer::Expr*>& target_exprs) {
97  return RelAlgExecutionUnit{{input_col_desc->getScanDesc()},
98  {input_col_desc},
99  {},
100  {},
101  {},
102  {},
103  target_exprs,
104  {},
105  nullptr,
106  SortInfo{{}, SortAlgorithm::Default, 0, 0, false},
107  0};
108 }
109 
111  return CompilationOptions{device_type, false, ExecutorOptLevel::Default, false};
112 }
113 
115  return ExecutionOptions{false,
116  false,
117  false,
118  false,
119  false,
120  false,
121  false,
122  false,
123  false,
124  0,
125  false,
126  false,
127  0,
128  false};
129 }
130 
131 } // namespace
132 
134  auto timer = DEBUG_TIMER(__func__);
136 
137  LOG(INFO) << "Recomputing metadata for " << td_->tableName;
138 
139  CHECK_GE(td_->tableId, 0);
140 
141  std::vector<const TableDescriptor*> table_descriptors;
142  if (td_->nShards > 0) {
143  const auto physical_tds = cat_.getPhysicalTablesDescriptors(td_);
144  table_descriptors.insert(
145  table_descriptors.begin(), physical_tds.begin(), physical_tds.end());
146  } else {
147  table_descriptors.push_back(td_);
148  }
149 
150  auto& data_mgr = cat_.getDataMgr();
151 
152  // acquire write lock on table data
154 
155  for (const auto td : table_descriptors) {
156  ScopeGuard row_set_holder = [this] { executor_->row_set_mem_owner_ = nullptr; };
157  executor_->row_set_mem_owner_ =
158  std::make_shared<RowSetMemoryOwner>(ROW_SET_SIZE, /*num_threads=*/1);
159  executor_->catalog_ = &cat_;
160  const auto table_id = td->tableId;
161  auto stats = recomputeDeletedColumnMetadata(td);
162 
163  // TODO(adb): Support geo
164  auto col_descs = cat_.getAllColumnMetadataForTable(table_id, false, false, false);
165  for (const auto& cd : col_descs) {
166  recomputeColumnMetadata(td, cd, stats.visible_row_count_per_fragment, {}, {});
167  }
168  data_mgr.checkpoint(cat_.getCurrentDB().dbId, table_id);
169  executor_->clearMetaInfoCache();
170  }
171 
172  data_mgr.clearMemory(Data_Namespace::MemoryLevel::CPU_LEVEL);
173  if (data_mgr.gpusPresent()) {
174  data_mgr.clearMemory(Data_Namespace::MemoryLevel::GPU_LEVEL);
175  }
176 }
177 
179  const TableUpdateMetadata& table_update_metadata) const {
180  auto timer = DEBUG_TIMER(__func__);
181  std::map<int, std::list<const ColumnDescriptor*>> columns_by_table_id;
182  auto& columns_for_update = table_update_metadata.columns_for_metadata_update;
183  for (const auto& entry : columns_for_update) {
184  auto column_descriptor = entry.first;
185  columns_by_table_id[column_descriptor->tableId].emplace_back(column_descriptor);
186  }
187 
188  for (const auto& [table_id, columns] : columns_by_table_id) {
189  auto td = cat_.getMetadataForTable(table_id);
190  auto stats = recomputeDeletedColumnMetadata(td);
191  for (const auto cd : columns) {
192  CHECK(columns_for_update.find(cd) != columns_for_update.end());
193  auto fragment_indexes = getFragmentIndexes(td, columns_for_update.find(cd)->second);
195  cd,
196  stats.visible_row_count_per_fragment,
198  fragment_indexes);
199  }
200  }
201 }
202 
203 // Special-case handling for the $deleted column, if it exists.
204 // While handling the deleted column, also capture
205 // the number of non-deleted rows per fragment.
207  const TableDescriptor* td,
208  const std::set<size_t>& fragment_indexes) const {
209  if (!td->hasDeletedCol) {
210  return {};
211  }
212 
213  auto stats = getDeletedColumnStats(td, fragment_indexes);
214  auto* fragmenter = td->fragmenter.get();
215  CHECK(fragmenter);
216  auto cd = cat_.getDeletedColumn(td);
217  fragmenter->updateChunkStats(cd, stats.chunk_stats_per_fragment, {});
218  fragmenter->setNumRows(stats.total_row_count);
219  return stats;
220 }
221 
223  const TableDescriptor* td,
224  const std::set<size_t>& fragment_indexes) const {
225  if (!td->hasDeletedCol) {
226  return {};
227  }
228 
229  auto cd = cat_.getDeletedColumn(td);
230  const auto column_id = cd->columnId;
231 
232  const auto input_col_desc =
233  std::make_shared<const InputColDescriptor>(column_id, td->tableId, 0);
234  const auto col_expr =
235  makeExpr<Analyzer::ColumnVar>(cd->columnType, td->tableId, column_id, 0);
236  const auto count_expr =
237  makeExpr<Analyzer::AggExpr>(cd->columnType, kCOUNT, col_expr, false, nullptr);
238 
239  const auto ra_exe_unit = build_ra_exe_unit(input_col_desc, {count_expr.get()});
240  const auto table_infos = get_table_infos(ra_exe_unit, executor_);
241  CHECK_EQ(table_infos.size(), size_t(1));
242 
244  const auto eo = get_execution_options();
245 
246  DeletedColumnStats deleted_column_stats;
247  Executor::PerFragmentCallBack compute_deleted_callback =
248  [&deleted_column_stats, cd](
249  ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
250  // count number of tuples in $deleted as total number of tuples in table.
251  if (cd->isDeletedCol) {
252  deleted_column_stats.total_row_count += fragment_info.getPhysicalNumTuples();
253  }
254  if (fragment_info.getPhysicalNumTuples() == 0) {
255  // TODO(adb): Should not happen, but just to be safe...
256  LOG(WARNING) << "Skipping completely empty fragment for column "
257  << cd->columnName;
258  return;
259  }
260 
261  const auto row = results->getNextRow(false, false);
262  CHECK_EQ(row.size(), size_t(1));
263 
264  const auto& ti = cd->columnType;
265 
266  auto chunk_metadata = std::make_shared<ChunkMetadata>();
267  chunk_metadata->sqlType = get_logical_type_info(ti);
268 
269  const auto count_val = read_scalar_target_value<int64_t>(row[0]);
270 
271  // min element 0 max element 1
272  std::vector<TargetValue> fakerow;
273 
274  auto num_tuples = static_cast<size_t>(count_val);
275 
276  // calculate min
277  if (num_tuples == fragment_info.getPhysicalNumTuples()) {
278  // nothing deleted
279  // min = false;
280  // max = false;
281  fakerow.emplace_back(TargetValue{int64_t(0)});
282  fakerow.emplace_back(TargetValue{int64_t(0)});
283  } else {
284  if (num_tuples == 0) {
285  // everything marked as delete
286  // min = true
287  // max = true
288  fakerow.emplace_back(TargetValue{int64_t(1)});
289  fakerow.emplace_back(TargetValue{int64_t(1)});
290  } else {
291  // some deleted
292  // min = false
293  // max = true;
294  fakerow.emplace_back(TargetValue{int64_t(0)});
295  fakerow.emplace_back(TargetValue{int64_t(1)});
296  }
297  }
298 
299  // place manufactured min and max in fake row to use common infra
300  if (!set_metadata_from_results(*chunk_metadata, fakerow, ti, false)) {
301  LOG(WARNING) << "Unable to process new metadata values for column "
302  << cd->columnName;
303  return;
304  }
305 
306  deleted_column_stats.chunk_stats_per_fragment.emplace(
307  std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
308  deleted_column_stats.visible_row_count_per_fragment.emplace(
309  std::make_pair(fragment_info.fragmentId, num_tuples));
310  };
311 
312  executor_->executeWorkUnitPerFragment(ra_exe_unit,
313  table_infos[0],
314  co,
315  eo,
316  cat_,
317  compute_deleted_callback,
318  fragment_indexes);
319  return deleted_column_stats;
320 }
321 
323  const TableDescriptor* td,
324  const ColumnDescriptor* cd,
325  const std::unordered_map</*fragment_id*/ int, size_t>& tuple_count_map,
326  std::optional<Data_Namespace::MemoryLevel> memory_level,
327  const std::set<size_t>& fragment_indexes) const {
328  const auto ti = cd->columnType;
329  if (ti.is_varlen()) {
330  LOG(INFO) << "Skipping varlen column " << cd->columnName;
331  return;
332  }
333 
334  const auto column_id = cd->columnId;
335  const auto input_col_desc =
336  std::make_shared<const InputColDescriptor>(column_id, td->tableId, 0);
337  const auto col_expr =
338  makeExpr<Analyzer::ColumnVar>(cd->columnType, td->tableId, column_id, 0);
339  auto max_expr =
340  makeExpr<Analyzer::AggExpr>(cd->columnType, kMAX, col_expr, false, nullptr);
341  auto min_expr =
342  makeExpr<Analyzer::AggExpr>(cd->columnType, kMIN, col_expr, false, nullptr);
343  auto count_expr =
344  makeExpr<Analyzer::AggExpr>(cd->columnType, kCOUNT, col_expr, false, nullptr);
345 
346  if (ti.is_string()) {
347  const SQLTypeInfo fun_ti(kINT);
348  const auto fun_expr = makeExpr<Analyzer::KeyForStringExpr>(col_expr);
349  max_expr = makeExpr<Analyzer::AggExpr>(fun_ti, kMAX, fun_expr, false, nullptr);
350  min_expr = makeExpr<Analyzer::AggExpr>(fun_ti, kMIN, fun_expr, false, nullptr);
351  }
352  const auto ra_exe_unit = build_ra_exe_unit(
353  input_col_desc, {min_expr.get(), max_expr.get(), count_expr.get()});
354  const auto table_infos = get_table_infos(ra_exe_unit, executor_);
355  CHECK_EQ(table_infos.size(), size_t(1));
356 
358  const auto eo = get_execution_options();
359 
360  std::unordered_map</*fragment_id*/ int, ChunkStats> stats_map;
361 
362  Executor::PerFragmentCallBack compute_metadata_callback =
363  [&stats_map, &tuple_count_map, cd](
364  ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
365  if (fragment_info.getPhysicalNumTuples() == 0) {
366  // TODO(adb): Should not happen, but just to be safe...
367  LOG(WARNING) << "Skipping completely empty fragment for column "
368  << cd->columnName;
369  return;
370  }
371 
372  const auto row = results->getNextRow(false, false);
373  CHECK_EQ(row.size(), size_t(3));
374 
375  const auto& ti = cd->columnType;
376 
377  auto chunk_metadata = std::make_shared<ChunkMetadata>();
378  chunk_metadata->sqlType = get_logical_type_info(ti);
379 
380  const auto count_val = read_scalar_target_value<int64_t>(row[2]);
381  if (count_val == 0) {
382  // Assume chunk of all nulls, bail
383  return;
384  }
385 
386  bool has_nulls = true; // default to wide
387  auto tuple_count_itr = tuple_count_map.find(fragment_info.fragmentId);
388  if (tuple_count_itr != tuple_count_map.end()) {
389  has_nulls = !(static_cast<size_t>(count_val) == tuple_count_itr->second);
390  } else {
391  // no deleted column calc so use raw physical count
392  has_nulls =
393  !(static_cast<size_t>(count_val) == fragment_info.getPhysicalNumTuples());
394  }
395 
396  if (!set_metadata_from_results(*chunk_metadata, row, ti, has_nulls)) {
397  LOG(WARNING) << "Unable to process new metadata values for column "
398  << cd->columnName;
399  return;
400  }
401 
402  stats_map.emplace(
403  std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
404  };
405 
406  executor_->executeWorkUnitPerFragment(ra_exe_unit,
407  table_infos[0],
408  co,
409  eo,
410  cat_,
411  compute_metadata_callback,
412  fragment_indexes);
413 
414  auto* fragmenter = td->fragmenter.get();
415  CHECK(fragmenter);
416  fragmenter->updateChunkStats(cd, stats_map, memory_level);
417 }
418 
419 // Returns the corresponding indexes for the given fragment ids in the list of fragments
420 // returned by `getFragmentsForQuery()`
422  const TableDescriptor* td,
423  const std::set<int>& fragment_ids) const {
424  CHECK(td->fragmenter);
425  auto table_info = td->fragmenter->getFragmentsForQuery();
426  std::set<size_t> fragment_indexes;
427  for (size_t i = 0; i < table_info.fragments.size(); i++) {
428  if (shared::contains(fragment_ids, table_info.fragments[i].fragmentId)) {
429  fragment_indexes.emplace(i);
430  }
431  }
432  return fragment_indexes;
433 }
434 
436  auto timer = DEBUG_TIMER(__func__);
437  const auto table_id = td_->tableId;
438  const auto db_id = cat_.getDatabaseId();
439  const auto table_lock =
441  const auto table_epochs = cat_.getTableEpochs(db_id, table_id);
442  const auto shards = cat_.getPhysicalTablesDescriptors(td_);
443  try {
444  for (const auto shard : shards) {
445  vacuumFragments(shard);
446  }
447  cat_.checkpoint(table_id);
448  } catch (...) {
449  cat_.setTableEpochsLogExceptions(db_id, table_epochs);
450  throw;
451  }
452 
453  for (auto shard : shards) {
454  cat_.removeFragmenterForTable(shard->tableId);
456  shard->tableId);
457  }
458 }
459 
461  const std::set<int>& fragment_ids) const {
462  // If the table does not support deletes, return; nothing more to do.
463  const ColumnDescriptor* cd = cat_.getDeletedColumn(td);
464  if (nullptr == cd) {
465  return;
466  }
467  // vacuum chunks whose metadata shows signs of deleted rows
468  ChunkKey chunk_key_prefix = {cat_.getDatabaseId(), td->tableId, cd->columnId};
469  ChunkMetadataVector chunk_metadata_vec;
470  cat_.getDataMgr().getChunkMetadataVecForKeyPrefix(chunk_metadata_vec, chunk_key_prefix);
471  for (auto& [chunk_key, chunk_metadata] : chunk_metadata_vec) {
472  auto fragment_id = chunk_key[CHUNK_KEY_FRAGMENT_IDX];
473  // If delete has occurred, only vacuum fragments that are in the fragment_ids set.
474  // Empty fragment_ids set implies all fragments.
475  if (chunk_metadata->chunkStats.max.tinyintval == 1 &&
476  (fragment_ids.empty() || shared::contains(fragment_ids, fragment_id))) {
477  UpdelRoll updel_roll;
478  updel_roll.catalog = &cat_;
479  updel_roll.logicalTableId = cat_.getLogicalTableId(td->tableId);
481  updel_roll.table_descriptor = td;
482  CHECK_EQ(cd->columnId, chunk_key[CHUNK_KEY_COLUMN_IDX]);
483  const auto chunk = Chunk_NS::Chunk::getChunk(cd,
484  &cat_.getDataMgr(),
485  chunk_key,
486  updel_roll.memoryLevel,
487  0,
488  chunk_metadata->numBytes,
489  chunk_metadata->numElements);
490  td->fragmenter->compactRows(&cat_,
491  td,
492  fragment_id,
493  td->fragmenter->getVacuumOffsets(chunk),
494  updel_roll.memoryLevel,
495  updel_roll);
496  updel_roll.stageUpdate();
497  }
498  }
499  td->fragmenter->resetSizesFromFragments();
500 }
501 
503  const TableUpdateMetadata& table_update_metadata) const {
505  return;
506  }
507  auto timer = DEBUG_TIMER(__func__);
508  std::map<const TableDescriptor*, std::set<int32_t>> fragments_to_vacuum;
509  for (const auto& [table_id, fragment_ids] :
510  table_update_metadata.fragments_with_deleted_rows) {
511  auto td = cat_.getMetadataForTable(table_id);
512  // Skip automatic vacuuming for tables with uncapped epoch
513  if (td->maxRollbackEpochs == -1) {
514  continue;
515  }
516 
517  DeletedColumnStats deleted_column_stats;
518  {
520  executor_->execute_mutex_);
521  ScopeGuard row_set_holder = [this] { executor_->row_set_mem_owner_ = nullptr; };
522  executor_->row_set_mem_owner_ =
523  std::make_shared<RowSetMemoryOwner>(ROW_SET_SIZE, /*num_threads=*/1);
524  deleted_column_stats =
525  getDeletedColumnStats(td, getFragmentIndexes(td, fragment_ids));
526  executor_->clearMetaInfoCache();
527  }
528 
529  std::set<int32_t> filtered_fragment_ids;
530  for (const auto [fragment_id, visible_row_count] :
531  deleted_column_stats.visible_row_count_per_fragment) {
532  auto total_row_count =
533  td->fragmenter->getFragmentInfo(fragment_id)->getPhysicalNumTuples();
534  float deleted_row_count = total_row_count - visible_row_count;
535  if ((deleted_row_count / total_row_count) >= g_vacuum_min_selectivity) {
536  filtered_fragment_ids.emplace(fragment_id);
537  }
538  }
539 
540  if (!filtered_fragment_ids.empty()) {
541  fragments_to_vacuum[td] = filtered_fragment_ids;
542  }
543  }
544 
545  if (!fragments_to_vacuum.empty()) {
546  const auto db_id = cat_.getDatabaseId();
547  const auto table_lock =
549  const auto table_epochs = cat_.getTableEpochs(db_id, td_->tableId);
550  try {
551  for (const auto& [td, fragment_ids] : fragments_to_vacuum) {
552  vacuumFragments(td, fragment_ids);
553  VLOG(1) << "Auto-vacuumed fragments: " << shared::printContainer(fragment_ids)
554  << ", table id: " << td->tableId;
555  }
557  } catch (...) {
558  cat_.setTableEpochsLogExceptions(db_id, table_epochs);
559  throw;
560  }
561  } else {
562  // Checkpoint, even when no data update occurs, in order to ensure that epochs are
563  // uniformly incremented in distributed mode.
565  }
566 }
bool contains(const T &container, const U &element)
Definition: misc.h:195
Defines data structures for the semantic analysis phase of query processing.
Data_Namespace::MemoryLevel memoryLevel
Definition: UpdelRoll.h:55
#define CHECK_EQ(x, y)
Definition: Logger.h:230
std::vector< int > ChunkKey
Definition: types.h:36
DeletedColumnStats getDeletedColumnStats(const TableDescriptor *td, const std::set< size_t > &fragment_indexes) const
void vacuumFragmentsAboveMinSelectivity(const TableUpdateMetadata &table_update_metadata) const
static WriteLock getWriteLockForTable(Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:225
std::string cat(Ts &&...args)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
Definition: sqltypes.h:63
const TableDescriptor * table_descriptor
Definition: UpdelRoll.h:58
std::string tableName
const ColumnDescriptor * getDeletedColumn(const TableDescriptor *td) const
Definition: Catalog.cpp:3661
void recomputeMetadataUnlocked(const TableUpdateMetadata &table_update_metadata) const
Recomputes column chunk metadata for the given set of fragments. The caller of this method is expecte...
ExecutorDeviceType
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:243
#define LOG(tag)
Definition: Logger.h:216
CompilationOptions get_compilation_options(const ExecutorDeviceType &device_type)
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
void fillChunkStats(const T min, const T max, const bool has_nulls)
Definition: ChunkMetadata.h:51
TableToFragmentIds fragments_with_deleted_rows
Definition: Execute.h:322
#define CHECK_GE(x, y)
Definition: Logger.h:235
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1221
std::shared_ptr< ResultSet > ResultSetPtr
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
static constexpr size_t ROW_SET_SIZE
const TableDescriptor * td_
Definition: sqldefs.h:74
Executor * executor_
Catalog_Namespace::Catalog & cat_
T read_scalar_target_value(const TargetValue &tv)
void stageUpdate()
const Catalog_Namespace::Catalog * catalog
Definition: UpdelRoll.h:53
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:242
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:466
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:86
std::unique_lock< T > unique_lock
std::unordered_map< int, size_t > visible_row_count_per_fragment
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
int getDatabaseId() const
Definition: Catalog.h:298
void vacuumDeletedRows() const
Compacts fragments to remove deleted rows. When a row is deleted, a boolean deleted system column is ...
int getLogicalTableId(const int physicalTableId) const
Definition: Catalog.cpp:4769
specifies the content in-memory of a row in the column metadata table
std::unordered_map< int, ChunkStats > chunk_stats_per_fragment
void checkpointWithAutoRollback(const int logical_table_id) const
Definition: Catalog.cpp:4791
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logical_table_desc, bool populate_fragmenter=true) const
Definition: Catalog.cpp:4630
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
void recomputeColumnMetadata(const TableDescriptor *td, const ColumnDescriptor *cd, const std::unordered_map< int, size_t > &tuple_count_map, std::optional< Data_Namespace::MemoryLevel > memory_level, const std::set< size_t > &fragment_indexes) const
DeletedColumnStats recomputeDeletedColumnMetadata(const TableDescriptor *td, const std::set< size_t > &fragment_indexes={}) const
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
Definition: DataMgr.cpp:606
void checkpoint(const int logicalTableId) const
Definition: Catalog.cpp:4783
ColumnToFragmentsMap columns_for_metadata_update
Definition: Execute.h:321
Definition: sqltypes.h:66
Definition: sqltypes.h:67
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:412
TableOptimizer(const TableDescriptor *td, Executor *executor, Catalog_Namespace::Catalog &cat)
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2228
Definition: sqldefs.h:77
Data_Namespace::MemoryLevel persistenceLevel
float g_vacuum_min_selectivity
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:3994
void compactDataFiles(const int32_t db_id, const int32_t tb_id)
Definition: sqltypes.h:55
int logicalTableId
Definition: UpdelRoll.h:54
bool set_metadata_from_results(ChunkMetadata &chunk_metadata, const std::vector< TargetValue > &row, const SQLTypeInfo &ti, const bool has_nulls)
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
Definition: Execute.h:729
#define CHECK(condition)
Definition: Logger.h:222
std::vector< InputTableInfo > get_table_infos(const std::vector< InputDescriptor > &input_descs, Executor *executor)
#define DEBUG_TIMER(name)
Definition: Logger.h:371
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
void setTableEpochsLogExceptions(const int32_t db_id, const std::vector< TableEpochInfo > &table_epochs) const
Definition: Catalog.cpp:3649
Definition: sqltypes.h:59
SQLTypeInfo columnType
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:107
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40
Definition: sqldefs.h:75
RelAlgExecutionUnit build_ra_exe_unit(const std::shared_ptr< const InputColDescriptor > input_col_desc, const std::vector< Analyzer::Expr * > &target_exprs)
std::string columnName
#define VLOG(n)
Definition: Logger.h:316
std::vector< TableEpochInfo > getTableEpochs(const int32_t db_id, const int32_t table_id) const
Definition: Catalog.cpp:3585
void recomputeMetadata() const
Recomputes per-chunk metadata for each fragment in the table. Updates and deletes can cause chunk met...
std::set< size_t > getFragmentIndexes(const TableDescriptor *td, const std::set< int > &fragment_ids) const
void vacuumFragments(const TableDescriptor *td, const std::set< int > &fragment_ids={}) const