OmniSciDB  a667adc9c8
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TableOptimizer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "TableOptimizer.h"
18 
19 #include "Analyzer/Analyzer.h"
20 #include "LockMgr/LockMgr.h"
21 #include "Logger/Logger.h"
22 #include "QueryEngine/Execute.h"
23 #include "Shared/misc.h"
24 #include "Shared/scope.h"
25 
26 // By default, when rows are deleted, vacuum fragments with at least 10% deleted rows
28 
30  Executor* executor,
32  : td_(td), executor_(executor), cat_(cat) {
33  CHECK(td);
34 }
35 namespace {
36 
37 template <typename T>
39  const auto stv = boost::get<ScalarTargetValue>(&tv);
40  CHECK(stv);
41  const auto val_ptr = boost::get<T>(stv);
42  CHECK(val_ptr);
43  return *val_ptr;
44 }
45 
47  const std::vector<TargetValue>& row,
48  const SQLTypeInfo& ti,
49  const bool has_nulls) {
50  switch (ti.get_type()) {
51  case kBOOLEAN:
52  case kTINYINT:
53  case kSMALLINT:
54  case kINT:
55  case kBIGINT:
56  case kNUMERIC:
57  case kDECIMAL:
58  case kTIME:
59  case kTIMESTAMP:
60  case kDATE: {
61  int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
62  int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
63  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
64  break;
65  }
66  case kFLOAT: {
67  float min_val = read_scalar_target_value<float>(row[0]);
68  float max_val = read_scalar_target_value<float>(row[1]);
69  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
70  break;
71  }
72  case kDOUBLE: {
73  double min_val = read_scalar_target_value<double>(row[0]);
74  double max_val = read_scalar_target_value<double>(row[1]);
75  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
76  break;
77  }
78  case kVARCHAR:
79  case kCHAR:
80  case kTEXT:
81  if (ti.get_compression() == kENCODING_DICT) {
82  int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
83  int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
84  chunk_metadata.fillChunkStats(min_val, max_val, has_nulls);
85  }
86  break;
87  default: {
88  return false; // skip column
89  }
90  }
91  return true;
92 }
93 
95  const std::shared_ptr<const InputColDescriptor> input_col_desc,
96  const std::vector<Analyzer::Expr*>& target_exprs) {
97  return RelAlgExecutionUnit{{input_col_desc->getScanDesc()},
98  {input_col_desc},
99  {},
100  {},
101  {},
102  {},
103  target_exprs,
104  nullptr,
105  SortInfo{{}, SortAlgorithm::Default, 0, 0},
106  0};
107 }
108 
110  return CompilationOptions{device_type, false, ExecutorOptLevel::Default, false};
111 }
112 
114  return ExecutionOptions{
115  false, false, false, false, false, false, false, false, 0, false, false, 0, false};
116 }
117 
118 } // namespace
119 
// Body of TableOptimizer::recomputeMetadata(): recomputes chunk metadata for
// every column of the table, shard by shard when the table is sharded.
// NOTE(review): the signature line (listing line 120) is absent from this extract.
121  auto timer = DEBUG_TIMER(__func__);
// Lock the executor's execute_mutex_ through the local `lock` object.
122  mapd_unique_lock<mapd_shared_mutex> lock(executor_->execute_mutex_);
123 
124  LOG(INFO) << "Recomputing metadata for " << td_->tableName;
125 
126  CHECK_GE(td_->tableId, 0);
127 
// A sharded table is processed through its physical tables; otherwise the
// table descriptor itself is the only entry.
128  std::vector<const TableDescriptor*> table_descriptors;
129  if (td_->nShards > 0) {
130  const auto physical_tds = cat_.getPhysicalTablesDescriptors(td_);
131  table_descriptors.insert(
132  table_descriptors.begin(), physical_tds.begin(), physical_tds.end());
133  } else {
134  table_descriptors.push_back(td_);
135  }
136 
137  auto& data_mgr = cat_.getDataMgr();
138 
139  // acquire write lock on table data
// NOTE(review): listing line 140, the statement this comment announces, is
// absent from this extract.
141 
142  for (const auto td : table_descriptors) {
// Give the executor a fresh row-set memory owner for this table; the scope
// guard resets it to nullptr when the iteration ends.
143  ScopeGuard row_set_holder = [this] { executor_->row_set_mem_owner_ = nullptr; };
144  executor_->row_set_mem_owner_ =
145  std::make_shared<RowSetMemoryOwner>(ROW_SET_SIZE, /*num_threads=*/1);
146  executor_->catalog_ = &cat_;
147  const auto table_id = td->tableId;
// The deleted column goes first: its visible-row counts feed the null
// detection of every other column.
148  auto stats = recomputeDeletedColumnMetadata(td);
149 
150  // TODO(adb): Support geo
151  auto col_descs = cat_.getAllColumnMetadataForTable(table_id, false, false, false);
152  for (const auto& cd : col_descs) {
// No memory level and an empty fragment-index set are passed here.
153  recomputeColumnMetadata(td, cd, stats.visible_row_count_per_fragment, {}, {});
154  }
155  data_mgr.checkpoint(cat_.getCurrentDB().dbId, table_id);
156  executor_->clearMetaInfoCache();
157  }
158 
// Clear CPU memory, and GPU memory when GPUs are present.
159  data_mgr.clearMemory(Data_Namespace::MemoryLevel::CPU_LEVEL);
160  if (data_mgr.gpusPresent()) {
161  data_mgr.clearMemory(Data_Namespace::MemoryLevel::GPU_LEVEL);
162  }
163 }
164 
// Tail of the signature and body of recomputeMetadataUnlocked(): recomputes chunk
// metadata only for the columns and fragments named in `table_update_metadata`.
// No lock is taken in the visible code.
// NOTE(review): the first signature line (listing line 165) is absent from this extract.
166  const TableUpdateMetadata& table_update_metadata) const {
167  auto timer = DEBUG_TIMER(__func__);
// Group the columns needing an update by the table they belong to.
168  std::map<int, std::list<const ColumnDescriptor*>> columns_by_table_id;
169  auto& columns_for_update = table_update_metadata.columns_for_metadata_update;
170  for (const auto& entry : columns_for_update) {
171  auto column_descriptor = entry.first;
172  columns_by_table_id[column_descriptor->tableId].emplace_back(column_descriptor);
173  }
174 
175  for (const auto& [table_id, columns] : columns_by_table_id) {
176  auto td = cat_.getMetadataForTable(table_id);
// Deleted-column stats are recomputed once per table, with the default
// fragment-index argument.
177  auto stats = recomputeDeletedColumnMetadata(td);
178  for (const auto cd : columns) {
179  CHECK(columns_for_update.find(cd) != columns_for_update.end());
// Translate the recorded fragment ids into fragment indexes.
180  auto fragment_indexes = getFragmentIndexes(td, columns_for_update.find(cd)->second);
// NOTE(review): listing lines 181 and 184 are absent from this extract. The
// remaining arguments match recomputeColumnMetadata(td, cd, tuple_count_map,
// memory_level, fragment_indexes), so 181 presumably opens that call and 184
// supplies the memory level. Confirm against the original file.
182  cd,
183  stats.visible_row_count_per_fragment,
185  fragment_indexes);
186  }
187  }
188 }
189 
190 // Special case handle $deleted column if it exists
191 // whilst handling the delete column also capture
192 // the number of non deleted rows per fragment
194  const TableDescriptor* td,
195  const std::set<size_t>& fragment_indexes) const {
196  if (!td->hasDeletedCol) {
197  return {};
198  }
199 
200  auto stats = getDeletedColumnStats(td, fragment_indexes);
201  auto* fragmenter = td->fragmenter.get();
202  CHECK(fragmenter);
203  auto cd = cat_.getDeletedColumn(td);
204  fragmenter->updateChunkStats(cd, stats.chunk_stats_per_fragment, {});
205  fragmenter->setNumRows(stats.total_row_count);
206  return stats;
207 }
208 
// Tail of the signature and body of getDeletedColumnStats(td, fragment_indexes):
// runs a COUNT over the table's deleted column once per fragment and derives,
// for each fragment, the chunk stats of that column and the visible-row count.
// Also sums the physical tuple counts into total_row_count.
// Returns default-constructed stats when the table has no deleted column.
// NOTE(review): the first signature line (listing line 209) is absent from this extract.
210  const TableDescriptor* td,
211  const std::set<size_t>& fragment_indexes) const {
212  if (!td->hasDeletedCol) {
213  return {};
214  }
215 
216  auto cd = cat_.getDeletedColumn(td);
217  const auto column_id = cd->columnId;
218 
// Build a single-target work unit: COUNT(<deleted column>).
219  const auto input_col_desc =
220  std::make_shared<const InputColDescriptor>(column_id, td->tableId, 0);
221  const auto col_expr =
222  makeExpr<Analyzer::ColumnVar>(cd->columnType, td->tableId, column_id, 0);
223  const auto count_expr =
224  makeExpr<Analyzer::AggExpr>(cd->columnType, kCOUNT, col_expr, false, nullptr);
225 
226  const auto ra_exe_unit = build_ra_exe_unit(input_col_desc, {count_expr.get()});
227  const auto table_infos = get_table_infos(ra_exe_unit, executor_);
228  CHECK_EQ(table_infos.size(), size_t(1));
229 
// NOTE(review): listing line 230 is absent from this extract. It presumably
// defines `co` via get_compilation_options(), since `co` is passed to
// executeWorkUnitPerFragment below. Confirm against the original file.
231  const auto eo = get_execution_options();
232 
233  DeletedColumnStats deleted_column_stats;
// Invoked once per fragment with that fragment's COUNT result.
234  Executor::PerFragmentCallBack compute_deleted_callback =
235  [&deleted_column_stats, cd](
236  ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
237  // count number of tuples in $deleted as total number of tuples in table.
238  if (cd->isDeletedCol) {
239  deleted_column_stats.total_row_count += fragment_info.getPhysicalNumTuples();
240  }
241  if (fragment_info.getPhysicalNumTuples() == 0) {
242  // TODO(adb): Should not happen, but just to be safe...
243  LOG(WARNING) << "Skipping completely empty fragment for column "
244  << cd->columnName;
245  return;
246  }
247 
248  const auto row = results->getNextRow(false, false);
249  CHECK_EQ(row.size(), size_t(1));
250 
251  const auto& ti = cd->columnType;
252 
253  auto chunk_metadata = std::make_shared<ChunkMetadata>();
254  chunk_metadata->sqlType = get_logical_type_info(ti);
255 
256  const auto count_val = read_scalar_target_value<int64_t>(row[0]);
257 
258  // min element 0 max element 1
259  std::vector<TargetValue> fakerow;
260 
// The count result is treated as the number of visible (non-deleted) rows.
261  auto num_tuples = static_cast<size_t>(count_val);
262 
263  // calculate min
264  if (num_tuples == fragment_info.getPhysicalNumTuples()) {
265  // nothing deleted
266  // min = false;
267  // max = false;
268  fakerow.emplace_back(TargetValue{int64_t(0)});
269  fakerow.emplace_back(TargetValue{int64_t(0)});
270  } else {
271  if (num_tuples == 0) {
272  // everything marked as deleted
273  // min = true
274  // max = true
275  fakerow.emplace_back(TargetValue{int64_t(1)});
276  fakerow.emplace_back(TargetValue{int64_t(1)});
277  } else {
278  // some deleted
279  // min = false
280  // max = true;
281  fakerow.emplace_back(TargetValue{int64_t(0)});
282  fakerow.emplace_back(TargetValue{int64_t(1)});
283  }
284  }
285 
286  // place the manufactured min and max in a fake row to use the common infra
287  if (!set_metadata_from_results(*chunk_metadata, fakerow, ti, false)) {
288  LOG(WARNING) << "Unable to process new metadata values for column "
289  << cd->columnName;
290  return;
291  }
292 
// Record, per fragment id, the derived chunk stats and the visible-row count.
293  deleted_column_stats.chunk_stats_per_fragment.emplace(
294  std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
295  deleted_column_stats.visible_row_count_per_fragment.emplace(
296  std::make_pair(fragment_info.fragmentId, num_tuples));
297  };
298 
299  executor_->executeWorkUnitPerFragment(ra_exe_unit,
300  table_infos[0],
301  co,
302  eo,
303  cat_,
304  compute_deleted_callback,
305  fragment_indexes);
306  return deleted_column_stats;
307 }
308 
// Tail of the signature and body of recomputeColumnMetadata(): runs MIN, MAX and
// COUNT over one column once per fragment and hands the resulting chunk stats to
// the table's fragmenter.
//   td               - table owning the column; its fragmenter must be set
//   cd               - column to process; varlen columns are skipped
//   tuple_count_map  - visible row count per fragment id, used to detect nulls
//   memory_level     - forwarded unchanged to updateChunkStats
//   fragment_indexes - forwarded unchanged to executeWorkUnitPerFragment
// NOTE(review): the first signature line (listing line 309) is absent from this extract.
310  const TableDescriptor* td,
311  const ColumnDescriptor* cd,
312  const std::unordered_map</*fragment_id*/ int, size_t>& tuple_count_map,
313  std::optional<Data_Namespace::MemoryLevel> memory_level,
314  const std::set<size_t>& fragment_indexes) const {
315  const auto ti = cd->columnType;
316  if (ti.is_varlen()) {
317  LOG(INFO) << "Skipping varlen column " << cd->columnName;
318  return;
319  }
320 
321  const auto column_id = cd->columnId;
322  const auto input_col_desc =
323  std::make_shared<const InputColDescriptor>(column_id, td->tableId, 0);
324  const auto col_expr =
325  makeExpr<Analyzer::ColumnVar>(cd->columnType, td->tableId, column_id, 0);
326  auto max_expr =
327  makeExpr<Analyzer::AggExpr>(cd->columnType, kMAX, col_expr, false, nullptr);
328  auto min_expr =
329  makeExpr<Analyzer::AggExpr>(cd->columnType, kMIN, col_expr, false, nullptr);
330  auto count_expr =
331  makeExpr<Analyzer::AggExpr>(cd->columnType, kCOUNT, col_expr, false, nullptr);
332 
// For string columns, MIN/MAX are taken over KeyForStringExpr(col), typed as
// kINT, instead of over the column itself.
333  if (ti.is_string()) {
334  const SQLTypeInfo fun_ti(kINT);
335  const auto fun_expr = makeExpr<Analyzer::KeyForStringExpr>(col_expr);
336  max_expr = makeExpr<Analyzer::AggExpr>(fun_ti, kMAX, fun_expr, false, nullptr);
337  min_expr = makeExpr<Analyzer::AggExpr>(fun_ti, kMIN, fun_expr, false, nullptr);
338  }
// Target order is min, max, count; the callback reads row[0..2] in that order.
339  const auto ra_exe_unit = build_ra_exe_unit(
340  input_col_desc, {min_expr.get(), max_expr.get(), count_expr.get()});
341  const auto table_infos = get_table_infos(ra_exe_unit, executor_);
342  CHECK_EQ(table_infos.size(), size_t(1));
343 
// NOTE(review): listing line 344 is absent from this extract. It presumably
// defines `co` via get_compilation_options(), since `co` is passed to
// executeWorkUnitPerFragment below. Confirm against the original file.
345  const auto eo = get_execution_options();
346 
347  std::unordered_map</*fragment_id*/ int, ChunkStats> stats_map;
348 
// Invoked once per fragment with that fragment's (min, max, count) row.
349  Executor::PerFragmentCallBack compute_metadata_callback =
350  [&stats_map, &tuple_count_map, cd](
351  ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
352  if (fragment_info.getPhysicalNumTuples() == 0) {
353  // TODO(adb): Should not happen, but just to be safe...
354  LOG(WARNING) << "Skipping completely empty fragment for column "
355  << cd->columnName;
356  return;
357  }
358 
359  const auto row = results->getNextRow(false, false);
360  CHECK_EQ(row.size(), size_t(3));
361 
362  const auto& ti = cd->columnType;
363 
364  auto chunk_metadata = std::make_shared<ChunkMetadata>();
365  chunk_metadata->sqlType = get_logical_type_info(ti);
366 
367  const auto count_val = read_scalar_target_value<int64_t>(row[2]);
368  if (count_val == 0) {
369  // Assume chunk of all nulls, bail
370  return;
371  }
372 
// The fragment has nulls when COUNT(col) differs from the expected row
// count: the visible count if one was supplied, else the physical count.
373  bool has_nulls = true; // default to wide
374  auto tuple_count_itr = tuple_count_map.find(fragment_info.fragmentId);
375  if (tuple_count_itr != tuple_count_map.end()) {
376  has_nulls = !(static_cast<size_t>(count_val) == tuple_count_itr->second);
377  } else {
378  // no deleted column calc so use raw physical count
379  has_nulls =
380  !(static_cast<size_t>(count_val) == fragment_info.getPhysicalNumTuples());
381  }
382 
383  if (!set_metadata_from_results(*chunk_metadata, row, ti, has_nulls)) {
384  LOG(WARNING) << "Unable to process new metadata values for column "
385  << cd->columnName;
386  return;
387  }
388 
389  stats_map.emplace(
390  std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
391  };
392 
393  executor_->executeWorkUnitPerFragment(ra_exe_unit,
394  table_infos[0],
395  co,
396  eo,
397  cat_,
398  compute_metadata_callback,
399  fragment_indexes);
400 
// Hand the collected per-fragment stats to the fragmenter.
401  auto* fragmenter = td->fragmenter.get();
402  CHECK(fragmenter);
403  fragmenter->updateChunkStats(cd, stats_map, memory_level);
404 }
405 
406 // Returns the corresponding indexes for the given fragment ids in the list of fragments
407 // returned by `getFragmentsForQuery()`
409  const TableDescriptor* td,
410  const std::set<int>& fragment_ids) const {
411  CHECK(td->fragmenter);
412  auto table_info = td->fragmenter->getFragmentsForQuery();
413  std::set<size_t> fragment_indexes;
414  for (size_t i = 0; i < table_info.fragments.size(); i++) {
415  if (shared::contains(fragment_ids, table_info.fragments[i].fragmentId)) {
416  fragment_indexes.emplace(i);
417  }
418  }
419  return fragment_indexes;
420 }
421 
// Body of TableOptimizer::vacuumDeletedRows(): vacuums every physical table
// returned for td_, checkpoints, and then drops the fragmenters of those tables.
// NOTE(review): the signature line (listing line 422) is absent from this extract.
423  auto timer = DEBUG_TIMER(__func__);
424  const auto table_id = td_->tableId;
425  const auto db_id = cat_.getDatabaseId();
// Snapshot the table epochs so they can be restored if vacuuming fails.
426  const auto table_epochs = cat_.getTableEpochs(db_id, table_id);
427  const auto shards = cat_.getPhysicalTablesDescriptors(td_);
428  try {
429  for (const auto shard : shards) {
430  vacuumFragments(shard);
431  }
432  cat_.checkpoint(table_id);
433  } catch (...) {
// On any failure, restore the saved epochs and rethrow.
434  cat_.setTableEpochsLogExceptions(db_id, table_epochs);
435  throw;
436  }
437 
438  for (auto shard : shards) {
439  cat_.removeFragmenterForTable(shard->tableId);
// NOTE(review): listing line 440 is absent from this extract. The line below
// is the tail of a call taking shard->tableId as its last argument. Confirm
// against the original file.
441  shard->tableId);
442  }
443 }
444 
// Tail of the signature and body of vacuumFragments(td, fragment_ids): compacts
// the fragments of `td` whose deleted-column metadata shows deleted rows. An
// empty `fragment_ids` set means every fragment is eligible.
// NOTE(review): the first signature line (listing line 445) is absent from this extract.
446  const std::set<int>& fragment_ids) const {
447  // "if not a table that supports delete return, nothing more to do"
448  const ColumnDescriptor* cd = cat_.getDeletedColumn(td);
449  if (nullptr == cd) {
450  return;
451  }
452  // vacuum chunks which show sign of deleted rows in metadata
// Fetch the chunk metadata of every deleted-column chunk of this table.
453  ChunkKey chunk_key_prefix = {cat_.getDatabaseId(), td->tableId, cd->columnId};
454  ChunkMetadataVector chunk_metadata_vec;
455  cat_.getDataMgr().getChunkMetadataVecForKeyPrefix(chunk_metadata_vec, chunk_key_prefix);
456  for (auto& [chunk_key, chunk_metadata] : chunk_metadata_vec) {
457  auto fragment_id = chunk_key[CHUNK_KEY_FRAGMENT_IDX];
458  // If delete has occurred, only vacuum fragments that are in the fragment_ids set.
459  // Empty fragment_ids set implies all fragments.
// A max of 1 on the deleted column means at least one row is marked deleted.
460  if (chunk_metadata->chunkStats.max.tinyintval == 1 &&
461  (fragment_ids.empty() || shared::contains(fragment_ids, fragment_id))) {
462  UpdelRoll updel_roll;
463  updel_roll.catalog = &cat_;
464  updel_roll.logicalTableId = cat_.getLogicalTableId(td->tableId);
// NOTE(review): listing line 465 is absent from this extract.
// updel_roll.memoryLevel is read below but never assigned in the visible code,
// so the missing line presumably sets it. Confirm against the original file.
466  updel_roll.table_descriptor = td;
467  CHECK_EQ(cd->columnId, chunk_key[CHUNK_KEY_COLUMN_IDX]);
468  const auto chunk = Chunk_NS::Chunk::getChunk(cd,
469  &cat_.getDataMgr(),
470  chunk_key,
471  updel_roll.memoryLevel,
472  0,
473  chunk_metadata->numBytes,
474  chunk_metadata->numElements);
// Compact the fragment using the vacuum offsets computed from the chunk,
// then stage the update on the roll.
475  td->fragmenter->compactRows(&cat_,
476  td,
477  fragment_id,
478  td->fragmenter->getVacuumOffsets(chunk),
479  updel_roll.memoryLevel,
480  updel_roll);
481  updel_roll.stageUpdate();
482  }
483  }
484 }
485 
// Tail of the signature and body of vacuumFragmentsAboveMinSelectivity(): for each
// table listed in table_update_metadata.fragments_with_deleted_rows, vacuums the
// fragments whose deleted-row fraction is at least g_vacuum_min_selectivity.
// NOTE(review): listing lines 486 (first signature line) and 488 (the condition
// guarding the early return below) are absent from this extract.
487  const TableUpdateMetadata& table_update_metadata) const {
489  return;
490  }
491  auto timer = DEBUG_TIMER(__func__);
492  const auto db_id = cat_.getDatabaseId();
// Snapshot the epochs of td_ so they can be restored if anything below throws.
493  const auto table_epochs = cat_.getTableEpochs(db_id, td_->tableId);
494  std::set<const TableDescriptor*> vacuumed_tables;
495  try {
496  for (const auto& [table_id, fragment_ids] :
497  table_update_metadata.fragments_with_deleted_rows) {
498  auto td = cat_.getMetadataForTable(table_id);
499  // Skip automatic vacuuming for tables with uncapped epoch
500  if (td->maxRollbackEpochs == -1) {
501  continue;
502  }
503 
504  DeletedColumnStats deleted_column_stats;
505  {
// Collect the deleted-column stats inside this inner scope: it holds the
// executor lock and a temporary row-set memory owner, which the scope guard
// resets to nullptr on exit.
506  mapd_unique_lock<mapd_shared_mutex> executor_lock(executor_->execute_mutex_);
507  ScopeGuard row_set_holder = [this] { executor_->row_set_mem_owner_ = nullptr; };
508  executor_->row_set_mem_owner_ =
509  std::make_shared<RowSetMemoryOwner>(ROW_SET_SIZE, /*num_threads=*/1);
510  deleted_column_stats =
511  getDeletedColumnStats(td, getFragmentIndexes(td, fragment_ids));
512  executor_->clearMetaInfoCache();
513  }
514 
// Keep only fragments where deleted rows / physical rows reaches the
// threshold. The division is done in float.
515  std::set<int32_t> filtered_fragment_ids;
516  for (const auto [fragment_id, visible_row_count] :
517  deleted_column_stats.visible_row_count_per_fragment) {
518  auto total_row_count =
519  td->fragmenter->getFragmentInfo(fragment_id)->getPhysicalNumTuples();
520  float deleted_row_count = total_row_count - visible_row_count;
521  if ((deleted_row_count / total_row_count) >= g_vacuum_min_selectivity) {
522  filtered_fragment_ids.emplace(fragment_id);
523  }
524  }
525 
526  if (!filtered_fragment_ids.empty()) {
527  vacuumFragments(td, filtered_fragment_ids);
528  vacuumed_tables.emplace(td);
529  VLOG(1) << "Auto-vacuumed fragments: "
530  << shared::printContainer(filtered_fragment_ids)
531  << ", table id: " << td->tableId;
532  }
533  }
534 
535  // Always checkpoint in order to ensure that epochs are uniformly incremented in
536  // distributed mode.
// NOTE(review): listing line 537, the checkpoint statement this comment
// announces, is absent from this extract.
538  } catch (...) {
// On any failure, restore the saved epochs and rethrow.
539  cat_.setTableEpochsLogExceptions(db_id, table_epochs);
540  throw;
541  }
542 
543  // Reset fragmenters for vacuumed tables in order to ensure that their metadata is in
544  // sync
545  for (auto table : vacuumed_tables) {
// NOTE(review): listing line 546 is absent from this extract. It is presumably
// the call that removes the table's fragmenter before it is re-fetched below.
// Confirm against the original file.
547  cat_.getMetadataForTable(table->tableId);
548  CHECK(table->fragmenter);
549  }
550 }
bool contains(const T &container, const U &element)
Definition: misc.h:147
Defines data structures for the semantic analysis phase of query processing.
Data_Namespace::MemoryLevel memoryLevel
Definition: UpdelRoll.h:65
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< int > ChunkKey
Definition: types.h:37
DeletedColumnStats getDeletedColumnStats(const TableDescriptor *td, const std::set< size_t > &fragment_indexes) const
void vacuumFragmentsAboveMinSelectivity(const TableUpdateMetadata &table_update_metadata) const
std::string cat(Ts &&...args)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:101
Definition: sqltypes.h:48
const TableDescriptor * table_descriptor
Definition: UpdelRoll.h:68
std::string tableName
const ColumnDescriptor * getDeletedColumn(const TableDescriptor *td) const
Definition: Catalog.cpp:3068
void recomputeMetadataUnlocked(const TableUpdateMetadata &table_update_metadata) const
Recomputes column chunk metadata for the given set of fragments. The caller of this method is expecte...
ExecutorDeviceType
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:222
#define LOG(tag)
Definition: Logger.h:188
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:155
CompilationOptions get_compilation_options(const ExecutorDeviceType &device_type)
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:42
void fillChunkStats(const T min, const T max, const bool has_nulls)
Definition: ChunkMetadata.h:73
TableToFragmentIds fragments_with_deleted_rows
Definition: Execute.h:313
#define CHECK_GE(x, y)
Definition: Logger.h:210
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:910
std::shared_ptr< ResultSet > ResultSetPtr
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
static constexpr size_t ROW_SET_SIZE
const TableDescriptor * td_
Definition: sqldefs.h:73
Executor * executor_
T read_scalar_target_value(const TargetValue &tv)
void stageUpdate()
const Catalog_Namespace::Catalog * catalog
Definition: UpdelRoll.h:63
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:221
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:410
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:77
std::unordered_map< int, size_t > visible_row_count_per_fragment
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
int getDatabaseId() const
Definition: Catalog.h:276
void vacuumDeletedRows() const
Compacts fragments to remove deleted rows. When a row is deleted, a boolean deleted system column is ...
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems)
Definition: Chunk.cpp:28
int getLogicalTableId(const int physicalTableId) const
Definition: Catalog.cpp:4138
specifies the content in-memory of a row in the column metadata table
std::unordered_map< int, ChunkStats > chunk_stats_per_fragment
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logical_table_desc, bool populate_fragmenter=true) const
Definition: Catalog.cpp:4013
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
void recomputeColumnMetadata(const TableDescriptor *td, const ColumnDescriptor *cd, const std::unordered_map< int, size_t > &tuple_count_map, std::optional< Data_Namespace::MemoryLevel > memory_level, const std::set< size_t > &fragment_indexes) const
DeletedColumnStats recomputeDeletedColumnMetadata(const TableDescriptor *td, const std::set< size_t > &fragment_indexes={}) const
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
Definition: DataMgr.cpp:543
void checkpoint(const int logicalTableId) const
Definition: Catalog.cpp:4152
ColumnToFragmentsMap columns_for_metadata_update
Definition: Execute.h:312
Definition: sqltypes.h:51
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1721
Definition: sqldefs.h:76
Data_Namespace::MemoryLevel persistenceLevel
float g_vacuum_min_selectivity
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:3394
void compactDataFiles(const int32_t db_id, const int32_t tb_id)
Definition: sqltypes.h:40
int logicalTableId
Definition: UpdelRoll.h:64
bool set_metadata_from_results(ChunkMetadata &chunk_metadata, const std::vector< TargetValue > &row, const SQLTypeInfo &ti, const bool has_nulls)
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
Definition: Execute.h:538
#define CHECK(condition)
Definition: Logger.h:197
std::vector< InputTableInfo > get_table_infos(const std::vector< InputDescriptor > &input_descs, Executor *executor)
#define DEBUG_TIMER(name)
Definition: Logger.h:313
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
TableOptimizer(const TableDescriptor *td, Executor *executor, const Catalog_Namespace::Catalog &cat)
void setTableEpochsLogExceptions(const int32_t db_id, const std::vector< TableEpochInfo > &table_epochs) const
Definition: Catalog.cpp:3056
Definition: sqltypes.h:44
SQLTypeInfo columnType
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:64
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:41
Definition: sqldefs.h:74
RelAlgExecutionUnit build_ra_exe_unit(const std::shared_ptr< const InputColDescriptor > input_col_desc, const std::vector< Analyzer::Expr * > &target_exprs)
std::string columnName
#define VLOG(n)
Definition: Logger.h:291
const Catalog_Namespace::Catalog & cat_
std::vector< TableEpochInfo > getTableEpochs(const int32_t db_id, const int32_t table_id) const
Definition: Catalog.cpp:2991
void recomputeMetadata() const
Recomputes per-chunk metadata for each fragment in the table. Updates and deletes can cause chunk met...
std::set< size_t > getFragmentIndexes(const TableDescriptor *td, const std::set< int > &fragment_ids) const
void vacuumFragments(const TableDescriptor *td, const std::set< int > &fragment_ids={}) const