OmniSciDB  addbbd5075
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StorageIOFacility.h
Go to the documentation of this file.
1 #ifndef STORAGEIOFACILITY_H
2 #define STORAGEIOFACILITY_H
3 
6 #include "TargetMetaInfo.h"
7 
8 #include <boost/variant.hpp>
9 #include "Shared/ConfigResolve.h"
11 #include "Shared/UpdelRoll.h"
12 #include "Shared/likely.h"
13 #include "Shared/thread_count.h"
14 
15 #include <future>
16 
17 template <typename FRAGMENTER_TYPE = Fragmenter_Namespace::InsertOrderFragmenter>
19  public:
20  using FragmenterType = FRAGMENTER_TYPE;
21  using DeleteVictimOffsetList = std::vector<uint64_t>;
22  using UpdateTargetOffsetList = std::vector<uint64_t>;
23  using UpdateTargetTypeList = std::vector<TargetMetaInfo>;
24  using UpdateTargetColumnNamesList = std::vector<std::string>;
25  using TransactionLog = typename FragmenterType::ModifyTransactionTracker;
26  using TransactionLogPtr = std::unique_ptr<TransactionLog>;
27  using ColumnValidationFunction = std::function<bool(std::string const&)>;
28 
// Updates one column of one fragment by delegating to the table's fragmenter.
//
// Steps visible here: look up the table descriptor for `table_id` in `cat`,
// take its `fragmenter` (CHECKed non-null), look up the column descriptor for
// `column_name`, then forward everything to `fragmenter->updateColumn`.
//
// Parameters:
//   cat                 - catalog used for both lookups; its address is passed on
//   table_id            - id handed to getMetadataForTable / getMetadataForColumn
//   column_name         - name handed to getMetadataForColumn
//   frag_id             - forwarded unchanged to the fragmenter
//   frag_offsets        - forwarded unchanged to the fragmenter
//   update_values       - forwarded unchanged to the fragmenter
//   col_type_info       - forwarded unchanged to the fragmenter
//   transaction_tracker - forwarded unchanged to the fragmenter
29  template <typename CATALOG_TYPE,
30  typename TABLE_ID_TYPE,
31  typename COLUMN_NAME_TYPE,
32  typename FRAGMENT_ID_TYPE,
33  typename FRAGMENT_OFFSET_LIST_TYPE,
34  typename UPDATE_VALUES_LIST_TYPE,
35  typename COLUMN_TYPE_INFO>
36  static void updateColumn(CATALOG_TYPE const& cat,
37  TABLE_ID_TYPE const&& table_id,
38  COLUMN_NAME_TYPE const& column_name,
39  FRAGMENT_ID_TYPE const frag_id,
40  FRAGMENT_OFFSET_LIST_TYPE const& frag_offsets,
41  UPDATE_VALUES_LIST_TYPE const& update_values,
42  COLUMN_TYPE_INFO const& col_type_info,
43  TransactionLog& transaction_tracker) {
44  auto const* table_descriptor = cat.getMetadataForTable(table_id);
// NOTE(review): table_descriptor is dereferenced on the next line without a
// null check; only the fragmenter pointer is CHECKed.
45  auto* fragmenter = table_descriptor->fragmenter;
46  CHECK(fragmenter);
// NOTE(review): target_column is passed on without a null check here.
47  auto const* target_column = cat.getMetadataForColumn(table_id, column_name);
48 
49  fragmenter->updateColumn(&cat,
50  table_descriptor,
51  target_column,
52  frag_id,
53  frag_offsets,
54  update_values,
55  col_type_info,
// NOTE(review): the embedded listing numbers jump from 55 to 57 here, so one
// line of this argument list appears to be missing from this copy of the file.
// Confirm against the real header before editing this call.
57  transaction_tracker);
58  }
59 
// Marks the rows listed in `victims` as deleted for one fragment.
//
// Steps visible here: look up the table descriptor, CHECK that the table is
// not temporary, take its fragmenter (CHECKed non-null), and ask the catalog
// for the table's "deleted" column. If that column exists, the fragmenter's
// updateColumn is called for it with the value ScalarTargetValue(int64_t(1L))
// at the victim offsets. If it does not exist, an INFO message is logged and
// nothing is changed.
//
// Parameters:
//   cat                 - catalog used for the lookups; its address is passed on
//   table_id            - id handed to getMetadataForTable
//   frag_id             - forwarded unchanged to the fragmenter
//   victims             - offsets forwarded to the fragmenter
//   col_type_info       - forwarded unchanged to the fragmenter
//   transaction_tracker - forwarded unchanged to the fragmenter
60  template <typename CATALOG_TYPE,
61  typename TABLE_ID_TYPE,
62  typename FRAGMENT_ID_TYPE,
63  typename VICTIM_OFFSET_LIST,
64  typename COLUMN_TYPE_INFO>
65  static void deleteColumns(CATALOG_TYPE const& cat,
66  TABLE_ID_TYPE const&& table_id,
67  FRAGMENT_ID_TYPE const frag_id,
68  VICTIM_OFFSET_LIST& victims,
69  COLUMN_TYPE_INFO const& col_type_info,
70  TransactionLog& transaction_tracker) {
71  auto const* table_descriptor = cat.getMetadataForTable(table_id);
// Deleting from temporary tables is treated as a programming error here.
72  CHECK(!table_is_temporary(table_descriptor));
// NOTE(review): table_descriptor itself is never null-checked in this function.
73  auto* fragmenter = table_descriptor->fragmenter;
74  CHECK(fragmenter);
75 
76  auto const* deleted_column_desc = cat.getDeletedColumn(table_descriptor);
77  if (deleted_column_desc != nullptr) {
// Every victim offset receives the same scalar value, 1.
78  fragmenter->updateColumn(&cat,
79  table_descriptor,
80  deleted_column_desc,
81  frag_id,
82  victims,
83  ScalarTargetValue(int64_t(1L)),
84  col_type_info,
// NOTE(review): the embedded listing numbers jump from 84 to 86 here, so one
// line of this argument list appears to be missing from this copy of the file.
// Confirm against the real header before editing this call.
86  transaction_tracker);
87  } else {
// No "deleted" column: the request is dropped with only a log line.
88  LOG(INFO) << "Delete metadata column unavailable; skipping delete operation.";
89  }
90  }
91 
// Produces the column-name validator for this facet.
//
// The returned predicate accepts every column name: it ignores its argument
// and always yields true. Neither `cat` nor `table_descriptor` is consulted.
template <typename CATALOG_TYPE, typename TABLE_DESCRIPTOR_TYPE>
static std::function<bool(std::string const&)> yieldColumnValidator(
    CATALOG_TYPE const& cat,
    TABLE_DESCRIPTOR_TYPE const* table_descriptor) {
  auto const accept_every_column = [](std::string const&) -> bool { return true; };
  return accept_every_column;
}
98 };
99 
100 template <typename EXECUTOR_TRAITS,
101  typename IO_FACET = DefaultIOFacet<>,
102  typename FRAGMENT_UPDATER = UpdateLogForFragment>
104  public:
105  using ExecutorType = typename EXECUTOR_TRAITS::ExecutorType;
106  using CatalogType = typename EXECUTOR_TRAITS::CatalogType;
107  using FragmentUpdaterType = FRAGMENT_UPDATER;
109  using IOFacility = IO_FACET;
110  using TableDescriptorType = typename EXECUTOR_TRAITS::TableDescriptorType;
115  using UpdateTargetColumnNameType = typename UpdateTargetColumnNamesList::value_type;
117 
120 
121  struct MethodSelector {
122  static constexpr auto getEntryAt(StringSelector) {
123  return &FragmentUpdaterType::getTranslatedEntryAt;
124  }
125  static constexpr auto getEntryAt(NonStringSelector) {
126  return &FragmentUpdaterType::getEntryAt;
127  }
128  };
129 
131  public:
132  typename IOFacility::TransactionLog& getTransactionTracker() {
133  return transaction_tracker_;
134  }
135  void finalizeTransaction() { transaction_tracker_.commitUpdate(); }
136 
137  private:
138  typename IOFacility::TransactionLog transaction_tracker_;
139  };
140 
142  public:
144 
145  private:
148  delete;
149  };
150 
152  public:
154  UpdateTargetColumnNamesList const& update_column_names,
155  UpdateTargetTypeList const& target_types,
156  bool varlen_update_required)
157  : table_descriptor_(table_desc)
158  , update_column_names_(update_column_names)
159  , targets_meta_(target_types)
160  , varlen_update_required_(varlen_update_required) {
162  throw std::runtime_error("UPDATE not yet supported on temporary tables.");
163  }
164  };
165 
166  auto getUpdateColumnCount() const { return update_column_names_.size(); }
167  auto const* getTableDescriptor() const { return table_descriptor_; }
168  auto const& getTargetsMetaInfo() const { return targets_meta_; }
169  auto getTargetsMetaInfoSize() const { return targets_meta_.size(); }
170  auto const& getUpdateColumnNames() const { return update_column_names_; }
172 
173  private:
176  delete;
177 
182  };
183 
184  StorageIOFacility(ExecutorType* executor, CatalogType const& catalog)
185  : executor_(executor), catalog_(catalog) {}
186 
188  TableDescriptorType const* table_descriptor) {
189  return IOFacility::yieldColumnValidator(catalog_, table_descriptor);
190  }
191 
192  UpdateCallback yieldUpdateCallback(UpdateTransactionParameters& update_parameters);
193  UpdateCallback yieldDeleteCallback(DeleteTransactionParameters& delete_parameters);
194 
195  private:
196  int normalized_cpu_threads() const { return cpu_threads() / 2; }
197 
200 };
201 
202 template <typename EXECUTOR_TRAITS, typename IO_FACET, typename FRAGMENT_UPDATER>
205  UpdateTransactionParameters& update_parameters) {
206  using OffsetVector = std::vector<uint64_t>;
207  using ScalarTargetValueVector = std::vector<ScalarTargetValue>;
208  using RowProcessingFuturesVector = std::vector<std::future<uint64_t>>;
209 
210  if (update_parameters.isVarlenUpdateRequired()) {
// Callback used when the update parameters report that a variable-length
// update is required. It captures `this` and `update_parameters` by reference,
// so both must still be alive whenever the callback is invoked.
//
// For the fragment described by `update_log` it collects, per update column,
// the catalog's column descriptor and the matching target meta info, then
// hands the whole update to the table fragmenter's updateColumns in one call.
211  auto callback = [this,
212  &update_parameters](FragmentUpdaterType const& update_log) -> void {
213  std::vector<const ColumnDescriptor*> columnDescriptors;
214  std::vector<TargetMetaInfo> sourceMetaInfos;
215 
// Column names and target meta infos are paired by position (same idx).
216  for (size_t idx = 0; idx < update_parameters.getUpdateColumnNames().size(); idx++) {
217  auto& column_name = update_parameters.getUpdateColumnNames()[idx];
218  auto target_column =
219  catalog_.getMetadataForColumn(update_log.getPhysicalTableId(), column_name);
// NOTE(review): target_column is stored without a null check.
220  columnDescriptors.push_back(target_column);
221  sourceMetaInfos.push_back(update_parameters.getTargetsMetaInfo()[idx]);
222  }
223 
// NOTE(review): td is dereferenced without a null check; only the fragmenter
// pointer is CHECKed.
224  auto td = catalog_.getMetadataForTable(update_log.getPhysicalTableId());
225  auto* fragmenter = td->fragmenter;
226  CHECK(fragmenter);
227 
228  fragmenter->updateColumns(
229  &catalog_,
230  td,
231  update_log.getFragmentId(),
232  sourceMetaInfos,
233  columnDescriptors,
234  update_log,
235  update_parameters.getUpdateColumnCount(), // last column of result set
// NOTE(review): the embedded listing numbers jump from 235 to 237 here, so one
// line of this argument list appears to be missing from this copy of the file.
// Confirm against the real header before editing this call.
237  update_parameters.getTransactionTracker());
238  };
239  return callback;
240 
241  } else {
242  auto callback = [this,
243  &update_parameters](FragmentUpdaterType const& update_log) -> void {
244  auto entries_per_column = update_log.getEntryCount();
245  auto rows_per_column = update_log.getRowCount();
246  if (rows_per_column == 0) {
247  return;
248  }
249 
250  OffsetVector column_offsets(rows_per_column);
251  ScalarTargetValueVector scalar_target_values(rows_per_column);
252 
253  auto complete_entry_block_size = entries_per_column / normalized_cpu_threads();
254  auto partial_row_block_size = entries_per_column % normalized_cpu_threads();
255  auto usable_threads = normalized_cpu_threads();
256  if (UNLIKELY(rows_per_column < (unsigned)normalized_cpu_threads())) {
257  complete_entry_block_size = entries_per_column;
258  partial_row_block_size = 0;
259  usable_threads = 1;
260  }
261 
262  std::atomic<size_t> row_idx{0};
263 
264  auto process_rows = [&update_log,
265  &update_parameters,
266  &column_offsets,
267  &scalar_target_values,
268  &row_idx](auto type_tag,
269  uint64_t column_index,
270  uint64_t entry_start,
271  uint64_t entry_count) -> uint64_t {
272  uint64_t entries_processed = 0;
273  for (uint64_t entry_index = entry_start;
274  entry_index < (entry_start + entry_count);
275  entry_index++) {
276  constexpr auto get_entry_method_sel(MethodSelector::getEntryAt(type_tag));
277  auto const row((update_log.*get_entry_method_sel)(entry_index));
278 
279  if (row.empty()) {
280  continue;
281  }
282 
283  entries_processed++;
284  size_t row_index = row_idx.fetch_add(1);
285 
286  CHECK(row.size() == update_parameters.getUpdateColumnCount() + 1);
287 
288  auto terminal_column_iter = std::prev(row.end());
289  const auto frag_offset_scalar_tv =
290  boost::get<ScalarTargetValue>(&*terminal_column_iter);
291  CHECK(frag_offset_scalar_tv);
292 
293  column_offsets[row_index] =
294  static_cast<uint64_t>(*(boost::get<int64_t>(frag_offset_scalar_tv)));
295  scalar_target_values[row_index] =
296  boost::get<ScalarTargetValue>(row[column_index]);
297  }
298  return entries_processed;
299  };
300 
301  auto get_row_index =
302  [complete_entry_block_size](uint64_t thread_index) -> uint64_t {
303  return (thread_index * complete_entry_block_size);
304  };
305 
306  // Iterate over each column
307  for (decltype(update_parameters.getUpdateColumnCount()) column_index = 0;
308  column_index < update_parameters.getUpdateColumnCount();
309  column_index++) {
310  row_idx = 0;
311  RowProcessingFuturesVector entry_processing_futures;
312  entry_processing_futures.reserve(usable_threads);
313 
314  auto thread_launcher = [&](auto const& type_tag) {
315  for (unsigned i = 0; i < static_cast<unsigned>(usable_threads); i++) {
316  entry_processing_futures.emplace_back(
317  std::async(std::launch::async,
318  std::forward<decltype(process_rows)>(process_rows),
319  type_tag,
320  column_index,
321  get_row_index(i),
322  complete_entry_block_size));
323  }
324  if (partial_row_block_size) {
325  entry_processing_futures.emplace_back(
326  std::async(std::launch::async,
327  std::forward<decltype(process_rows)>(process_rows),
328  type_tag,
329  column_index,
330  get_row_index(usable_threads),
331  partial_row_block_size));
332  }
333  };
334 
335  if (!update_log.getColumnType(column_index).is_string()) {
336  thread_launcher(NonStringSelector());
337  } else {
338  thread_launcher(StringSelector());
339  }
340 
341  uint64_t entries_processed(0);
342  for (auto& t : entry_processing_futures) {
343  t.wait();
344  entries_processed += t.get();
345  }
346 
347  CHECK(row_idx == rows_per_column);
348 
349  IOFacility::updateColumn(catalog_,
350  update_log.getPhysicalTableId(),
351  update_parameters.getUpdateColumnNames()[column_index],
352  update_log.getFragmentId(),
353  column_offsets,
354  scalar_target_values,
355  update_log.getColumnType(column_index),
356  update_parameters.getTransactionTracker());
357  }
358  };
359  return callback;
360  }
361 }
362 
363 template <typename EXECUTOR_TRAITS, typename IO_FACET, typename FRAGMENT_UPDATER>
366  DeleteTransactionParameters& delete_parameters) {
367  using RowProcessingFuturesVector = std::vector<std::future<uint64_t>>;
368 
369  auto callback = [this,
370  &delete_parameters](FragmentUpdaterType const& update_log) -> void {
371  auto entries_per_column = update_log.getEntryCount();
372  auto rows_per_column = update_log.getRowCount();
373  if (rows_per_column == 0) {
374  return;
375  }
376  DeleteVictimOffsetList victim_offsets(rows_per_column);
377 
378  auto complete_row_block_size = entries_per_column / normalized_cpu_threads();
379  auto partial_row_block_size = entries_per_column % normalized_cpu_threads();
380  auto usable_threads = normalized_cpu_threads();
381 
382  if (UNLIKELY(rows_per_column < (unsigned)normalized_cpu_threads())) {
383  complete_row_block_size = rows_per_column;
384  partial_row_block_size = 0;
385  usable_threads = 1;
386  }
387 
388  std::atomic<size_t> row_idx{0};
389 
390  auto process_rows = [&update_log, &victim_offsets, &row_idx](
391  uint64_t entry_start, uint64_t entry_count) -> uint64_t {
392  uint64_t entries_processed = 0;
393 
394  for (uint64_t entry_index = entry_start; entry_index < (entry_start + entry_count);
395  entry_index++) {
396  auto const row(update_log.getEntryAt(entry_index));
397 
398  if (row.empty()) {
399  continue;
400  }
401 
402  entries_processed++;
403  size_t row_index = row_idx.fetch_add(1);
404 
405  auto terminal_column_iter = std::prev(row.end());
406  const auto scalar_tv = boost::get<ScalarTargetValue>(&*terminal_column_iter);
407  CHECK(scalar_tv);
408 
409  uint64_t fragment_offset =
410  static_cast<uint64_t>(*(boost::get<int64_t>(scalar_tv)));
411  victim_offsets[row_index] = fragment_offset;
412  }
413  return entries_processed;
414  };
415 
416  auto get_row_index = [complete_row_block_size](uint64_t thread_index) -> uint64_t {
417  return thread_index * complete_row_block_size;
418  };
419 
420  RowProcessingFuturesVector row_processing_futures;
421  row_processing_futures.reserve(usable_threads);
422 
423  for (unsigned i = 0; i < (unsigned)usable_threads; i++) {
424  row_processing_futures.emplace_back(
425  std::async(std::launch::async,
426  std::forward<decltype(process_rows)>(process_rows),
427  get_row_index(i),
428  complete_row_block_size));
429  }
430  if (partial_row_block_size) {
431  row_processing_futures.emplace_back(
432  std::async(std::launch::async,
433  std::forward<decltype(process_rows)>(process_rows),
434  get_row_index(usable_threads),
435  partial_row_block_size));
436  }
437 
438  uint64_t rows_processed(0);
439  for (auto& t : row_processing_futures) {
440  t.wait();
441  rows_processed += t.get();
442  }
443 
444  IOFacility::deleteColumns(catalog_,
445  update_log.getPhysicalTableId(),
446  update_log.getFragmentId(),
447  victim_offsets,
448  update_log.getColumnType(0),
449  delete_parameters.getTransactionTracker());
450  };
451  return callback;
452 }
453 
454 #endif
int normalized_cpu_threads() const
catalog_(nullptr)
static std::function< bool(std::string const &)> yieldColumnValidator(CATALOG_TYPE const &cat, TABLE_DESCRIPTOR_TYPE const *table_descriptor)
typename IOFacility::DeleteVictimOffsetList DeleteVictimOffsetList
std::unique_ptr< TransactionLog > TransactionLogPtr
typename RelAlgExecutorTraits::ExecutorType ExecutorType
UpdateTransactionParameters(TableDescriptorType const *table_desc, UpdateTargetColumnNamesList const &update_column_names, UpdateTargetTypeList const &target_types, bool varlen_update_required)
IOFacility::TransactionLog transaction_tracker_
#define LOG(tag)
Definition: Logger.h:188
typename IOFacility::UpdateTargetOffsetList UpdateTargetOffsetList
typename IOFacility::ColumnValidationFunction ColumnValidationFunction
static constexpr auto getEntryAt(NonStringSelector)
DeleteTransactionParameters & operator=(DeleteTransactionParameters const &other)=delete
ExecutorType * executor_
std::function< void(const UpdateLogForFragment &)> Callback
Definition: Execute.h:317
FRAGMENT_UPDATER FragmentUpdaterType
IOFacility::TransactionLog & getTransactionTracker()
FRAGMENTER_TYPE FragmenterType
CHECK(cgen_state)
typename FragmentUpdaterType::Callback UpdateCallback
ColumnValidationFunction yieldColumnValidator(TableDescriptorType const *table_descriptor)
bool table_is_temporary(const TableDescriptor *td)
typename IOFacility::UpdateTargetColumnNamesList UpdateTargetColumnNamesList
#define UNLIKELY(x)
Definition: likely.h:20
UpdateCallback yieldUpdateCallback(UpdateTransactionParameters &update_parameters)
typename RelAlgExecutorTraits::TableDescriptorType TableDescriptorType
UpdateCallback yieldDeleteCallback(DeleteTransactionParameters &delete_parameters)
StorageIOFacility(ExecutorType *executor, CatalogType const &catalog)
std::vector< uint64_t > UpdateTargetOffsetList
typename UpdateTargetColumnNamesList::value_type UpdateTargetColumnNameType
std::vector< uint64_t > DeleteVictimOffsetList
CatalogType const & catalog_
UpdateTransactionParameters & operator=(UpdateTransactionParameters const &other)=delete
static void deleteColumns(CATALOG_TYPE const &cat, TABLE_ID_TYPE const &&table_id, FRAGMENT_ID_TYPE const frag_id, VICTIM_OFFSET_LIST &victims, COLUMN_TYPE_INFO const &col_type_info, TransactionLog &transaction_tracker)
std::vector< TargetMetaInfo > UpdateTargetTypeList
static void updateColumn(CATALOG_TYPE const &cat, TABLE_ID_TYPE const &&table_id, COLUMN_NAME_TYPE const &column_name, FRAGMENT_ID_TYPE const frag_id, FRAGMENT_OFFSET_LIST_TYPE const &frag_offsets, UPDATE_VALUES_LIST_TYPE const &update_values, COLUMN_TYPE_INFO const &col_type_info, TransactionLog &transaction_tracker)
typename RelAlgExecutorTraits::CatalogType CatalogType
int cpu_threads()
Definition: thread_count.h:25
std::vector< std::string > UpdateTargetColumnNamesList
typename FragmenterType::ModifyTransactionTracker TransactionLog
static constexpr auto getEntryAt(StringSelector)
std::function< bool(std::string const &)> ColumnValidationFunction
typename IOFacility::UpdateTargetTypeList UpdateTargetTypeList
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156