OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StorageIOFacility.h
Go to the documentation of this file.
1 #ifndef STORAGEIOFACILITY_H
2 #define STORAGEIOFACILITY_H
3 
6 #include "TargetMetaInfo.h"
7 
8 #include <boost/variant.hpp>
9 #include "Shared/ConfigResolve.h"
11 #include "Shared/UpdelRoll.h"
12 #include "Shared/likely.h"
13 #include "Shared/thread_count.h"
14 
15 #include <future>
16 
17 template <typename FRAGMENTER_TYPE = Fragmenter_Namespace::InsertOrderFragmenter>
19  public:
20  using FragmenterType = FRAGMENTER_TYPE;
21  using DeleteVictimOffsetList = std::vector<uint64_t>;
22  using UpdateTargetOffsetList = std::vector<uint64_t>;
23  using UpdateTargetTypeList = std::vector<TargetMetaInfo>;
24  using UpdateTargetColumnNamesList = std::vector<std::string>;
25  using TransactionLog = typename FragmenterType::ModifyTransactionTracker;
26  using TransactionLogPtr = std::unique_ptr<TransactionLog>;
27  using ColumnValidationFunction = std::function<bool(std::string const&)>;
28 
29  template <typename CATALOG_TYPE,
30  typename TABLE_ID_TYPE,
31  typename COLUMN_NAME_TYPE,
32  typename FRAGMENT_ID_TYPE,
33  typename FRAGMENT_OFFSET_LIST_TYPE,
34  typename UPDATE_VALUES_LIST_TYPE,
35  typename COLUMN_TYPE_INFO>
36  static void updateColumn(CATALOG_TYPE const& cat,
37  TABLE_ID_TYPE const&& table_id,
38  COLUMN_NAME_TYPE const& column_name,
39  FRAGMENT_ID_TYPE const frag_id,
40  FRAGMENT_OFFSET_LIST_TYPE const& frag_offsets,
41  UPDATE_VALUES_LIST_TYPE const& update_values,
42  COLUMN_TYPE_INFO const& col_type_info,
43  TransactionLog& transaction_tracker) {
44  auto const* table_descriptor = cat.getMetadataForTable(table_id);
45  auto* fragmenter = table_descriptor->fragmenter;
46  CHECK(fragmenter);
47  auto const* target_column = cat.getMetadataForColumn(table_id, column_name);
48 
49  fragmenter->updateColumn(&cat,
50  table_descriptor,
51  target_column,
52  frag_id,
53  frag_offsets,
54  update_values,
55  col_type_info,
57  transaction_tracker);
58  }
59 
60  template <typename CATALOG_TYPE,
61  typename TABLE_ID_TYPE,
62  typename FRAGMENT_ID_TYPE,
63  typename VICTIM_OFFSET_LIST,
64  typename COLUMN_TYPE_INFO>
65  static void deleteColumns(CATALOG_TYPE const& cat,
66  TABLE_ID_TYPE const&& table_id,
67  FRAGMENT_ID_TYPE const frag_id,
68  VICTIM_OFFSET_LIST& victims,
69  COLUMN_TYPE_INFO const& col_type_info,
70  TransactionLog& transaction_tracker) {
71  auto const* table_descriptor = cat.getMetadataForTable(table_id);
72  auto* fragmenter = table_descriptor->fragmenter;
73  CHECK(fragmenter);
74 
75  auto const* deleted_column_desc = cat.getDeletedColumn(table_descriptor);
76  if (deleted_column_desc != nullptr) {
77  fragmenter->updateColumn(&cat,
78  table_descriptor,
79  deleted_column_desc,
80  frag_id,
81  victims,
82  ScalarTargetValue(int64_t(1L)),
83  col_type_info,
85  transaction_tracker);
86  } else {
87  LOG(INFO) << "Delete metadata column unavailable; skipping delete operation.";
88  }
89  }
90 
/// Builds the column-name validator for a table.
///
/// The returned predicate accepts every column name: it ignores its argument
/// and always yields true. Neither `cat` nor `table_descriptor` is consulted.
template <typename CATALOG_TYPE, typename TABLE_DESCRIPTOR_TYPE>
static std::function<bool(std::string const&)> yieldColumnValidator(
    CATALOG_TYPE const& cat,
    TABLE_DESCRIPTOR_TYPE const* table_descriptor) {
  auto accept_any_column = [](std::string const& column_name) -> bool {
    return true;
  };
  return accept_any_column;
}
97 };
98 
99 template <typename EXECUTOR_TRAITS,
100  typename IO_FACET = DefaultIOFacet<>,
101  typename FRAGMENT_UPDATER = UpdateLogForFragment>
103  public:
104  using ExecutorType = typename EXECUTOR_TRAITS::ExecutorType;
105  using CatalogType = typename EXECUTOR_TRAITS::CatalogType;
106  using FragmentUpdaterType = FRAGMENT_UPDATER;
108  using IOFacility = IO_FACET;
109  using TableDescriptorType = typename EXECUTOR_TRAITS::TableDescriptorType;
114  using UpdateTargetColumnNameType = typename UpdateTargetColumnNamesList::value_type;
116 
119 
120  struct MethodSelector {
121  static constexpr auto getEntryAt(StringSelector) {
122  return &FragmentUpdaterType::getTranslatedEntryAt;
123  }
124  static constexpr auto getEntryAt(NonStringSelector) {
125  return &FragmentUpdaterType::getEntryAt;
126  }
127  };
128 
130  public:
131  typename IOFacility::TransactionLog& getTransactionTracker() {
132  return transaction_tracker_;
133  }
134  void finalizeTransaction() { transaction_tracker_.commitUpdate(); }
135 
136  private:
137  typename IOFacility::TransactionLog transaction_tracker_;
138  };
139 
141  public:
143 
144  private:
147  delete;
148  };
149 
151  public:
153  UpdateTargetColumnNamesList const& update_column_names,
154  UpdateTargetTypeList const& target_types,
155  bool varlen_update_required)
156  : table_descriptor_(table_desc)
157  , update_column_names_(update_column_names)
158  , targets_meta_(target_types)
159  , varlen_update_required_(varlen_update_required){};
160 
161  auto getUpdateColumnCount() const { return update_column_names_.size(); }
162  auto const* getTableDescriptor() const { return table_descriptor_; }
163  auto const& getTargetsMetaInfo() const { return targets_meta_; }
164  auto getTargetsMetaInfoSize() const { return targets_meta_.size(); }
165  auto const& getUpdateColumnNames() const { return update_column_names_; }
167 
168  private:
171  delete;
172 
177  };
178 
179  StorageIOFacility(ExecutorType* executor, CatalogType const& catalog)
180  : executor_(executor), catalog_(catalog) {}
181 
183  TableDescriptorType const* table_descriptor) {
184  return IOFacility::yieldColumnValidator(catalog_, table_descriptor);
185  }
186 
187  UpdateCallback yieldUpdateCallback(UpdateTransactionParameters& update_parameters);
188  UpdateCallback yieldDeleteCallback(DeleteTransactionParameters& delete_parameters);
189 
190  private:
191  int normalized_cpu_threads() const { return cpu_threads() / 2; }
192 
195 };
196 
197 template <typename EXECUTOR_TRAITS, typename IO_FACET, typename FRAGMENT_UPDATER>
200  UpdateTransactionParameters& update_parameters) {
201  using OffsetVector = std::vector<uint64_t>;
202  using ScalarTargetValueVector = std::vector<ScalarTargetValue>;
203  using RowProcessingFuturesVector = std::vector<std::future<uint64_t>>;
204 
205  if (update_parameters.isVarlenUpdateRequired()) {
206  auto callback = [this,
207  &update_parameters](FragmentUpdaterType const& update_log) -> void {
208  std::vector<const ColumnDescriptor*> columnDescriptors;
209  std::vector<TargetMetaInfo> sourceMetaInfos;
210 
211  for (size_t idx = 0; idx < update_parameters.getUpdateColumnNames().size(); idx++) {
212  auto& column_name = update_parameters.getUpdateColumnNames()[idx];
213  auto target_column =
214  catalog_.getMetadataForColumn(update_log.getPhysicalTableId(), column_name);
215  columnDescriptors.push_back(target_column);
216  sourceMetaInfos.push_back(update_parameters.getTargetsMetaInfo()[idx]);
217  }
218 
219  auto td = catalog_.getMetadataForTable(update_log.getPhysicalTableId());
220  auto* fragmenter = td->fragmenter;
221  CHECK(fragmenter);
222 
223  fragmenter->updateColumns(
224  &catalog_,
225  td,
226  update_log.getFragmentId(),
227  sourceMetaInfos,
228  columnDescriptors,
229  update_log,
230  update_parameters.getUpdateColumnCount(), // last column of result set
232  update_parameters.getTransactionTracker());
233  };
234  return callback;
235 
236  } else {
237  auto callback = [this,
238  &update_parameters](FragmentUpdaterType const& update_log) -> void {
239  auto entries_per_column = update_log.getEntryCount();
240  auto rows_per_column = update_log.getRowCount();
241  if (rows_per_column == 0) {
242  return;
243  }
244 
245  OffsetVector column_offsets(rows_per_column);
246  ScalarTargetValueVector scalar_target_values(rows_per_column);
247 
248  auto complete_entry_block_size = entries_per_column / normalized_cpu_threads();
249  auto partial_row_block_size = entries_per_column % normalized_cpu_threads();
250  auto usable_threads = normalized_cpu_threads();
251  if (UNLIKELY(rows_per_column < (unsigned)normalized_cpu_threads())) {
252  complete_entry_block_size = entries_per_column;
253  partial_row_block_size = 0;
254  usable_threads = 1;
255  }
256 
257  std::atomic<size_t> row_idx{0};
258 
259  auto process_rows = [&update_log,
260  &update_parameters,
261  &column_offsets,
262  &scalar_target_values,
263  &row_idx](auto type_tag,
264  uint64_t column_index,
265  uint64_t entry_start,
266  uint64_t entry_count) -> uint64_t {
267  uint64_t entries_processed = 0;
268  for (uint64_t entry_index = entry_start;
269  entry_index < (entry_start + entry_count);
270  entry_index++) {
271  constexpr auto get_entry_method_sel(MethodSelector::getEntryAt(type_tag));
272  auto const row((update_log.*get_entry_method_sel)(entry_index));
273 
274  if (row.empty()) {
275  continue;
276  }
277 
278  entries_processed++;
279  size_t row_index = row_idx.fetch_add(1);
280 
281  CHECK(row.size() == update_parameters.getUpdateColumnCount() + 1);
282 
283  auto terminal_column_iter = std::prev(row.end());
284  const auto frag_offset_scalar_tv =
285  boost::get<ScalarTargetValue>(&*terminal_column_iter);
286  CHECK(frag_offset_scalar_tv);
287 
288  column_offsets[row_index] =
289  static_cast<uint64_t>(*(boost::get<int64_t>(frag_offset_scalar_tv)));
290  scalar_target_values[row_index] =
291  boost::get<ScalarTargetValue>(row[column_index]);
292  }
293  return entries_processed;
294  };
295 
296  auto get_row_index =
297  [complete_entry_block_size](uint64_t thread_index) -> uint64_t {
298  return (thread_index * complete_entry_block_size);
299  };
300 
301  // Iterate over each column
302  for (decltype(update_parameters.getUpdateColumnCount()) column_index = 0;
303  column_index < update_parameters.getUpdateColumnCount();
304  column_index++) {
305  row_idx = 0;
306  RowProcessingFuturesVector entry_processing_futures;
307  entry_processing_futures.reserve(usable_threads);
308 
309  auto thread_launcher = [&](auto const& type_tag) {
310  for (unsigned i = 0; i < static_cast<unsigned>(usable_threads); i++) {
311  entry_processing_futures.emplace_back(
312  std::async(std::launch::async,
313  std::forward<decltype(process_rows)>(process_rows),
314  type_tag,
315  column_index,
316  get_row_index(i),
317  complete_entry_block_size));
318  }
319  if (partial_row_block_size) {
320  entry_processing_futures.emplace_back(
321  std::async(std::launch::async,
322  std::forward<decltype(process_rows)>(process_rows),
323  type_tag,
324  column_index,
325  get_row_index(usable_threads),
326  partial_row_block_size));
327  }
328  };
329 
330  if (!update_log.getColumnType(column_index).is_string()) {
331  thread_launcher(NonStringSelector());
332  } else {
333  thread_launcher(StringSelector());
334  }
335 
336  uint64_t entries_processed(0);
337  for (auto& t : entry_processing_futures) {
338  t.wait();
339  entries_processed += t.get();
340  }
341 
342  CHECK(row_idx == rows_per_column);
343 
344  IOFacility::updateColumn(catalog_,
345  update_log.getPhysicalTableId(),
346  update_parameters.getUpdateColumnNames()[column_index],
347  update_log.getFragmentId(),
348  column_offsets,
349  scalar_target_values,
350  update_log.getColumnType(column_index),
351  update_parameters.getTransactionTracker());
352  }
353  };
354  return callback;
355  }
356 }
357 
358 template <typename EXECUTOR_TRAITS, typename IO_FACET, typename FRAGMENT_UPDATER>
361  DeleteTransactionParameters& delete_parameters) {
362  using RowProcessingFuturesVector = std::vector<std::future<uint64_t>>;
363 
364  auto callback = [this,
365  &delete_parameters](FragmentUpdaterType const& update_log) -> void {
366  auto entries_per_column = update_log.getEntryCount();
367  auto rows_per_column = update_log.getRowCount();
368  if (rows_per_column == 0) {
369  return;
370  }
371  DeleteVictimOffsetList victim_offsets(rows_per_column);
372 
373  auto complete_row_block_size = entries_per_column / normalized_cpu_threads();
374  auto partial_row_block_size = entries_per_column % normalized_cpu_threads();
375  auto usable_threads = normalized_cpu_threads();
376 
377  if (UNLIKELY(rows_per_column < (unsigned)normalized_cpu_threads())) {
378  complete_row_block_size = rows_per_column;
379  partial_row_block_size = 0;
380  usable_threads = 1;
381  }
382 
383  std::atomic<size_t> row_idx{0};
384 
385  auto process_rows = [&update_log, &victim_offsets, &row_idx](
386  uint64_t entry_start, uint64_t entry_count) -> uint64_t {
387  uint64_t entries_processed = 0;
388 
389  for (uint64_t entry_index = entry_start; entry_index < (entry_start + entry_count);
390  entry_index++) {
391  auto const row(update_log.getEntryAt(entry_index));
392 
393  if (row.empty()) {
394  continue;
395  }
396 
397  entries_processed++;
398  size_t row_index = row_idx.fetch_add(1);
399 
400  auto terminal_column_iter = std::prev(row.end());
401  const auto scalar_tv = boost::get<ScalarTargetValue>(&*terminal_column_iter);
402  CHECK(scalar_tv);
403 
404  uint64_t fragment_offset =
405  static_cast<uint64_t>(*(boost::get<int64_t>(scalar_tv)));
406  victim_offsets[row_index] = fragment_offset;
407  }
408  return entries_processed;
409  };
410 
411  auto get_row_index = [complete_row_block_size](uint64_t thread_index) -> uint64_t {
412  return thread_index * complete_row_block_size;
413  };
414 
415  RowProcessingFuturesVector row_processing_futures;
416  row_processing_futures.reserve(usable_threads);
417 
418  for (unsigned i = 0; i < (unsigned)usable_threads; i++) {
419  row_processing_futures.emplace_back(
420  std::async(std::launch::async,
421  std::forward<decltype(process_rows)>(process_rows),
422  get_row_index(i),
423  complete_row_block_size));
424  }
425  if (partial_row_block_size) {
426  row_processing_futures.emplace_back(
427  std::async(std::launch::async,
428  std::forward<decltype(process_rows)>(process_rows),
429  get_row_index(usable_threads),
430  partial_row_block_size));
431  }
432 
433  uint64_t rows_processed(0);
434  for (auto& t : row_processing_futures) {
435  t.wait();
436  rows_processed += t.get();
437  }
438 
439  IOFacility::deleteColumns(catalog_,
440  update_log.getPhysicalTableId(),
441  update_log.getFragmentId(),
442  victim_offsets,
443  update_log.getColumnType(0),
444  delete_parameters.getTransactionTracker());
445  };
446  return callback;
447 }
448 
449 #endif
int normalized_cpu_threads() const
catalog_(nullptr)
static std::function< bool(std::string const &)> yieldColumnValidator(CATALOG_TYPE const &cat, TABLE_DESCRIPTOR_TYPE const *table_descriptor)
typename IOFacility::DeleteVictimOffsetList DeleteVictimOffsetList
std::unique_ptr< TransactionLog > TransactionLogPtr
typename RelAlgExecutorTraits::ExecutorType ExecutorType
UpdateTransactionParameters(TableDescriptorType const *table_desc, UpdateTargetColumnNamesList const &update_column_names, UpdateTargetTypeList const &target_types, bool varlen_update_required)
IOFacility::TransactionLog transaction_tracker_
#define LOG(tag)
Definition: Logger.h:185
typename IOFacility::UpdateTargetOffsetList UpdateTargetOffsetList
typename IOFacility::ColumnValidationFunction ColumnValidationFunction
static constexpr auto getEntryAt(NonStringSelector)
DeleteTransactionParameters & operator=(DeleteTransactionParameters const &other)=delete
ExecutorType * executor_
std::function< void(const UpdateLogForFragment &)> Callback
Definition: Execute.h:318
FRAGMENT_UPDATER FragmentUpdaterType
IOFacility::TransactionLog & getTransactionTracker()
FRAGMENTER_TYPE FragmenterType
CHECK(cgen_state)
typename FragmentUpdaterType::Callback UpdateCallback
ColumnValidationFunction yieldColumnValidator(TableDescriptorType const *table_descriptor)
typename IOFacility::UpdateTargetColumnNamesList UpdateTargetColumnNamesList
#define UNLIKELY(x)
Definition: likely.h:20
UpdateCallback yieldUpdateCallback(UpdateTransactionParameters &update_parameters)
typename RelAlgExecutorTraits::TableDescriptorType TableDescriptorType
UpdateCallback yieldDeleteCallback(DeleteTransactionParameters &delete_parameters)
StorageIOFacility(ExecutorType *executor, CatalogType const &catalog)
std::vector< uint64_t > UpdateTargetOffsetList
typename UpdateTargetColumnNamesList::value_type UpdateTargetColumnNameType
std::vector< uint64_t > DeleteVictimOffsetList
CatalogType const & catalog_
UpdateTransactionParameters & operator=(UpdateTransactionParameters const &other)=delete
static void deleteColumns(CATALOG_TYPE const &cat, TABLE_ID_TYPE const &&table_id, FRAGMENT_ID_TYPE const frag_id, VICTIM_OFFSET_LIST &victims, COLUMN_TYPE_INFO const &col_type_info, TransactionLog &transaction_tracker)
std::vector< TargetMetaInfo > UpdateTargetTypeList
static void updateColumn(CATALOG_TYPE const &cat, TABLE_ID_TYPE const &&table_id, COLUMN_NAME_TYPE const &column_name, FRAGMENT_ID_TYPE const frag_id, FRAGMENT_OFFSET_LIST_TYPE const &frag_offsets, UPDATE_VALUES_LIST_TYPE const &update_values, COLUMN_TYPE_INFO const &col_type_info, TransactionLog &transaction_tracker)
typename RelAlgExecutorTraits::CatalogType CatalogType
int cpu_threads()
Definition: thread_count.h:25
std::vector< std::string > UpdateTargetColumnNamesList
typename FragmenterType::ModifyTransactionTracker TransactionLog
static constexpr auto getEntryAt(StringSelector)
std::function< bool(std::string const &)> ColumnValidationFunction
typename IOFacility::UpdateTargetTypeList UpdateTargetTypeList
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156