OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
InputMetadata.h File Reference
+ Include dependency graph for InputMetadata.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  InputTableInfo
 
class  InputTableInfoCache
 

Namespaces

 Catalog_Namespace
 

Typedefs

using TemporaryTables = std::unordered_map< int, const ResultSetPtr & >
 

Functions

ChunkMetadataMap synthesize_metadata (const ResultSet *rows)
 
size_t get_frag_count_of_table (const shared::TableKey &table_key, Executor *executor)
 
std::vector< InputTableInfo > get_table_infos (const std::vector< InputDescriptor > &input_descs, Executor *executor)
 
std::vector< InputTableInfo > get_table_infos (const RelAlgExecutionUnit &ra_exe_unit, Executor *executor)
 
Fragmenter_Namespace::TableInfo build_table_info (const std::vector< const TableDescriptor * > &shard_tables)
 

Typedef Documentation

using TemporaryTables = std::unordered_map<int, const ResultSetPtr&>

Definition at line 31 of file InputMetadata.h.

Function Documentation

Fragmenter_Namespace::TableInfo build_table_info ( const std::vector< const TableDescriptor * > &  shard_tables)

Definition at line 44 of file InputMetadata.cpp.

References CHECK, Fragmenter_Namespace::TableInfo::fragments, and Fragmenter_Namespace::TableInfo::setPhysicalNumTuples().

Referenced by InputTableInfoCache::getTableInfo().

45  {
46  size_t total_number_of_tuples{0};
47  Fragmenter_Namespace::TableInfo table_info_all_shards;
48  for (const TableDescriptor* shard_table : shard_tables) {
49  CHECK(shard_table->fragmenter);
50  const auto& shard_metainfo = shard_table->fragmenter->getFragmentsForQuery();
51  total_number_of_tuples += shard_metainfo.getPhysicalNumTuples();
52  table_info_all_shards.fragments.reserve(table_info_all_shards.fragments.size() +
53  shard_metainfo.fragments.size());
54  table_info_all_shards.fragments.insert(table_info_all_shards.fragments.end(),
55  shard_metainfo.fragments.begin(),
56  shard_metainfo.fragments.end());
57  }
58  table_info_all_shards.setPhysicalNumTuples(total_number_of_tuples);
59  return table_info_all_shards;
60 }
std::vector< FragmentInfo > fragments
Definition: Fragmenter.h:171
#define CHECK(condition)
Definition: Logger.h:291
void setPhysicalNumTuples(const size_t physNumTuples)
Definition: Fragmenter.h:166

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t get_frag_count_of_table ( const shared::TableKey &  table_key,
Executor *  executor 
)

Definition at line 479 of file InputMetadata.cpp.

References CHECK, CHECK_GE, and shared::TableKey::table_id.

Referenced by RelAlgExecutor::getOuterFragmentCount().

479  {
480  const auto temporary_tables = executor->getTemporaryTables();
481  CHECK(temporary_tables);
482  auto it = temporary_tables->find(table_key.table_id);
483  if (it != temporary_tables->end()) {
484  CHECK_GE(int(0), table_key.table_id);
485  return size_t(1);
486  } else {
487  const auto table_info = executor->getTableInfo(table_key);
488  return table_info.fragments.size();
489  }
490 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

std::vector<InputTableInfo> get_table_infos ( const std::vector< InputDescriptor > &  input_descs,
Executor *  executor 
)

Definition at line 492 of file InputMetadata.cpp.

References anonymous_namespace{InputMetadata.cpp}::collect_table_infos().

Referenced by RelAlgExecutor::computeWindow(), RelAlgExecutor::createAggregateWorkUnit(), RelAlgExecutor::createCompoundWorkUnit(), RelAlgExecutor::createFilterWorkUnit(), RelAlgExecutor::createProjectWorkUnit(), RelAlgExecutor::createTableFunctionWorkUnit(), RelAlgExecutor::createUnionWorkUnit(), RelAlgExecutor::executeDelete(), RelAlgExecutor::executeTableFunction(), RelAlgExecutor::executeUpdate(), RelAlgExecutor::executeWorkUnit(), TableOptimizer::getDeletedColumnStats(), RelAlgExecutor::getFilteredCountAll(), RelAlgExecutor::getFilterSelectivity(), RelAlgExecutor::getNDVEstimation(), RelAlgExecutor::handleOutOfMemoryRetry(), TableOptimizer::recomputeColumnMetadata(), and RelAlgExecutor::selectFiltersToBePushedDown().

494  {
495  std::vector<InputTableInfo> table_infos;
496  collect_table_infos(table_infos, input_descs, executor);
497  return table_infos;
498 }
void collect_table_infos(std::vector< InputTableInfo > &table_infos, const std::vector< InputDescriptor > &input_descs, Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<InputTableInfo> get_table_infos ( const RelAlgExecutionUnit &  ra_exe_unit,
Executor *  executor 
)

Definition at line 500 of file InputMetadata.cpp.

References anonymous_namespace{InputMetadata.cpp}::collect_table_infos(), and RelAlgExecutionUnit::input_descs.

501  {
502  std::vector<InputTableInfo> table_infos;
503  collect_table_infos(table_infos, ra_exe_unit.input_descs, executor);
504  return table_infos;
505 }
std::vector< InputDescriptor > input_descs
void collect_table_infos(std::vector< InputTableInfo > &table_infos, const std::vector< InputDescriptor > &input_descs, Executor *executor)

+ Here is the call graph for this function:

ChunkMetadataMap synthesize_metadata ( const ResultSet *  rows)

Definition at line 361 of file InputMetadata.cpp.

References CHECK, CHECK_LT, cpu_threads(), Encoder::Create(), DEBUG_TIMER, inline_fp_null_val(), inline_int_null_val(), kDOUBLE, kFLOAT, threading_serial::parallel_for(), report::rows, synthesize_metadata_table_function(), TableFunction, result_set::use_parallel_algorithms(), and anonymous_namespace{InputMetadata.cpp}::uses_int_meta().

Referenced by Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap().

361  {
362  auto timer = DEBUG_TIMER(__func__);
363  ChunkMetadataMap metadata_map;
364 
365  // If the ResultSet has no rows, fill with dummy metadata and return early.
366  if (rows->definitelyHasNoRows()) {
367  // resultset has no valid storage, so we fill dummy metadata and return early
368  std::vector<std::unique_ptr<Encoder>> decoders;
369  for (size_t i = 0; i < rows->colCount(); ++i) {
370  decoders.emplace_back(Encoder::Create(nullptr, rows->getColType(i)));
371  const auto it_ok =
372  metadata_map.emplace(i, decoders.back()->getMetadata(rows->getColType(i)));
373  CHECK(it_ok.second);
374  }
375  return metadata_map;
376  }
377 
378  // Create a vector of Encoder vectors for each worker.
379  std::vector<std::vector<std::unique_ptr<Encoder>>> dummy_encoders;
380  const size_t worker_count =
381  result_set::use_parallel_algorithms(*rows) ? cpu_threads() : 1;
382  for (size_t worker_idx = 0; worker_idx < worker_count; ++worker_idx) {
383  dummy_encoders.emplace_back();
384  for (size_t i = 0; i < rows->colCount(); ++i) {
385  const auto& col_ti = rows->getColType(i);
386  dummy_encoders.back().emplace_back(Encoder::Create(nullptr, col_ti));
387  }
388  }
389 
390  // For TableFunctions, call the optimized function we have for this format.
391  if (rows->getQueryMemDesc().getQueryDescriptionType() ==
392  QueryDescriptionType::TableFunction) {
393  return synthesize_metadata_table_function(rows);
394  }
395  rows->moveToBegin();
396 
397  // Code in the do_work lambda runs for and processes each row.
398  const auto do_work = [rows](const std::vector<TargetValue>& crt_row,
399  std::vector<std::unique_ptr<Encoder>>& dummy_encoders) {
400  for (size_t i = 0; i < rows->colCount(); ++i) {
401  const auto& col_ti = rows->getColType(i);
402  const auto& col_val = crt_row[i];
403  const auto scalar_col_val = boost::get<ScalarTargetValue>(&col_val);
404  CHECK(scalar_col_val);
405  if (uses_int_meta(col_ti)) {
406  const auto i64_p = boost::get<int64_t>(scalar_col_val);
407  CHECK(i64_p);
408  dummy_encoders[i]->updateStats(*i64_p, *i64_p == inline_int_null_val(col_ti));
409  } else if (col_ti.is_fp()) {
410  switch (col_ti.get_type()) {
411  case kFLOAT: {
412  const auto float_p = boost::get<float>(scalar_col_val);
413  CHECK(float_p);
414  dummy_encoders[i]->updateStats(*float_p,
415  *float_p == inline_fp_null_val(col_ti));
416  break;
417  }
418  case kDOUBLE: {
419  const auto double_p = boost::get<double>(scalar_col_val);
420  CHECK(double_p);
421  dummy_encoders[i]->updateStats(*double_p,
422  *double_p == inline_fp_null_val(col_ti));
423  break;
424  }
425  default:
426  CHECK(false);
427  }
428  } else {
429  throw std::runtime_error(col_ti.get_type_name() +
430  " is not supported in temporary table.");
431  }
432  }
433  };
434 
435  // Parallelize the processing using TBB if parallel algorithms are enabled.
436  if (result_set::use_parallel_algorithms(*rows)) {
437  const size_t entry_count = rows->entryCount();
438  tbb::parallel_for(
439  tbb::blocked_range<size_t>(0, entry_count),
440  [&do_work, &rows, &dummy_encoders](const tbb::blocked_range<size_t>& range) {
441  const size_t worker_idx = tbb::this_task_arena::current_thread_index();
442  for (size_t i = range.begin(); i < range.end(); ++i) {
443  const auto crt_row = rows->getRowAtNoTranslations(i);
444  if (!crt_row.empty()) {
445  do_work(crt_row, dummy_encoders[worker_idx]);
446  }
447  }
448  });
449 
450  } else {
451  // If parallel algorithms are not enabled, process the rows sequentially.
452  while (true) {
453  auto crt_row = rows->getNextRow(false, false);
454  if (crt_row.empty()) {
455  break;
456  }
457  do_work(crt_row, dummy_encoders[0]);
458  }
459  }
460  rows->moveToBegin();
461 
462  // Reduce the results from each worker.
463  for (size_t worker_idx = 1; worker_idx < worker_count; ++worker_idx) {
464  CHECK_LT(worker_idx, dummy_encoders.size());
465  const auto& worker_encoders = dummy_encoders[worker_idx];
466  for (size_t i = 0; i < rows->colCount(); ++i) {
467  dummy_encoders[0][i]->reduceStats(*worker_encoders[i]);
468  }
469  }
470  // Add each column's results to the metadata map.
471  for (size_t i = 0; i < rows->colCount(); ++i) {
472  const auto it_ok =
473  metadata_map.emplace(i, dummy_encoders[0][i]->getMetadata(rows->getColType(i)));
474  CHECK(it_ok.second);
475  }
476  return metadata_map;
477 }
ChunkMetadataMap synthesize_metadata_table_function(const ResultSet *rows)
static Encoder * Create(Data_Namespace::AbstractBuffer *buffer, const SQLTypeInfo sqlType)
Definition: Encoder.cpp:26
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
std::map< int, std::shared_ptr< ChunkMetadata >> ChunkMetadataMap
bool use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1600
tuple rows
Definition: report.py:114
bool uses_int_meta(const SQLTypeInfo &col_ti)
#define CHECK_LT(x, y)
Definition: Logger.h:303
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
int cpu_threads()
Definition: thread_count.h:25

+ Here is the call graph for this function:

+ Here is the caller graph for this function: