OmniSciDB  04ee39c94c
anonymous_namespace{InputMetadata.cpp} Namespace Reference

Functions

Fragmenter_Namespace::TableInfo copy_table_info (const Fragmenter_Namespace::TableInfo &table_info)
 
Fragmenter_Namespace::TableInfo build_table_info (const std::vector< const TableDescriptor *> &shard_tables)
 
bool uses_int_meta (const SQLTypeInfo &col_ti)
 
std::map< int, ChunkMetadata > synthesize_metadata (const ResultSet *rows)
 
Fragmenter_Namespace::TableInfo synthesize_table_info (const ResultSetPtr &rows)
 
void collect_table_infos (std::vector< InputTableInfo > &table_infos, const std::vector< InputDescriptor > &input_descs, Executor *executor)
 

Function Documentation

◆ build_table_info()

Fragmenter_Namespace::TableInfo anonymous_namespace{InputMetadata.cpp}::build_table_info ( const std::vector< const TableDescriptor *> &  shard_tables)

Definition at line 37 of file InputMetadata.cpp.

References CHECK, Fragmenter_Namespace::TableInfo::fragments, and Fragmenter_Namespace::TableInfo::setPhysicalNumTuples().

Referenced by InputTableInfoCache::getTableInfo().

38  {
39  size_t total_number_of_tuples{0};
40  Fragmenter_Namespace::TableInfo table_info_all_shards;
41  for (const TableDescriptor* shard_table : shard_tables) {
42  CHECK(shard_table->fragmenter);
43  const auto& shard_metainfo = shard_table->fragmenter->getFragmentsForQuery();
44  total_number_of_tuples += shard_metainfo.getPhysicalNumTuples();
45  table_info_all_shards.fragments.insert(table_info_all_shards.fragments.end(),
46  shard_metainfo.fragments.begin(),
47  shard_metainfo.fragments.end());
48  }
49  table_info_all_shards.setPhysicalNumTuples(total_number_of_tuples);
50  return table_info_all_shards;
51 }
std::deque< FragmentInfo > fragments
Definition: Fragmenter.h:167
#define CHECK(condition)
Definition: Logger.h:187
void setPhysicalNumTuples(const size_t physNumTuples)
Definition: Fragmenter.h:162
specifies the content in-memory of a row in the table metadata table
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ collect_table_infos()

void anonymous_namespace{InputMetadata.cpp}::collect_table_infos ( std::vector< InputTableInfo > &  table_infos,
const std::vector< InputDescriptor > &  input_descs,
Executor *  executor 
)

Definition at line 203 of file InputMetadata.cpp.

References CHECK, CHECK_LT, copy_table_info(), RESULT, synthesize_table_info(), and TABLE.

Referenced by get_table_infos().

205  {
206  const auto temporary_tables = executor->getTemporaryTables();
207  const auto cat = executor->getCatalog();
208  CHECK(cat);
209  std::unordered_map<int, size_t> info_cache;
210  for (const auto& input_desc : input_descs) {
211  const auto table_id = input_desc.getTableId();
212  const auto cached_index_it = info_cache.find(table_id);
213  if (cached_index_it != info_cache.end()) {
214  CHECK_LT(cached_index_it->second, table_infos.size());
215  table_infos.push_back(
216  {table_id, copy_table_info(table_infos[cached_index_it->second].info)});
217  continue;
218  }
219  if (input_desc.getSourceType() == InputSourceType::RESULT) {
220  CHECK_LT(table_id, 0);
221  CHECK(temporary_tables);
222  const auto it = temporary_tables->find(table_id);
223  CHECK(it != temporary_tables->end());
224  table_infos.push_back({table_id, synthesize_table_info(it->second)});
225  } else {
226  CHECK(input_desc.getSourceType() == InputSourceType::TABLE);
227  table_infos.push_back({table_id, executor->getTableInfo(table_id)});
228  }
229  CHECK(!table_infos.empty());
230  info_cache.insert(std::make_pair(table_id, table_infos.size() - 1));
231  }
232 }
Fragmenter_Namespace::TableInfo copy_table_info(const Fragmenter_Namespace::TableInfo &table_info)
Fragmenter_Namespace::TableInfo synthesize_table_info(const ResultSetPtr &rows)
#define CHECK_LT(x, y)
Definition: Logger.h:197
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ copy_table_info()

Fragmenter_Namespace::TableInfo anonymous_namespace{InputMetadata.cpp}::copy_table_info ( const Fragmenter_Namespace::TableInfo &  table_info)

Definition at line 28 of file InputMetadata.cpp.

References Fragmenter_Namespace::TableInfo::chunkKeyPrefix, Fragmenter_Namespace::TableInfo::fragments, Fragmenter_Namespace::TableInfo::getPhysicalNumTuples(), and Fragmenter_Namespace::TableInfo::setPhysicalNumTuples().

Referenced by collect_table_infos(), and InputTableInfoCache::getTableInfo().

29  {
30  Fragmenter_Namespace::TableInfo table_info_copy;
31  table_info_copy.chunkKeyPrefix = table_info.chunkKeyPrefix;
32  table_info_copy.fragments = table_info.fragments;
33  table_info_copy.setPhysicalNumTuples(table_info.getPhysicalNumTuples());
34  return table_info_copy;
35 }
std::deque< FragmentInfo > fragments
Definition: Fragmenter.h:167
std::vector< int > chunkKeyPrefix
Definition: Fragmenter.h:166
size_t getPhysicalNumTuples() const
Definition: Fragmenter.h:160
void setPhysicalNumTuples(const size_t physNumTuples)
Definition: Fragmenter.h:162
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ synthesize_metadata()

std::map<int, ChunkMetadata> anonymous_namespace{InputMetadata.cpp}::synthesize_metadata ( const ResultSet *  rows)

Definition at line 84 of file InputMetadata.cpp.

References CHECK, CHECK_LT, cpu_threads(), Encoder::Create(), inline_fp_null_val(), inline_int_null_val(), kDOUBLE, kFLOAT, use_parallel_algorithms(), and uses_int_meta().

Referenced by Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap().

84  {
85  rows->moveToBegin();
86  std::vector<std::vector<std::unique_ptr<Encoder>>> dummy_encoders;
87  const size_t worker_count = use_parallel_algorithms(*rows) ? cpu_threads() : 1;
88  for (size_t worker_idx = 0; worker_idx < worker_count; ++worker_idx) {
89  dummy_encoders.emplace_back();
90  for (size_t i = 0; i < rows->colCount(); ++i) {
91  const auto& col_ti = rows->getColType(i);
92  dummy_encoders.back().emplace_back(Encoder::Create(nullptr, col_ti));
93  }
94  }
95  const auto do_work = [rows](const std::vector<TargetValue>& crt_row,
96  std::vector<std::unique_ptr<Encoder>>& dummy_encoders) {
97  for (size_t i = 0; i < rows->colCount(); ++i) {
98  const auto& col_ti = rows->getColType(i);
99  const auto& col_val = crt_row[i];
100  const auto scalar_col_val = boost::get<ScalarTargetValue>(&col_val);
101  CHECK(scalar_col_val);
102  if (uses_int_meta(col_ti)) {
103  const auto i64_p = boost::get<int64_t>(scalar_col_val);
104  CHECK(i64_p);
105  dummy_encoders[i]->updateStats(*i64_p, *i64_p == inline_int_null_val(col_ti));
106  } else if (col_ti.is_fp()) {
107  switch (col_ti.get_type()) {
108  case kFLOAT: {
109  const auto float_p = boost::get<float>(scalar_col_val);
110  CHECK(float_p);
111  dummy_encoders[i]->updateStats(*float_p,
112  *float_p == inline_fp_null_val(col_ti));
113  break;
114  }
115  case kDOUBLE: {
116  const auto double_p = boost::get<double>(scalar_col_val);
117  CHECK(double_p);
118  dummy_encoders[i]->updateStats(*double_p,
119  *double_p == inline_fp_null_val(col_ti));
120  break;
121  }
122  default:
123  CHECK(false);
124  }
125  } else {
126  throw std::runtime_error(col_ti.get_type_name() +
127  " is not supported in temporary table.");
128  }
129  }
130  };
131  if (use_parallel_algorithms(*rows)) {
132  const size_t worker_count = cpu_threads();
133  std::vector<std::future<void>> compute_stats_threads;
134  const auto entry_count = rows->entryCount();
135  for (size_t i = 0,
136  start_entry = 0,
137  stride = (entry_count + worker_count - 1) / worker_count;
138  i < worker_count && start_entry < entry_count;
139  ++i, start_entry += stride) {
140  const auto end_entry = std::min(start_entry + stride, entry_count);
141  compute_stats_threads.push_back(std::async(
142  std::launch::async,
143  [rows, &do_work, &dummy_encoders](
144  const size_t start, const size_t end, const size_t worker_idx) {
145  for (size_t i = start; i < end; ++i) {
146  const auto crt_row = rows->getRowAtNoTranslations(i);
147  if (!crt_row.empty()) {
148  do_work(crt_row, dummy_encoders[worker_idx]);
149  }
150  }
151  },
152  start_entry,
153  end_entry,
154  i));
155  }
156  for (auto& child : compute_stats_threads) {
157  child.wait();
158  }
159  for (auto& child : compute_stats_threads) {
160  child.get();
161  }
162  } else {
163  while (true) {
164  auto crt_row = rows->getNextRow(false, false);
165  if (crt_row.empty()) {
166  break;
167  }
168  do_work(crt_row, dummy_encoders[0]);
169  }
170  rows->moveToBegin();
171  }
172  std::map<int, ChunkMetadata> metadata_map;
173  for (size_t worker_idx = 1; worker_idx < worker_count; ++worker_idx) {
174  CHECK_LT(worker_idx, dummy_encoders.size());
175  const auto& worker_encoders = dummy_encoders[worker_idx];
176  for (size_t i = 0; i < rows->colCount(); ++i) {
177  dummy_encoders[0][i]->reduceStats(*worker_encoders[i]);
178  }
179  }
180  for (size_t i = 0; i < rows->colCount(); ++i) {
181  const auto it_ok =
182  metadata_map.emplace(i, dummy_encoders[0][i]->getMetadata(rows->getColType(i)));
183  CHECK(it_ok.second);
184  }
185  return metadata_map;
186 }
bool use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:873
static Encoder * Create(Data_Namespace::AbstractBuffer *buffer, const SQLTypeInfo sqlType)
Definition: Encoder.cpp:26
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
bool uses_int_meta(const SQLTypeInfo &col_ti)
#define CHECK_LT(x, y)
Definition: Logger.h:197
#define CHECK(condition)
Definition: Logger.h:187
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
int cpu_threads()
Definition: thread_count.h:23
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ synthesize_table_info()

Fragmenter_Namespace::TableInfo anonymous_namespace{InputMetadata.cpp}::synthesize_table_info ( const ResultSetPtr &  rows)

Definition at line 188 of file InputMetadata.cpp.

References Fragmenter_Namespace::TableInfo::fragments, and run-benchmark-import::result.

Referenced by collect_table_infos().

188  {
189  std::deque<Fragmenter_Namespace::FragmentInfo> result;
190  if (rows) {
191  result.resize(1);
192  auto& fragment = result.front();
193  fragment.fragmentId = 0;
194  fragment.deviceIds.resize(3);
195  fragment.resultSet = rows.get();
196  fragment.resultSetMutex.reset(new std::mutex());
197  }
198  Fragmenter_Namespace::TableInfo table_info;
199  table_info.fragments = result;
200  return table_info;
201 }
std::deque< FragmentInfo > fragments
Definition: Fragmenter.h:167
+ Here is the caller graph for this function:

◆ uses_int_meta()

bool anonymous_namespace{InputMetadata.cpp}::uses_int_meta ( const SQLTypeInfo &  col_ti)

Definition at line 78 of file InputMetadata.cpp.

References SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_boolean(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_decimal(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_integer(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_time(), and kENCODING_DICT.

Referenced by synthesize_metadata().

78  {
79  return col_ti.is_integer() || col_ti.is_decimal() || col_ti.is_time() ||
80  col_ti.is_boolean() ||
81  (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT);
82 }
bool is_time() const
Definition: sqltypes.h:456
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
bool is_integer() const
Definition: sqltypes.h:452
bool is_decimal() const
Definition: sqltypes.h:453
bool is_boolean() const
Definition: sqltypes.h:457
bool is_string() const
Definition: sqltypes.h:450
+ Here is the call graph for this function:
+ Here is the caller graph for this function: