OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
PerfectJoinHashTable.cpp File Reference
+ Include dependency graph for PerfectJoinHashTable.cpp:

Go to the source code of this file.

Namespaces

 anonymous_namespace{PerfectJoinHashTable.cpp}
 

Functions

std::pair< InnerOuter,
InnerOuterStringOpInfos >
anonymous_namespace{PerfectJoinHashTable.cpp}::get_cols (const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
 
BucketizedHashEntryInfo anonymous_namespace{PerfectJoinHashTable.cpp}::get_bucketized_hash_entry_info (SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
 
size_t anonymous_namespace{PerfectJoinHashTable.cpp}::get_hash_entry_count (const ExpressionRange &col_range, const bool is_bw_eq)
 
bool anonymous_namespace{PerfectJoinHashTable.cpp}::shard_count_less_or_equal_device_count (const shared::TableKey &inner_table_key, const Executor *executor)
 
size_t get_shard_count (std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > equi_pair, const Executor *executor)
 
bool needs_dictionary_translation (const InnerOuter &inner_outer_col_pair, const InnerOuterStringOpInfos &inner_outer_string_op_infos, const Executor *executor)
 
std::vector
< Fragmenter_Namespace::FragmentInfo >
only_shards_for_device (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
 
const InputTableInfo & get_inner_query_info (const shared::TableKey &inner_table_key, const std::vector< InputTableInfo > &query_infos)
 
size_t get_entries_per_device (const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
 

Function Documentation

size_t get_entries_per_device ( const size_t  total_entries,
const size_t  shard_count,
const size_t  device_count,
const Data_Namespace::MemoryLevel  memory_level 
)

Definition at line 1271 of file PerfectJoinHashTable.cpp.

References CHECK_GT, and Data_Namespace::GPU_LEVEL.

Referenced by OverlapsJoinHashTable::computeHashTableCounts(), RangeJoinHashTable::computeRangeHashTableCounts(), and BaselineJoinHashTable::reifyWithLayout().

1274  {
1275  const auto entries_per_shard =
1276  shard_count ? (total_entries + shard_count - 1) / shard_count : total_entries;
1277  size_t entries_per_device = entries_per_shard;
1278  if (memory_level == Data_Namespace::GPU_LEVEL && shard_count) {
1279  const auto shards_per_device = (shard_count + device_count - 1) / device_count;
1280  CHECK_GT(shards_per_device, 0u);
1281  entries_per_device = entries_per_shard * shards_per_device;
1282  }
1283  return entries_per_device;
1284 }
#define CHECK_GT(x, y)
Definition: Logger.h:305

+ Here is the caller graph for this function:

const InputTableInfo& get_inner_query_info ( const shared::TableKey &  inner_table_key,
const std::vector< InputTableInfo > &  query_infos 
)

Definition at line 1257 of file PerfectJoinHashTable.cpp.

References CHECK.

Referenced by PerfectJoinHashTable::getInnerQueryInfo(), RangeJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), RangeJoinHashTable::reifyWithLayout(), OverlapsJoinHashTable::reifyWithLayout(), and BaselineJoinHashTable::reifyWithLayout().

1259  {
1260  std::optional<size_t> ti_idx;
1261  for (size_t i = 0; i < query_infos.size(); ++i) {
1262  if (inner_table_key == query_infos[i].table_key) {
1263  ti_idx = i;
1264  break;
1265  }
1266  }
1267  CHECK(ti_idx);
1268  return query_infos[*ti_idx];
1269 }
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

size_t get_shard_count ( std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * >  equi_pair,
const Executor *  executor 
)

Definition at line 112 of file PerfectJoinHashTable.cpp.

References CHECK, Catalog_Namespace::get_metadata_for_table(), and anonymous_namespace{PerfectJoinHashTable.cpp}::shard_count_less_or_equal_device_count().

114  {
115  const auto inner_col = equi_pair.first;
116  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(equi_pair.second);
117  if (!outer_col || inner_col->getColumnKey().table_id < 0 ||
118  outer_col->getColumnKey().table_id < 0) {
119  return 0;
120  }
121  if (outer_col->get_rte_idx()) {
122  return 0;
123  }
124  if (inner_col->get_type_info() != outer_col->get_type_info()) {
125  return 0;
126  }
127 
128  const auto inner_td =
129  Catalog_Namespace::get_metadata_for_table(inner_col->getTableKey());
130  CHECK(inner_td);
131  const auto outer_td =
132  Catalog_Namespace::get_metadata_for_table(outer_col->getTableKey());
133  CHECK(outer_td);
134  if (inner_td->shardedColumnId == 0 || outer_td->shardedColumnId == 0 ||
135  inner_td->nShards != outer_td->nShards) {
136  return 0;
137  }
138  if (!shard_count_less_or_equal_device_count(inner_col->getTableKey(), executor)) {
139  return 0;
140  }
141  // The two columns involved must be the ones on which the tables have been sharded on.
142  return (inner_td->shardedColumnId == inner_col->getColumnKey().column_id &&
143  outer_td->shardedColumnId == outer_col->getColumnKey().column_id) ||
144  (outer_td->shardedColumnId == inner_col->getColumnKey().column_id &&
145  inner_td->shardedColumnId == inner_col->getColumnKey().column_id)
146  ? inner_td->nShards
147  : 0;
148 }
const TableDescriptor * get_metadata_for_table(const ::shared::TableKey &table_key, bool populate_fragmenter)
bool shard_count_less_or_equal_device_count(const shared::TableKey &inner_table_key, const Executor *executor)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

bool needs_dictionary_translation ( const InnerOuter &  inner_outer_col_pair,
const InnerOuterStringOpInfos &  inner_outer_string_op_infos,
const Executor *  executor 
)

Definition at line 275 of file PerfectJoinHashTable.cpp.

References CHECK, CHECK_EQ, get_column_descriptor_maybe(), and get_column_type().

278  {
279  if (inner_outer_string_op_infos.first.size() ||
280  inner_outer_string_op_infos.second.size()) {
281  return true;
282  }
283  auto inner_col = inner_outer_col_pair.first;
284  auto outer_col_expr = inner_outer_col_pair.second;
285  const auto inner_cd = get_column_descriptor_maybe(inner_col->getColumnKey());
286  const auto& inner_col_key = inner_col->getColumnKey();
287  const auto& inner_ti = get_column_type(inner_col_key.column_id,
288  inner_col_key.table_id,
289  inner_cd,
290  executor->getTemporaryTables());
291  // Only strings may need dictionary translation.
292  if (!inner_ti.is_string()) {
293  return false;
294  }
295  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_col_expr);
296  CHECK(outer_col);
297  const auto outer_cd = get_column_descriptor_maybe(outer_col->getColumnKey());
298  // Don't want to deal with temporary tables for now, require translation.
299  if (!inner_cd || !outer_cd) {
300  return true;
301  }
302  const auto& outer_col_key = outer_col->getColumnKey();
303  const auto& outer_ti = get_column_type(outer_col_key.column_id,
304  outer_col_key.table_id,
305  outer_cd,
306  executor->getTemporaryTables());
307  CHECK_EQ(inner_ti.is_string(), outer_ti.is_string());
308  // If the two columns don't share the dictionary, translation is needed.
309  if (outer_ti.getStringDictKey() != inner_ti.getStringDictKey()) {
310  return true;
311  }
312  const auto inner_str_dict_proxy =
313  executor->getStringDictionaryProxy(inner_ti.getStringDictKey(), true);
314  CHECK(inner_str_dict_proxy);
315  const auto outer_str_dict_proxy =
316  executor->getStringDictionaryProxy(outer_ti.getStringDictKey(), true);
317  CHECK(outer_str_dict_proxy);
318 
319  return *inner_str_dict_proxy != *outer_str_dict_proxy;
320 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:233
const ColumnDescriptor * get_column_descriptor_maybe(const shared::ColumnKey &column_key)
Definition: Execute.h:220
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

std::vector<Fragmenter_Namespace::FragmentInfo> only_shards_for_device ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
const int  device_count 
)

Definition at line 322 of file PerfectJoinHashTable.cpp.

References CHECK_GE.

Referenced by PerfectJoinHashTable::reify(), OverlapsJoinHashTable::reifyImpl(), RangeJoinHashTable::reifyWithLayout(), OverlapsJoinHashTable::reifyWithLayout(), and BaselineJoinHashTable::reifyWithLayout().

325  {
326  std::vector<Fragmenter_Namespace::FragmentInfo> shards_for_device;
327  for (const auto& fragment : fragments) {
328  CHECK_GE(fragment.shard, 0);
329  if (fragment.shard % device_count == device_id) {
330  shards_for_device.push_back(fragment);
331  }
332  }
333  return shards_for_device;
334 }
#define CHECK_GE(x, y)
Definition: Logger.h:306

+ Here is the caller graph for this function: