OmniSciDB  5ade3759e0
JoinHashTable.h File Reference
#include "../Analyzer/Analyzer.h"
#include "../Catalog/Catalog.h"
#include "../Chunk/Chunk.h"
#include "../Shared/ExperimentalTypeUtilities.h"
#include "Allocators/ThrustAllocator.h"
#include "ColumnarResults.h"
#include "Descriptors/InputDescriptors.h"
#include "Descriptors/RowSetMemoryOwner.h"
#include "ExpressionRange.h"
#include "InputMetadata.h"
#include "JoinHashTableInterface.h"
#include <llvm/IR/Value.h>
#include <functional>
#include <memory>
#include <mutex>
#include <stdexcept>
+ Include dependency graph for JoinHashTable.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class  JoinHashTable
 
struct  JoinHashTable::JoinHashTableCacheKey
 

Functions

std::string get_table_name_by_id (const int table_id, const Catalog_Namespace::Catalog &cat)
 
size_t get_shard_count (const Analyzer::BinOper *join_condition, const Executor *executor)
 
size_t get_shard_count (std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr *> equi_pair, const Executor *executor)
 
bool needs_dictionary_translation (const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col, const Executor *executor)
 
InnerOuter normalize_column_pair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
 
std::vector< InnerOuternormalize_column_pairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
std::deque< Fragmenter_Namespace::FragmentInfoonly_shards_for_device (const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
 
const InputTableInfoget_inner_query_info (const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
 
size_t get_entries_per_device (const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
 

Function Documentation

◆ get_entries_per_device()

size_t get_entries_per_device ( const size_t  total_entries,
const size_t  shard_count,
const size_t  device_count,
const Data_Namespace::MemoryLevel  memory_level 
)

Definition at line 1444 of file JoinHashTable.cpp.

References CHECK_GT, and Data_Namespace::GPU_LEVEL.

Referenced by OverlapsJoinHashTable::calculateCounts(), get_table_name_by_id(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), and BaselineJoinHashTable::reifyWithLayout().

1447  {
1448  const auto entries_per_shard =
1449  shard_count ? (total_entries + shard_count - 1) / shard_count : total_entries;
1450  size_t entries_per_device = entries_per_shard;
1451  if (memory_level == Data_Namespace::GPU_LEVEL && shard_count) {
1452  const auto shards_per_device = (shard_count + device_count - 1) / device_count;
1453  CHECK_GT(shards_per_device, 0u);
1454  entries_per_device = entries_per_shard * shards_per_device;
1455  }
1456  return entries_per_device;
1457 }
#define CHECK_GT(x, y)
Definition: Logger.h:199
+ Here is the caller graph for this function:

◆ get_inner_query_info()

const InputTableInfo& get_inner_query_info ( const int  inner_table_id,
const std::vector< InputTableInfo > &  query_infos 
)

Definition at line 1430 of file JoinHashTable.cpp.

References CHECK_NE.

Referenced by get_table_name_by_id(), JoinHashTable::getInnerQueryInfo(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::reifyWithLayout(), and BaselineJoinHashTable::reifyWithLayout().

1432  {
1433  ssize_t ti_idx = -1;
1434  for (size_t i = 0; i < query_infos.size(); ++i) {
1435  if (inner_table_id == query_infos[i].table_id) {
1436  ti_idx = i;
1437  break;
1438  }
1439  }
1440  CHECK_NE(ssize_t(-1), ti_idx);
1441  return query_infos[ti_idx];
1442 }
#define CHECK_NE(x, y)
Definition: Logger.h:196
+ Here is the caller graph for this function:

◆ get_shard_count() [1/2]

size_t get_shard_count ( const Analyzer::BinOper join_condition,
const Executor executor 
)

Definition at line 230 of file JoinHashTable.cpp.

References anonymous_namespace{JoinHashTable.cpp}::get_cols(), and get_shard_count().

Referenced by JoinHashTable::checkHashJoinReplicationConstraint(), get_shard_count(), get_table_name_by_id(), BaselineJoinHashTable::getShardCountForCondition(), JoinHashTable::initOneToManyHashTable(), JoinHashTable::shardCount(), and Executor::skipFragmentPair().

231  {
232  const Analyzer::ColumnVar* inner_col{nullptr};
233  const Analyzer::Expr* outer_col{nullptr};
234  std::shared_ptr<Analyzer::BinOper> redirected_bin_oper;
235  try {
236  std::tie(inner_col, outer_col) =
237  get_cols(join_condition, *executor->getCatalog(), executor->getTemporaryTables());
238  } catch (...) {
239  return 0;
240  }
241  if (!inner_col || !outer_col) {
242  return 0;
243  }
244  return get_shard_count({inner_col, outer_col}, executor);
245 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_shard_count() [2/2]

size_t get_shard_count ( std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr *>  equi_pair,
const Executor executor 
)

Definition at line 268 of file JoinHashTable.cpp.

References CHECK, and anonymous_namespace{JoinHashTable.cpp}::shard_count_less_or_equal_device_count().

270  {
271  const auto inner_col = equi_pair.first;
272  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(equi_pair.second);
273  if (!outer_col || inner_col->get_table_id() < 0 || outer_col->get_table_id() < 0) {
274  return 0;
275  }
276  if (outer_col->get_rte_idx()) {
277  return 0;
278  }
279  if (inner_col->get_type_info() != outer_col->get_type_info()) {
280  return 0;
281  }
282  const auto catalog = executor->getCatalog();
283  const auto inner_td = catalog->getMetadataForTable(inner_col->get_table_id());
284  CHECK(inner_td);
285  const auto outer_td = catalog->getMetadataForTable(outer_col->get_table_id());
286  CHECK(outer_td);
287  if (inner_td->shardedColumnId == 0 || outer_td->shardedColumnId == 0 ||
288  inner_td->nShards != outer_td->nShards) {
289  return 0;
290  }
291  if (!shard_count_less_or_equal_device_count(inner_td->tableId, executor)) {
292  return 0;
293  }
294  // The two columns involved must be the ones on which the tables have been sharded on.
295  return (inner_td->shardedColumnId == inner_col->get_column_id() &&
296  outer_td->shardedColumnId == outer_col->get_column_id()) ||
297  (outer_td->shardedColumnId == inner_col->get_column_id() &&
298  inner_td->shardedColumnId == inner_col->get_column_id())
299  ? inner_td->nShards
300  : 0;
301 }
bool shard_count_less_or_equal_device_count(const int inner_table_id, const Executor *executor)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:

◆ get_table_name_by_id()

std::string get_table_name_by_id ( const int  table_id,
const Catalog_Namespace::Catalog cat 
)
inline

Definition at line 276 of file JoinHashTable.h.

References CHECK, get_entries_per_device(), get_inner_query_info(), get_shard_count(), Catalog_Namespace::Catalog::getMetadataForTable(), JoinHashTable::JoinHashTableCacheKey::inner_col, needs_dictionary_translation(), normalize_column_pair(), normalize_column_pairs(), only_shards_for_device(), JoinHashTable::JoinHashTableCacheKey::outer_col, and to_string().

277  {
278  if (table_id >= 1) {
279  const auto td = cat.getMetadataForTable(table_id);
280  CHECK(td);
281  return td->tableName;
282  }
283  return "$TEMPORARY_TABLE" + std::to_string(-table_id);
284 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::string to_string(char const *&&v)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:

◆ needs_dictionary_translation()

bool needs_dictionary_translation ( const Analyzer::ColumnVar inner_col,
const Analyzer::Expr outer_col,
const Executor executor 
)

Definition at line 434 of file JoinHashTable.cpp.

References CHECK, CHECK_EQ, get_column_descriptor_maybe(), Analyzer::ColumnVar::get_column_id(), get_column_type(), and Analyzer::ColumnVar::get_table_id().

Referenced by get_table_name_by_id(), BaselineJoinHashTable::getEffectiveMemoryLevel(), JoinHashTable::reifyOneToManyForDevice(), and JoinHashTable::reifyOneToOneForDevice().

436  {
437  const auto catalog = executor->getCatalog();
438  CHECK(catalog);
439  const auto inner_cd = get_column_descriptor_maybe(
440  inner_col->get_column_id(), inner_col->get_table_id(), *catalog);
441  const auto& inner_ti = get_column_type(inner_col->get_column_id(),
442  inner_col->get_table_id(),
443  inner_cd,
444  executor->getTemporaryTables());
445  // Only strings may need dictionary translation.
446  if (!inner_ti.is_string()) {
447  return false;
448  }
449  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_col_expr);
450  CHECK(outer_col);
451  const auto outer_cd = get_column_descriptor_maybe(
452  outer_col->get_column_id(), outer_col->get_table_id(), *catalog);
453  // Don't want to deal with temporary tables for now, require translation.
454  if (!inner_cd || !outer_cd) {
455  return true;
456  }
457  const auto& outer_ti = get_column_type(outer_col->get_column_id(),
458  outer_col->get_table_id(),
459  outer_cd,
460  executor->getTemporaryTables());
461  CHECK_EQ(inner_ti.is_string(), outer_ti.is_string());
462  // If the two columns don't share the dictionary, translation is needed.
463  return outer_ti.get_comp_param() != inner_ti.get_comp_param();
464 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
int get_column_id() const
Definition: Analyzer.h:194
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:184
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:168
int get_table_id() const
Definition: Analyzer.h:193
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ normalize_column_pair()

InnerOuter normalize_column_pair ( const Analyzer::Expr lhs,
const Analyzer::Expr rhs,
const Catalog_Namespace::Catalog cat,
const TemporaryTables temporary_tables,
const bool  is_overlaps_join = false 
)

Definition at line 40 of file JoinHashTable.cpp.

References get_column_descriptor_maybe(), get_column_type(), Analyzer::Expr::get_type_info(), kCAST, kENCODING_DICT, kPOINT, and ScalarExprVisitor< T >::visit().

Referenced by anonymous_namespace{JoinHashTable.cpp}::get_cols(), get_table_name_by_id(), and normalize_column_pairs().

44  {
45  const auto& lhs_ti = lhs->get_type_info();
46  const auto& rhs_ti = rhs->get_type_info();
47  if (!is_overlaps_join) {
48  if (lhs_ti.get_type() != rhs_ti.get_type()) {
49  throw HashJoinFail("Equijoin types must be identical, found: " +
50  lhs_ti.get_type_name() + ", " + rhs_ti.get_type_name());
51  }
52  if (!lhs_ti.is_integer() && !lhs_ti.is_time() && !lhs_ti.is_string()) {
53  throw HashJoinFail("Cannot apply hash join to inner column type " +
54  lhs_ti.get_type_name());
55  }
56  }
57 
58  const auto lhs_cast = dynamic_cast<const Analyzer::UOper*>(lhs);
59  const auto rhs_cast = dynamic_cast<const Analyzer::UOper*>(rhs);
60  if (lhs_ti.is_string() && (static_cast<bool>(lhs_cast) != static_cast<bool>(rhs_cast) ||
61  (lhs_cast && lhs_cast->get_optype() != kCAST) ||
62  (rhs_cast && rhs_cast->get_optype() != kCAST))) {
63  throw HashJoinFail("Cannot use hash join for given expression");
64  }
65  const auto lhs_col =
66  lhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(lhs_cast->get_operand())
67  : dynamic_cast<const Analyzer::ColumnVar*>(lhs);
68  const auto rhs_col =
69  rhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(rhs_cast->get_operand())
70  : dynamic_cast<const Analyzer::ColumnVar*>(rhs);
71  if (!lhs_col && !rhs_col) {
72  throw HashJoinFail("Cannot use hash join for given expression");
73  }
74  const Analyzer::ColumnVar* inner_col{nullptr};
75  const Analyzer::ColumnVar* outer_col{nullptr};
76  auto outer_ti = lhs_ti;
77  auto inner_ti = rhs_ti;
78  const Analyzer::Expr* outer_expr{lhs};
79  if ((!lhs_col || (rhs_col && lhs_col->get_rte_idx() < rhs_col->get_rte_idx())) &&
80  (!rhs_col || (!lhs_col || lhs_col->get_rte_idx() < rhs_col->get_rte_idx()))) {
81  inner_col = rhs_col;
82  outer_col = lhs_col;
83  } else {
84  if (lhs_col && lhs_col->get_rte_idx() == 0) {
85  throw HashJoinFail("Cannot use hash join for given expression");
86  }
87  inner_col = lhs_col;
88  outer_col = rhs_col;
89  std::swap(outer_ti, inner_ti);
90  outer_expr = rhs;
91  }
92  if (!inner_col) {
93  throw HashJoinFail("Cannot use hash join for given expression");
94  }
95  if (!outer_col) {
96  MaxRangeTableIndexVisitor rte_idx_visitor;
97  int outer_rte_idx = rte_idx_visitor.visit(outer_expr);
98  // The inner column candidate is not actually inner; the outer
99  // expression contains columns which are at least as deep.
100  if (inner_col->get_rte_idx() <= outer_rte_idx) {
101  throw HashJoinFail("Cannot use hash join for given expression");
102  }
103  }
104  // We need to fetch the actual type information from the catalog since Analyzer
105  // always reports nullable as true for inner table columns in left joins.
106  const auto inner_col_cd = get_column_descriptor_maybe(
107  inner_col->get_column_id(), inner_col->get_table_id(), cat);
108  const auto inner_col_real_ti = get_column_type(inner_col->get_column_id(),
109  inner_col->get_table_id(),
110  inner_col_cd,
111  temporary_tables);
112  const auto& outer_col_ti =
113  !(dynamic_cast<const Analyzer::FunctionOper*>(lhs)) && outer_col
114  ? outer_col->get_type_info()
115  : outer_ti;
116  if (is_overlaps_join) {
117  if (!inner_col_real_ti.is_array()) {
118  throw HashJoinFail(
119  "Overlaps join only supported for inner columns with array type");
120  }
121  if (!(inner_col_real_ti.is_fixlen_array() && inner_col_real_ti.get_size() == 32)) {
122  throw HashJoinFail(
123  "Overlaps join only supported for 4-element double fixed length arrays");
124  }
125  if (!(outer_col_ti.get_type() == kPOINT)) {
126  throw HashJoinFail(
127  "Overlaps join only supported for geometry outer columns of type point");
128  }
129  } else {
130  if (!(inner_col_real_ti.is_integer() || inner_col_real_ti.is_time() ||
131  (inner_col_real_ti.is_string() &&
132  inner_col_real_ti.get_compression() == kENCODING_DICT))) {
133  throw HashJoinFail(
134  "Can only apply hash join to integer-like types and dictionary encoded "
135  "strings");
136  }
137  }
138  return {inner_col, outer_col ? outer_col : outer_expr};
139 }
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:184
Definition: sqldefs.h:49
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:168
T visit(const Analyzer::Expr *expr) const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:77
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ normalize_column_pairs()

std::vector<InnerOuter> normalize_column_pairs ( const Analyzer::BinOper condition,
const Catalog_Namespace::Catalog cat,
const TemporaryTables temporary_tables 
)

Definition at line 141 of file JoinHashTable.cpp.

References CHECK, CHECK_EQ, Analyzer::BinOper::get_left_operand(), Analyzer::BinOper::get_right_operand(), Analyzer::BinOper::is_overlaps_oper(), normalize_column_pair(), and run-benchmark-import::result.

Referenced by anonymous_namespace{FromTableReordering.cpp}::get_join_qual_cost(), get_table_name_by_id(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), and Executor::skipFragmentPair().

143  {
144  std::vector<InnerOuter> result;
145  const auto lhs_tuple_expr =
146  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
147  const auto rhs_tuple_expr =
148  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
149 
150  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
151  if (lhs_tuple_expr) {
152  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
153  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
154  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
155  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
156  result.push_back(normalize_column_pair(lhs_tuple[i].get(),
157  rhs_tuple[i].get(),
158  cat,
159  temporary_tables,
160  condition->is_overlaps_oper()));
161  }
162  } else {
163  CHECK(!lhs_tuple_expr && !rhs_tuple_expr);
164  result.push_back(normalize_column_pair(condition->get_left_operand(),
165  condition->get_right_operand(),
166  cat,
167  temporary_tables,
168  condition->is_overlaps_oper()));
169  }
170 
171  return result;
172 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
bool is_overlaps_oper() const
Definition: Analyzer.h:433
#define CHECK(condition)
Definition: Logger.h:187
InnerOuter normalize_column_pair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join)
const Expr * get_right_operand() const
Definition: Analyzer.h:436
const Expr * get_left_operand() const
Definition: Analyzer.h:435
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ only_shards_for_device()

std::deque<Fragmenter_Namespace::FragmentInfo> only_shards_for_device ( const std::deque< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
const int  device_count 
)

Definition at line 466 of file JoinHashTable.cpp.

References CHECK_GE.

Referenced by OverlapsJoinHashTable::calculateCounts(), get_table_name_by_id(), JoinHashTable::reify(), OverlapsJoinHashTable::reifyWithLayout(), and BaselineJoinHashTable::reifyWithLayout().

469  {
470  std::deque<Fragmenter_Namespace::FragmentInfo> shards_for_device;
471  for (const auto& fragment : fragments) {
472  CHECK_GE(fragment.shard, 0);
473  if (fragment.shard % device_count == device_id) {
474  shards_for_device.push_back(fragment);
475  }
476  }
477  return shards_for_device;
478 }
#define CHECK_GE(x, y)
Definition: Logger.h:200
+ Here is the caller graph for this function: