OmniSciDB  b24e664e58
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
JoinHashTable.cpp File Reference
#include "JoinHashTable.h"
#include "CodeGenerator.h"
#include "ColumnFetcher.h"
#include "Execute.h"
#include "ExpressionRewrite.h"
#include "HashJoinRuntime.h"
#include "RangeTableIndexVisitor.h"
#include "RuntimeFunctions.h"
#include "Shared/Logger.h"
#include <future>
#include <numeric>
#include <thread>
+ Include dependency graph for JoinHashTable.cpp:

Go to the source code of this file.

Classes

class  anonymous_namespace{JoinHashTable.cpp}::NeedsOneToManyHash
 

Namespaces

 anonymous_namespace{JoinHashTable.cpp}
 

Functions

InnerOuter normalize_column_pair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join)
 
std::vector< InnerOuternormalize_column_pairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
std::pair< const
Analyzer::ColumnVar *, const
Analyzer::Expr * > 
anonymous_namespace{JoinHashTable.cpp}::get_cols (const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
HashEntryInfo anonymous_namespace{JoinHashTable.cpp}::get_bucketized_hash_entry_info (SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
 
size_t anonymous_namespace{JoinHashTable.cpp}::get_hash_entry_count (const ExpressionRange &col_range, const bool is_bw_eq)
 
size_t get_shard_count (const Analyzer::BinOper *join_condition, const Executor *executor)
 
bool anonymous_namespace{JoinHashTable.cpp}::shard_count_less_or_equal_device_count (const int inner_table_id, const Executor *executor)
 
size_t get_shard_count (std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > equi_pair, const Executor *executor)
 
bool needs_dictionary_translation (const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col_expr, const Executor *executor)
 
std::deque
< Fragmenter_Namespace::FragmentInfo
only_shards_for_device (const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
 
const InputTableInfoget_inner_query_info (const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
 
size_t get_entries_per_device (const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
 

Function Documentation

size_t get_entries_per_device ( const size_t  total_entries,
const size_t  shard_count,
const size_t  device_count,
const Data_Namespace::MemoryLevel  memory_level 
)

Definition at line 1605 of file JoinHashTable.cpp.

References CHECK_GT, and Data_Namespace::GPU_LEVEL.

Referenced by OverlapsJoinHashTable::calculateCounts(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), and BaselineJoinHashTable::reifyWithLayout().

1608  {
1609  const auto entries_per_shard =
1610  shard_count ? (total_entries + shard_count - 1) / shard_count : total_entries;
1611  size_t entries_per_device = entries_per_shard;
1612  if (memory_level == Data_Namespace::GPU_LEVEL && shard_count) {
1613  const auto shards_per_device = (shard_count + device_count - 1) / device_count;
1614  CHECK_GT(shards_per_device, 0u);
1615  entries_per_device = entries_per_shard * shards_per_device;
1616  }
1617  return entries_per_device;
1618 }
#define CHECK_GT(x, y)
Definition: Logger.h:202

+ Here is the caller graph for this function:

const InputTableInfo& get_inner_query_info ( const int  inner_table_id,
const std::vector< InputTableInfo > &  query_infos 
)

Definition at line 1591 of file JoinHashTable.cpp.

References CHECK_NE.

Referenced by JoinHashTable::getInnerQueryInfo(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::reifyWithLayout(), and BaselineJoinHashTable::reifyWithLayout().

1593  {
1594  ssize_t ti_idx = -1;
1595  for (size_t i = 0; i < query_infos.size(); ++i) {
1596  if (inner_table_id == query_infos[i].table_id) {
1597  ti_idx = i;
1598  break;
1599  }
1600  }
1601  CHECK_NE(ssize_t(-1), ti_idx);
1602  return query_infos[ti_idx];
1603 }
#define CHECK_NE(x, y)
Definition: Logger.h:199

+ Here is the caller graph for this function:

size_t get_shard_count ( const Analyzer::BinOper join_condition,
const Executor executor 
)

Definition at line 230 of file JoinHashTable.cpp.

References anonymous_namespace{JoinHashTable.cpp}::get_cols(), and get_shard_count().

Referenced by JoinHashTable::checkHashJoinReplicationConstraint(), get_shard_count(), BaselineJoinHashTable::getShardCountForCondition(), JoinHashTable::initOneToManyHashTable(), JoinHashTable::shardCount(), and Executor::skipFragmentPair().

231  {
232  const Analyzer::ColumnVar* inner_col{nullptr};
233  const Analyzer::Expr* outer_col{nullptr};
234  std::shared_ptr<Analyzer::BinOper> redirected_bin_oper;
235  try {
236  std::tie(inner_col, outer_col) =
237  get_cols(join_condition, *executor->getCatalog(), executor->getTemporaryTables());
238  } catch (...) {
239  return 0;
240  }
241  if (!inner_col || !outer_col) {
242  return 0;
243  }
244  return get_shard_count({inner_col, outer_col}, executor);
245 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t get_shard_count ( std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * >  equi_pair,
const Executor executor 
)

Definition at line 268 of file JoinHashTable.cpp.

References CHECK(), and anonymous_namespace{JoinHashTable.cpp}::shard_count_less_or_equal_device_count().

270  {
271  const auto inner_col = equi_pair.first;
272  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(equi_pair.second);
273  if (!outer_col || inner_col->get_table_id() < 0 || outer_col->get_table_id() < 0) {
274  return 0;
275  }
276  if (outer_col->get_rte_idx()) {
277  return 0;
278  }
279  if (inner_col->get_type_info() != outer_col->get_type_info()) {
280  return 0;
281  }
282  const auto catalog = executor->getCatalog();
283  const auto inner_td = catalog->getMetadataForTable(inner_col->get_table_id());
284  CHECK(inner_td);
285  const auto outer_td = catalog->getMetadataForTable(outer_col->get_table_id());
286  CHECK(outer_td);
287  if (inner_td->shardedColumnId == 0 || outer_td->shardedColumnId == 0 ||
288  inner_td->nShards != outer_td->nShards) {
289  return 0;
290  }
291  if (!shard_count_less_or_equal_device_count(inner_td->tableId, executor)) {
292  return 0;
293  }
294  // The two columns involved must be the ones on which the tables have been sharded on.
295  return (inner_td->shardedColumnId == inner_col->get_column_id() &&
296  outer_td->shardedColumnId == outer_col->get_column_id()) ||
297  (outer_td->shardedColumnId == inner_col->get_column_id() &&
298  inner_td->shardedColumnId == inner_col->get_column_id())
299  ? inner_td->nShards
300  : 0;
301 }
bool shard_count_less_or_equal_device_count(const int inner_table_id, const Executor *executor)
CHECK(cgen_state)

+ Here is the call graph for this function:

bool needs_dictionary_translation ( const Analyzer::ColumnVar inner_col,
const Analyzer::Expr outer_col_expr,
const Executor executor 
)

Definition at line 499 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, get_column_descriptor_maybe(), Analyzer::ColumnVar::get_column_id(), get_column_type(), and Analyzer::ColumnVar::get_table_id().

Referenced by BaselineJoinHashTable::getEffectiveMemoryLevel(), JoinHashTable::reifyOneToManyForDevice(), and JoinHashTable::reifyOneToOneForDevice().

501  {
502  const auto catalog = executor->getCatalog();
503  CHECK(catalog);
504  const auto inner_cd = get_column_descriptor_maybe(
505  inner_col->get_column_id(), inner_col->get_table_id(), *catalog);
506  const auto& inner_ti = get_column_type(inner_col->get_column_id(),
507  inner_col->get_table_id(),
508  inner_cd,
509  executor->getTemporaryTables());
510  // Only strings may need dictionary translation.
511  if (!inner_ti.is_string()) {
512  return false;
513  }
514  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_col_expr);
515  CHECK(outer_col);
516  const auto outer_cd = get_column_descriptor_maybe(
517  outer_col->get_column_id(), outer_col->get_table_id(), *catalog);
518  // Don't want to deal with temporary tables for now, require translation.
519  if (!inner_cd || !outer_cd) {
520  return true;
521  }
522  const auto& outer_ti = get_column_type(outer_col->get_column_id(),
523  outer_col->get_table_id(),
524  outer_cd,
525  executor->getTemporaryTables());
526  CHECK_EQ(inner_ti.is_string(), outer_ti.is_string());
527  // If the two columns don't share the dictionary, translation is needed.
528  return outer_ti.get_comp_param() != inner_ti.get_comp_param();
529 }
int get_table_id() const
Definition: Analyzer.h:194
#define CHECK_EQ(x, y)
Definition: Logger.h:198
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:187
CHECK(cgen_state)
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:171
int get_column_id() const
Definition: Analyzer.h:195

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

InnerOuter normalize_column_pair ( const Analyzer::Expr lhs,
const Analyzer::Expr rhs,
const Catalog_Namespace::Catalog cat,
const TemporaryTables temporary_tables,
const bool  is_overlaps_join 
)

Definition at line 40 of file JoinHashTable.cpp.

References get_column_descriptor_maybe(), get_column_type(), Analyzer::Expr::get_type_info(), kCAST, kENCODING_DICT, kPOINT, and ScalarExprVisitor< T >::visit().

Referenced by anonymous_namespace{JoinHashTable.cpp}::get_cols(), and normalize_column_pairs().

44  {
45  const auto& lhs_ti = lhs->get_type_info();
46  const auto& rhs_ti = rhs->get_type_info();
47  if (!is_overlaps_join) {
48  if (lhs_ti.get_type() != rhs_ti.get_type()) {
49  throw HashJoinFail("Equijoin types must be identical, found: " +
50  lhs_ti.get_type_name() + ", " + rhs_ti.get_type_name());
51  }
52  if (!lhs_ti.is_integer() && !lhs_ti.is_time() && !lhs_ti.is_string()) {
53  throw HashJoinFail("Cannot apply hash join to inner column type " +
54  lhs_ti.get_type_name());
55  }
56  }
57 
58  const auto lhs_cast = dynamic_cast<const Analyzer::UOper*>(lhs);
59  const auto rhs_cast = dynamic_cast<const Analyzer::UOper*>(rhs);
60  if (lhs_ti.is_string() && (static_cast<bool>(lhs_cast) != static_cast<bool>(rhs_cast) ||
61  (lhs_cast && lhs_cast->get_optype() != kCAST) ||
62  (rhs_cast && rhs_cast->get_optype() != kCAST))) {
63  throw HashJoinFail("Cannot use hash join for given expression");
64  }
65  const auto lhs_col =
66  lhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(lhs_cast->get_operand())
67  : dynamic_cast<const Analyzer::ColumnVar*>(lhs);
68  const auto rhs_col =
69  rhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(rhs_cast->get_operand())
70  : dynamic_cast<const Analyzer::ColumnVar*>(rhs);
71  if (!lhs_col && !rhs_col) {
72  throw HashJoinFail("Cannot use hash join for given expression");
73  }
74  const Analyzer::ColumnVar* inner_col{nullptr};
75  const Analyzer::ColumnVar* outer_col{nullptr};
76  auto outer_ti = lhs_ti;
77  auto inner_ti = rhs_ti;
78  const Analyzer::Expr* outer_expr{lhs};
79  if ((!lhs_col || (rhs_col && lhs_col->get_rte_idx() < rhs_col->get_rte_idx())) &&
80  (!rhs_col || (!lhs_col || lhs_col->get_rte_idx() < rhs_col->get_rte_idx()))) {
81  inner_col = rhs_col;
82  outer_col = lhs_col;
83  } else {
84  if (lhs_col && lhs_col->get_rte_idx() == 0) {
85  throw HashJoinFail("Cannot use hash join for given expression");
86  }
87  inner_col = lhs_col;
88  outer_col = rhs_col;
89  std::swap(outer_ti, inner_ti);
90  outer_expr = rhs;
91  }
92  if (!inner_col) {
93  throw HashJoinFail("Cannot use hash join for given expression");
94  }
95  if (!outer_col) {
96  MaxRangeTableIndexVisitor rte_idx_visitor;
97  int outer_rte_idx = rte_idx_visitor.visit(outer_expr);
98  // The inner column candidate is not actually inner; the outer
99  // expression contains columns which are at least as deep.
100  if (inner_col->get_rte_idx() <= outer_rte_idx) {
101  throw HashJoinFail("Cannot use hash join for given expression");
102  }
103  }
104  // We need to fetch the actual type information from the catalog since Analyzer
105  // always reports nullable as true for inner table columns in left joins.
106  const auto inner_col_cd = get_column_descriptor_maybe(
107  inner_col->get_column_id(), inner_col->get_table_id(), cat);
108  const auto inner_col_real_ti = get_column_type(inner_col->get_column_id(),
109  inner_col->get_table_id(),
110  inner_col_cd,
111  temporary_tables);
112  const auto& outer_col_ti =
113  !(dynamic_cast<const Analyzer::FunctionOper*>(lhs)) && outer_col
114  ? outer_col->get_type_info()
115  : outer_ti;
116  if (is_overlaps_join) {
117  if (!inner_col_real_ti.is_array()) {
118  throw HashJoinFail(
119  "Overlaps join only supported for inner columns with array type");
120  }
121  if (!(inner_col_real_ti.is_fixlen_array() && inner_col_real_ti.get_size() == 32)) {
122  throw HashJoinFail(
123  "Overlaps join only supported for 4-element double fixed length arrays");
124  }
125  if (!(outer_col_ti.get_type() == kPOINT)) {
126  throw HashJoinFail(
127  "Overlaps join only supported for geometry outer columns of type point");
128  }
129  } else {
130  if (!(inner_col_real_ti.is_integer() || inner_col_real_ti.is_time() ||
131  (inner_col_real_ti.is_string() &&
132  inner_col_real_ti.get_compression() == kENCODING_DICT))) {
133  throw HashJoinFail(
134  "Can only apply hash join to integer-like types and dictionary encoded "
135  "strings");
136  }
137  }
138  return {inner_col, outer_col ? outer_col : outer_expr};
139 }
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:187
Definition: sqldefs.h:49
T visit(const Analyzer::Expr *expr) const
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:171
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<InnerOuter> normalize_column_pairs ( const Analyzer::BinOper condition,
const Catalog_Namespace::Catalog cat,
const TemporaryTables temporary_tables 
)

Definition at line 141 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, Analyzer::BinOper::get_left_operand(), Analyzer::BinOper::get_right_operand(), Analyzer::BinOper::is_overlaps_oper(), normalize_column_pair(), and run_benchmark_import::result.

Referenced by anonymous_namespace{FromTableReordering.cpp}::get_join_qual_cost(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), and Executor::skipFragmentPair().

143  {
144  std::vector<InnerOuter> result;
145  const auto lhs_tuple_expr =
146  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
147  const auto rhs_tuple_expr =
148  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
149 
150  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
151  if (lhs_tuple_expr) {
152  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
153  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
154  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
155  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
156  result.push_back(normalize_column_pair(lhs_tuple[i].get(),
157  rhs_tuple[i].get(),
158  cat,
159  temporary_tables,
160  condition->is_overlaps_oper()));
161  }
162  } else {
163  CHECK(!lhs_tuple_expr && !rhs_tuple_expr);
164  result.push_back(normalize_column_pair(condition->get_left_operand(),
165  condition->get_right_operand(),
166  cat,
167  temporary_tables,
168  condition->is_overlaps_oper()));
169  }
170 
171  return result;
172 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
const Expr * get_right_operand() const
Definition: Analyzer.h:437
CHECK(cgen_state)
const Expr * get_left_operand() const
Definition: Analyzer.h:436
bool is_overlaps_oper() const
Definition: Analyzer.h:434
InnerOuter normalize_column_pair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::deque<Fragmenter_Namespace::FragmentInfo> only_shards_for_device ( const std::deque< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
const int  device_count 
)

Definition at line 531 of file JoinHashTable.cpp.

References CHECK_GE.

Referenced by OverlapsJoinHashTable::calculateCounts(), JoinHashTable::reify(), OverlapsJoinHashTable::reifyWithLayout(), and BaselineJoinHashTable::reifyWithLayout().

534  {
535  std::deque<Fragmenter_Namespace::FragmentInfo> shards_for_device;
536  for (const auto& fragment : fragments) {
537  CHECK_GE(fragment.shard, 0);
538  if (fragment.shard % device_count == device_id) {
539  shards_for_device.push_back(fragment);
540  }
541  }
542  return shards_for_device;
543 }
#define CHECK_GE(x, y)
Definition: Logger.h:203

+ Here is the caller graph for this function: