OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
JoinHashTable.h File Reference
#include "Analyzer/Analyzer.h"
#include "Catalog/Catalog.h"
#include "ColumnarResults.h"
#include "DataMgr/Allocators/ThrustAllocator.h"
#include "DataMgr/Chunk/Chunk.h"
#include "Descriptors/InputDescriptors.h"
#include "Descriptors/RowSetMemoryOwner.h"
#include "ExpressionRange.h"
#include "InputMetadata.h"
#include "JoinHashTableInterface.h"
#include <llvm/IR/Value.h>
#include <functional>
#include <memory>
#include <mutex>
#include <stdexcept>
+ Include dependency graph for JoinHashTable.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class  JoinHashTable
 
struct  JoinHashTable::JoinHashTableCacheKey
 

Functions

size_t get_shard_count (const Analyzer::BinOper *join_condition, const Executor *executor)
 
size_t get_shard_count (std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > equi_pair, const Executor *executor)
 
bool needs_dictionary_translation (const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col, const Executor *executor)
 
InnerOuter normalize_column_pair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
 
std::vector< InnerOuternormalize_column_pairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
std::vector
< Fragmenter_Namespace::FragmentInfo
only_shards_for_device (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
 
const InputTableInfoget_inner_query_info (const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
 
size_t get_entries_per_device (const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
 

Function Documentation

size_t get_entries_per_device ( const size_t  total_entries,
const size_t  shard_count,
const size_t  device_count,
const Data_Namespace::MemoryLevel  memory_level 
)

Definition at line 1572 of file JoinHashTable.cpp.

References CHECK_GT, and Data_Namespace::GPU_LEVEL.

Referenced by OverlapsJoinHashTable::calculateCounts(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), and BaselineJoinHashTable::reifyWithLayout().

1575  {
1576  const auto entries_per_shard =
1577  shard_count ? (total_entries + shard_count - 1) / shard_count : total_entries;
1578  size_t entries_per_device = entries_per_shard;
1579  if (memory_level == Data_Namespace::GPU_LEVEL && shard_count) {
1580  const auto shards_per_device = (shard_count + device_count - 1) / device_count;
1581  CHECK_GT(shards_per_device, 0u);
1582  entries_per_device = entries_per_shard * shards_per_device;
1583  }
1584  return entries_per_device;
1585 }
#define CHECK_GT(x, y)
Definition: Logger.h:209

+ Here is the caller graph for this function:

const InputTableInfo& get_inner_query_info ( const int  inner_table_id,
const std::vector< InputTableInfo > &  query_infos 
)

Definition at line 1558 of file JoinHashTable.cpp.

References CHECK_NE.

Referenced by JoinHashTable::getInnerQueryInfo(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::reifyWithLayout(), and BaselineJoinHashTable::reifyWithLayout().

1560  {
1561  ssize_t ti_idx = -1;
1562  for (size_t i = 0; i < query_infos.size(); ++i) {
1563  if (inner_table_id == query_infos[i].table_id) {
1564  ti_idx = i;
1565  break;
1566  }
1567  }
1568  CHECK_NE(ssize_t(-1), ti_idx);
1569  return query_infos[ti_idx];
1570 }
#define CHECK_NE(x, y)
Definition: Logger.h:206

+ Here is the caller graph for this function:

size_t get_shard_count ( const Analyzer::BinOper join_condition,
const Executor executor 
)

Definition at line 263 of file JoinHashTable.cpp.

References anonymous_namespace{JoinHashTable.cpp}::get_cols(), and get_shard_count().

Referenced by JoinHashTable::checkHashJoinReplicationConstraint(), get_shard_count(), BaselineJoinHashTable::getShardCountForCondition(), JoinHashTable::initOneToManyHashTable(), JoinHashTable::shardCount(), and Executor::skipFragmentPair().

264  {
265  const Analyzer::ColumnVar* inner_col{nullptr};
266  const Analyzer::Expr* outer_col{nullptr};
267  std::shared_ptr<Analyzer::BinOper> redirected_bin_oper;
268  try {
269  std::tie(inner_col, outer_col) =
270  get_cols(join_condition, *executor->getCatalog(), executor->getTemporaryTables());
271  } catch (...) {
272  return 0;
273  }
274  if (!inner_col || !outer_col) {
275  return 0;
276  }
277  return get_shard_count({inner_col, outer_col}, executor);
278 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t get_shard_count ( std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * >  equi_pair,
const Executor executor 
)

Definition at line 301 of file JoinHashTable.cpp.

References CHECK(), and anonymous_namespace{JoinHashTable.cpp}::shard_count_less_or_equal_device_count().

303  {
304  const auto inner_col = equi_pair.first;
305  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(equi_pair.second);
306  if (!outer_col || inner_col->get_table_id() < 0 || outer_col->get_table_id() < 0) {
307  return 0;
308  }
309  if (outer_col->get_rte_idx()) {
310  return 0;
311  }
312  if (inner_col->get_type_info() != outer_col->get_type_info()) {
313  return 0;
314  }
315  const auto catalog = executor->getCatalog();
316  const auto inner_td = catalog->getMetadataForTable(inner_col->get_table_id());
317  CHECK(inner_td);
318  const auto outer_td = catalog->getMetadataForTable(outer_col->get_table_id());
319  CHECK(outer_td);
320  if (inner_td->shardedColumnId == 0 || outer_td->shardedColumnId == 0 ||
321  inner_td->nShards != outer_td->nShards) {
322  return 0;
323  }
324  if (!shard_count_less_or_equal_device_count(inner_td->tableId, executor)) {
325  return 0;
326  }
327  // The two columns involved must be the ones on which the tables have been sharded on.
328  return (inner_td->shardedColumnId == inner_col->get_column_id() &&
329  outer_td->shardedColumnId == outer_col->get_column_id()) ||
330  (outer_td->shardedColumnId == inner_col->get_column_id() &&
331  inner_td->shardedColumnId == inner_col->get_column_id())
332  ? inner_td->nShards
333  : 0;
334 }
bool shard_count_less_or_equal_device_count(const int inner_table_id, const Executor *executor)
CHECK(cgen_state)

+ Here is the call graph for this function:

bool needs_dictionary_translation ( const Analyzer::ColumnVar inner_col,
const Analyzer::Expr outer_col,
const Executor executor 
)

Definition at line 446 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, get_column_descriptor_maybe(), Analyzer::ColumnVar::get_column_id(), get_column_type(), and Analyzer::ColumnVar::get_table_id().

Referenced by BaselineJoinHashTable::getEffectiveMemoryLevel(), JoinHashTable::reifyOneToManyForDevice(), and JoinHashTable::reifyOneToOneForDevice().

448  {
449  const auto catalog = executor->getCatalog();
450  CHECK(catalog);
451  const auto inner_cd = get_column_descriptor_maybe(
452  inner_col->get_column_id(), inner_col->get_table_id(), *catalog);
453  const auto& inner_ti = get_column_type(inner_col->get_column_id(),
454  inner_col->get_table_id(),
455  inner_cd,
456  executor->getTemporaryTables());
457  // Only strings may need dictionary translation.
458  if (!inner_ti.is_string()) {
459  return false;
460  }
461  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_col_expr);
462  CHECK(outer_col);
463  const auto outer_cd = get_column_descriptor_maybe(
464  outer_col->get_column_id(), outer_col->get_table_id(), *catalog);
465  // Don't want to deal with temporary tables for now, require translation.
466  if (!inner_cd || !outer_cd) {
467  return true;
468  }
469  const auto& outer_ti = get_column_type(outer_col->get_column_id(),
470  outer_col->get_table_id(),
471  outer_cd,
472  executor->getTemporaryTables());
473  CHECK_EQ(inner_ti.is_string(), outer_ti.is_string());
474  // If the two columns don't share the dictionary, translation is needed.
475  return outer_ti.get_comp_param() != inner_ti.get_comp_param();
476 }
int get_table_id() const
Definition: Analyzer.h:195
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:165
CHECK(cgen_state)
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:149
int get_column_id() const
Definition: Analyzer.h:196

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

InnerOuter normalize_column_pair ( const Analyzer::Expr lhs,
const Analyzer::Expr rhs,
const Catalog_Namespace::Catalog cat,
const TemporaryTables temporary_tables,
const bool  is_overlaps_join = false 
)

Definition at line 40 of file JoinHashTable.cpp.

References cat(), get_column_descriptor_maybe(), get_column_type(), Analyzer::Expr::get_type_info(), kCAST, kENCODING_DICT, kPOINT, and ScalarExprVisitor< T >::visit().

Referenced by anonymous_namespace{JoinHashTable.cpp}::get_cols(), and normalize_column_pairs().

44  {
45  const auto& lhs_ti = lhs->get_type_info();
46  const auto& rhs_ti = rhs->get_type_info();
47  if (!is_overlaps_join) {
48  if (lhs_ti.get_type() != rhs_ti.get_type()) {
49  throw HashJoinFail("Equijoin types must be identical, found: " +
50  lhs_ti.get_type_name() + ", " + rhs_ti.get_type_name());
51  }
52  if (!lhs_ti.is_integer() && !lhs_ti.is_time() && !lhs_ti.is_string() &&
53  !lhs_ti.is_decimal()) {
54  throw HashJoinFail("Cannot apply hash join to inner column type " +
55  lhs_ti.get_type_name());
56  }
57  // Decimal types should be identical.
58  if (lhs_ti.is_decimal() && (lhs_ti.get_scale() != rhs_ti.get_scale() ||
59  lhs_ti.get_precision() != rhs_ti.get_precision())) {
60  throw HashJoinFail("Equijoin with different decimal types");
61  }
62  }
63 
64  const auto lhs_cast = dynamic_cast<const Analyzer::UOper*>(lhs);
65  const auto rhs_cast = dynamic_cast<const Analyzer::UOper*>(rhs);
66  if (lhs_ti.is_string() && (static_cast<bool>(lhs_cast) != static_cast<bool>(rhs_cast) ||
67  (lhs_cast && lhs_cast->get_optype() != kCAST) ||
68  (rhs_cast && rhs_cast->get_optype() != kCAST))) {
69  throw HashJoinFail("Cannot use hash join for given expression");
70  }
71  // Casts to decimal are not suported.
72  if (lhs_ti.is_decimal() && (lhs_cast || rhs_cast)) {
73  throw HashJoinFail("Cannot use hash join for given expression");
74  }
75  const auto lhs_col =
76  lhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(lhs_cast->get_operand())
77  : dynamic_cast<const Analyzer::ColumnVar*>(lhs);
78  const auto rhs_col =
79  rhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(rhs_cast->get_operand())
80  : dynamic_cast<const Analyzer::ColumnVar*>(rhs);
81  if (!lhs_col && !rhs_col) {
82  throw HashJoinFail("Cannot use hash join for given expression");
83  }
84  const Analyzer::ColumnVar* inner_col{nullptr};
85  const Analyzer::ColumnVar* outer_col{nullptr};
86  auto outer_ti = lhs_ti;
87  auto inner_ti = rhs_ti;
88  const Analyzer::Expr* outer_expr{lhs};
89  if ((!lhs_col || (rhs_col && lhs_col->get_rte_idx() < rhs_col->get_rte_idx())) &&
90  (!rhs_col || (!lhs_col || lhs_col->get_rte_idx() < rhs_col->get_rte_idx()))) {
91  inner_col = rhs_col;
92  outer_col = lhs_col;
93  } else {
94  if (lhs_col && lhs_col->get_rte_idx() == 0) {
95  throw HashJoinFail("Cannot use hash join for given expression");
96  }
97  inner_col = lhs_col;
98  outer_col = rhs_col;
99  std::swap(outer_ti, inner_ti);
100  outer_expr = rhs;
101  }
102  if (!inner_col) {
103  throw HashJoinFail("Cannot use hash join for given expression");
104  }
105  if (!outer_col) {
106  MaxRangeTableIndexVisitor rte_idx_visitor;
107  int outer_rte_idx = rte_idx_visitor.visit(outer_expr);
108  // The inner column candidate is not actually inner; the outer
109  // expression contains columns which are at least as deep.
110  if (inner_col->get_rte_idx() <= outer_rte_idx) {
111  throw HashJoinFail("Cannot use hash join for given expression");
112  }
113  }
114  // We need to fetch the actual type information from the catalog since Analyzer
115  // always reports nullable as true for inner table columns in left joins.
116  const auto inner_col_cd = get_column_descriptor_maybe(
117  inner_col->get_column_id(), inner_col->get_table_id(), cat);
118  const auto inner_col_real_ti = get_column_type(inner_col->get_column_id(),
119  inner_col->get_table_id(),
120  inner_col_cd,
121  temporary_tables);
122  const auto& outer_col_ti =
123  !(dynamic_cast<const Analyzer::FunctionOper*>(lhs)) && outer_col
124  ? outer_col->get_type_info()
125  : outer_ti;
126  // Casts from decimal are not supported.
127  if ((inner_col_real_ti.is_decimal() || outer_col_ti.is_decimal()) &&
128  (lhs_cast || rhs_cast)) {
129  throw HashJoinFail("Cannot use hash join for given expression");
130  }
131  if (is_overlaps_join) {
132  if (!inner_col_real_ti.is_array()) {
133  throw HashJoinFail(
134  "Overlaps join only supported for inner columns with array type");
135  }
136  auto is_bounds_array = [](const auto ti) {
137  return ti.is_fixlen_array() && ti.get_size() == 32;
138  };
139  if (!is_bounds_array(inner_col_real_ti)) {
140  throw HashJoinFail(
141  "Overlaps join only supported for 4-element double fixed length arrays");
142  }
143  if (!(outer_col_ti.get_type() == kPOINT || is_bounds_array(outer_col_ti))) {
144  throw HashJoinFail(
145  "Overlaps join only supported for geometry outer columns of type point or "
146  "geometry columns with bounds");
147  }
148  } else {
149  if (!(inner_col_real_ti.is_integer() || inner_col_real_ti.is_time() ||
150  inner_col_real_ti.is_decimal() ||
151  (inner_col_real_ti.is_string() &&
152  inner_col_real_ti.get_compression() == kENCODING_DICT))) {
153  throw HashJoinFail(
154  "Can only apply hash join to integer-like types and dictionary encoded "
155  "strings");
156  }
157  }
158 
159  auto normalized_inner_col = inner_col;
160  auto normalized_outer_col = outer_col ? outer_col : outer_expr;
161 
162  const auto& normalized_inner_ti = normalized_inner_col->get_type_info();
163  const auto& normalized_outer_ti = normalized_outer_col->get_type_info();
164 
165  if (normalized_inner_ti.is_string() != normalized_outer_ti.is_string()) {
166  throw HashJoinFail(std::string("Could not build hash tables for incompatible types " +
167  normalized_inner_ti.get_type_name() + " and " +
168  normalized_outer_ti.get_type_name()));
169  }
170 
171  return {normalized_inner_col, normalized_outer_col};
172 }
std::string cat(Ts &&...args)
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:165
Definition: sqldefs.h:49
T visit(const Analyzer::Expr *expr) const
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:149
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<InnerOuter> normalize_column_pairs ( const Analyzer::BinOper condition,
const Catalog_Namespace::Catalog cat,
const TemporaryTables temporary_tables 
)

Definition at line 174 of file JoinHashTable.cpp.

References cat(), CHECK(), CHECK_EQ, Analyzer::BinOper::get_left_operand(), Analyzer::BinOper::get_right_operand(), Analyzer::BinOper::is_overlaps_oper(), normalize_column_pair(), and run_benchmark_import::result.

Referenced by anonymous_namespace{FromTableReordering.cpp}::get_join_qual_cost(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), and Executor::skipFragmentPair().

176  {
177  std::vector<InnerOuter> result;
178  const auto lhs_tuple_expr =
179  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
180  const auto rhs_tuple_expr =
181  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
182 
183  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
184  if (lhs_tuple_expr) {
185  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
186  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
187  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
188  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
189  result.push_back(normalize_column_pair(lhs_tuple[i].get(),
190  rhs_tuple[i].get(),
191  cat,
192  temporary_tables,
193  condition->is_overlaps_oper()));
194  }
195  } else {
196  CHECK(!lhs_tuple_expr && !rhs_tuple_expr);
197  result.push_back(normalize_column_pair(condition->get_left_operand(),
198  condition->get_right_operand(),
199  cat,
200  temporary_tables,
201  condition->is_overlaps_oper()));
202  }
203 
204  return result;
205 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string cat(Ts &&...args)
const Expr * get_right_operand() const
Definition: Analyzer.h:444
CHECK(cgen_state)
const Expr * get_left_operand() const
Definition: Analyzer.h:443
bool is_overlaps_oper() const
Definition: Analyzer.h:441
InnerOuter normalize_column_pair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<Fragmenter_Namespace::FragmentInfo> only_shards_for_device ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
const int  device_count 
)

Definition at line 478 of file JoinHashTable.cpp.

References CHECK_GE.

Referenced by JoinHashTable::reify(), OverlapsJoinHashTable::reifyWithLayout(), and BaselineJoinHashTable::reifyWithLayout().

481  {
482  std::vector<Fragmenter_Namespace::FragmentInfo> shards_for_device;
483  for (const auto& fragment : fragments) {
484  CHECK_GE(fragment.shard, 0);
485  if (fragment.shard % device_count == device_id) {
486  shards_for_device.push_back(fragment);
487  }
488  }
489  return shards_for_device;
490 }
#define CHECK_GE(x, y)
Definition: Logger.h:210

+ Here is the caller graph for this function: