OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HashJoin.h File Reference
+ Include dependency graph for HashJoin.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class  TooManyHashEntries
 
class  TableMustBeReplicated
 
class  HashJoinFail
 
class  NeedsOneToManyHash
 
class  FailedToFetchColumn
 
class  FailedToJoinOnVirtualColumn
 
class  OverlapsHashTableTooBig
 
struct  ColumnsForDevice
 
struct  HashJoinMatchingSet
 
struct  CompositeKeyInfo
 
class  HashJoin
 

Typedefs

using InnerOuter = std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * >
 

Functions

std::ostream & operator<< (std::ostream &os, const DecodedJoinHashBufferEntry &e)
 
std::ostream & operator<< (std::ostream &os, const DecodedJoinHashBufferSet &s)
 
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar (std::string_view table, std::string_view column, int rte_idx, Executor *executor)
 
size_t get_shard_count (const Analyzer::BinOper *join_condition, const Executor *executor)
 
size_t get_shard_count (std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > equi_pair, const Executor *executor)
 
InnerOuter normalize_column_pair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
 
std::vector< InnerOuter > normalize_column_pairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 

Typedef Documentation

using InnerOuter = std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * >

Definition at line 76 of file HashJoin.h.

Function Documentation

size_t get_shard_count ( const Analyzer::BinOper * join_condition,
const Executor * executor 
)

Definition at line 553 of file HashJoin.cpp.

References anonymous_namespace{HashJoin.cpp}::get_cols(), and get_shard_count().

Referenced by get_shard_count(), BaselineJoinHashTable::getShardCountForCondition(), PerfectJoinHashTable::reify(), PerfectJoinHashTable::shardCount(), and Executor::skipFragmentPair().

554  {
555  const Analyzer::ColumnVar* inner_col{nullptr};
556  const Analyzer::Expr* outer_col{nullptr};
557  std::shared_ptr<Analyzer::BinOper> redirected_bin_oper;
558  try {
559  std::tie(inner_col, outer_col) =
560  get_cols(join_condition, *executor->getCatalog(), executor->getTemporaryTables());
561  } catch (...) {
562  return 0;
563  }
564  if (!inner_col || !outer_col) {
565  return 0;
566  }
567  return get_shard_count({inner_col, outer_col}, executor);
568 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:553
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:543

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t get_shard_count ( std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * >  equi_pair,
const Executor * executor 
)

Definition at line 110 of file PerfectJoinHashTable.cpp.

References CHECK, and anonymous_namespace{PerfectJoinHashTable.cpp}::shard_count_less_or_equal_device_count().

112  {
113  const auto inner_col = equi_pair.first;
114  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(equi_pair.second);
115  if (!outer_col || inner_col->get_table_id() < 0 || outer_col->get_table_id() < 0) {
116  return 0;
117  }
118  if (outer_col->get_rte_idx()) {
119  return 0;
120  }
121  if (inner_col->get_type_info() != outer_col->get_type_info()) {
122  return 0;
123  }
124  const auto catalog = executor->getCatalog();
125  const auto inner_td = catalog->getMetadataForTable(inner_col->get_table_id());
126  CHECK(inner_td);
127  const auto outer_td = catalog->getMetadataForTable(outer_col->get_table_id());
128  CHECK(outer_td);
129  if (inner_td->shardedColumnId == 0 || outer_td->shardedColumnId == 0 ||
130  inner_td->nShards != outer_td->nShards) {
131  return 0;
132  }
133  if (!shard_count_less_or_equal_device_count(inner_td->tableId, executor)) {
134  return 0;
135  }
136  // The two columns involved must be the ones on which the tables have been sharded on.
137  return (inner_td->shardedColumnId == inner_col->get_column_id() &&
138  outer_td->shardedColumnId == outer_col->get_column_id()) ||
139  (outer_td->shardedColumnId == inner_col->get_column_id() &&
140  inner_td->shardedColumnId == inner_col->get_column_id())
141  ? inner_td->nShards
142  : 0;
143 }
bool shard_count_less_or_equal_device_count(const int inner_table_id, const Executor *executor)
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

std::shared_ptr<Analyzer::ColumnVar> getSyntheticColumnVar ( std::string_view  table,
std::string_view  column,
int  rte_idx,
Executor * executor 
)

Definition at line 356 of file HashJoin.cpp.

References CHECK, kLINESTRING, kMULTIPOLYGON, kPOINT, and kPOLYGON.

Referenced by HashJoin::getSyntheticInstance().

359  {
360  auto catalog = executor->getCatalog();
361  CHECK(catalog);
362 
363  auto tmeta = catalog->getMetadataForTable(std::string(table));
364  CHECK(tmeta);
365 
366  auto cmeta = catalog->getMetadataForColumn(tmeta->tableId, std::string(column));
367  CHECK(cmeta);
368 
369  auto ti = cmeta->columnType;
370 
371  if (ti.is_geometry() && ti.get_type() != kPOINT) {
372  int geoColumnId{0};
373  switch (ti.get_type()) {
374  case kLINESTRING: {
375  geoColumnId = cmeta->columnId + 2;
376  break;
377  }
378  case kPOLYGON: {
379  geoColumnId = cmeta->columnId + 3;
380  break;
381  }
382  case kMULTIPOLYGON: {
383  geoColumnId = cmeta->columnId + 4;
384  break;
385  }
386  default:
387  CHECK(false);
388  }
389  cmeta = catalog->getMetadataForColumn(tmeta->tableId, geoColumnId);
390  CHECK(cmeta);
391  ti = cmeta->columnType;
392  }
393 
394  auto cv =
395  std::make_shared<Analyzer::ColumnVar>(ti, tmeta->tableId, cmeta->columnId, rte_idx);
396  return cv;
397 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

InnerOuter normalize_column_pair ( const Analyzer::Expr * lhs,
const Analyzer::Expr * rhs,
const Catalog_Namespace::Catalog & cat,
const TemporaryTables * temporary_tables,
const bool  is_overlaps_join = false 
)

Definition at line 570 of file HashJoin.cpp.

References cat(), get_column_descriptor_maybe(), get_column_type(), Analyzer::Expr::get_type_info(), is_constructed_point(), kCAST, kENCODING_DICT, kPOINT, gpu_enabled::swap(), and ScalarExprVisitor< T >::visit().

Referenced by anonymous_namespace{PerfectJoinHashTable.cpp}::get_cols(), anonymous_namespace{HashJoin.cpp}::get_cols(), and normalize_column_pairs().

574  {
575  const auto& lhs_ti = lhs->get_type_info();
576  const auto& rhs_ti = rhs->get_type_info();
577  if (!is_overlaps_join) {
578  if (lhs_ti.get_type() != rhs_ti.get_type()) {
579  throw HashJoinFail("Equijoin types must be identical, found: " +
580  lhs_ti.get_type_name() + ", " + rhs_ti.get_type_name());
581  }
582  if (!lhs_ti.is_integer() && !lhs_ti.is_time() && !lhs_ti.is_string() &&
583  !lhs_ti.is_decimal()) {
584  throw HashJoinFail("Cannot apply hash join to inner column type " +
585  lhs_ti.get_type_name());
586  }
587  // Decimal types should be identical.
588  if (lhs_ti.is_decimal() && (lhs_ti.get_scale() != rhs_ti.get_scale() ||
589  lhs_ti.get_precision() != rhs_ti.get_precision())) {
590  throw HashJoinFail("Equijoin with different decimal types");
591  }
592  }
593 
594  const auto lhs_cast = dynamic_cast<const Analyzer::UOper*>(lhs);
595  const auto rhs_cast = dynamic_cast<const Analyzer::UOper*>(rhs);
596  if (lhs_ti.is_string() && (static_cast<bool>(lhs_cast) != static_cast<bool>(rhs_cast) ||
597  (lhs_cast && lhs_cast->get_optype() != kCAST) ||
598  (rhs_cast && rhs_cast->get_optype() != kCAST))) {
599  throw HashJoinFail("Cannot use hash join for given expression");
600  }
601  // Casts to decimal are not suported.
602  if (lhs_ti.is_decimal() && (lhs_cast || rhs_cast)) {
603  throw HashJoinFail("Cannot use hash join for given expression");
604  }
605  const auto lhs_col =
606  lhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(lhs_cast->get_operand())
607  : dynamic_cast<const Analyzer::ColumnVar*>(lhs);
608  const auto rhs_col =
609  rhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(rhs_cast->get_operand())
610  : dynamic_cast<const Analyzer::ColumnVar*>(rhs);
611  if (!lhs_col && !rhs_col) {
612  throw HashJoinFail("Cannot use hash join for given expression");
613  }
614  const Analyzer::ColumnVar* inner_col{nullptr};
615  const Analyzer::ColumnVar* outer_col{nullptr};
616  auto outer_ti = lhs_ti;
617  auto inner_ti = rhs_ti;
618  const Analyzer::Expr* outer_expr{lhs};
619  if ((!lhs_col || (rhs_col && lhs_col->get_rte_idx() < rhs_col->get_rte_idx())) &&
620  (!rhs_col || (!lhs_col || lhs_col->get_rte_idx() < rhs_col->get_rte_idx()))) {
621  inner_col = rhs_col;
622  outer_col = lhs_col;
623  } else {
624  if (lhs_col && lhs_col->get_rte_idx() == 0) {
625  throw HashJoinFail("Cannot use hash join for given expression");
626  }
627  inner_col = lhs_col;
628  outer_col = rhs_col;
629  std::swap(outer_ti, inner_ti);
630  outer_expr = rhs;
631  }
632  if (!inner_col) {
633  throw HashJoinFail("Cannot use hash join for given expression");
634  }
635  if (!outer_col) {
636  MaxRangeTableIndexVisitor rte_idx_visitor;
637  int outer_rte_idx = rte_idx_visitor.visit(outer_expr);
638  // The inner column candidate is not actually inner; the outer
639  // expression contains columns which are at least as deep.
640  if (inner_col->get_rte_idx() <= outer_rte_idx) {
641  throw HashJoinFail("Cannot use hash join for given expression");
642  }
643  }
644  // We need to fetch the actual type information from the catalog since Analyzer
645  // always reports nullable as true for inner table columns in left joins.
646  const auto inner_col_cd = get_column_descriptor_maybe(
647  inner_col->get_column_id(), inner_col->get_table_id(), cat);
648  const auto inner_col_real_ti = get_column_type(inner_col->get_column_id(),
649  inner_col->get_table_id(),
650  inner_col_cd,
651  temporary_tables);
652  const auto& outer_col_ti =
653  !(dynamic_cast<const Analyzer::FunctionOper*>(lhs)) && outer_col
654  ? outer_col->get_type_info()
655  : outer_ti;
656  // Casts from decimal are not supported.
657  if ((inner_col_real_ti.is_decimal() || outer_col_ti.is_decimal()) &&
658  (lhs_cast || rhs_cast)) {
659  throw HashJoinFail("Cannot use hash join for given expression");
660  }
661  if (is_overlaps_join) {
662  if (!inner_col_real_ti.is_array()) {
663  throw HashJoinFail(
664  "Overlaps join only supported for inner columns with array type");
665  }
666  auto is_bounds_array = [](const auto ti) {
667  return ti.is_fixlen_array() && ti.get_size() == 32;
668  };
669  if (!is_bounds_array(inner_col_real_ti)) {
670  throw HashJoinFail(
671  "Overlaps join only supported for 4-element double fixed length arrays");
672  }
673  if (!(outer_col_ti.get_type() == kPOINT || is_bounds_array(outer_col_ti) ||
674  is_constructed_point(outer_expr))) {
675  throw HashJoinFail(
676  "Overlaps join only supported for geometry outer columns of type point, "
677  "geometry columns with bounds or constructed points");
678  }
679  } else {
680  if (!(inner_col_real_ti.is_integer() || inner_col_real_ti.is_time() ||
681  inner_col_real_ti.is_decimal() ||
682  (inner_col_real_ti.is_string() &&
683  inner_col_real_ti.get_compression() == kENCODING_DICT))) {
684  throw HashJoinFail(
685  "Can only apply hash join to integer-like types and dictionary encoded "
686  "strings");
687  }
688  }
689 
690  auto normalized_inner_col = inner_col;
691  auto normalized_outer_col = outer_col ? outer_col : outer_expr;
692 
693  const auto& normalized_inner_ti = normalized_inner_col->get_type_info();
694  const auto& normalized_outer_ti = normalized_outer_col->get_type_info();
695 
696  if (normalized_inner_ti.is_string() != normalized_outer_ti.is_string()) {
697  throw HashJoinFail(std::string("Could not build hash tables for incompatible types " +
698  normalized_inner_ti.get_type_name() + " and " +
699  normalized_outer_ti.get_type_name()));
700  }
701 
702  return {normalized_inner_col, normalized_outer_col};
703 }
std::string cat(Ts &&...args)
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1176
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:238
Definition: sqldefs.h:49
T visit(const Analyzer::Expr *expr) const
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:222
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<InnerOuter> normalize_column_pairs ( const Analyzer::BinOper * condition,
const Catalog_Namespace::Catalog & cat,
const TemporaryTables * temporary_tables 
)

Definition at line 705 of file HashJoin.cpp.

References cat(), CHECK, CHECK_EQ, Analyzer::BinOper::get_left_operand(), Analyzer::BinOper::get_right_operand(), i, Analyzer::BinOper::is_overlaps_oper(), normalize_column_pair(), and run_benchmark_import::result.

Referenced by anonymous_namespace{FromTableReordering.cpp}::get_join_qual_cost(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), and Executor::skipFragmentPair().

707  {
708  std::vector<InnerOuter> result;
709  const auto lhs_tuple_expr =
710  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
711  const auto rhs_tuple_expr =
712  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
713 
714  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
715  if (lhs_tuple_expr) {
716  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
717  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
718  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
719  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
720  result.push_back(normalize_column_pair(lhs_tuple[i].get(),
721  rhs_tuple[i].get(),
722  cat,
723  temporary_tables,
724  condition->is_overlaps_oper()));
725  }
726  } else {
727  CHECK(!lhs_tuple_expr && !rhs_tuple_expr);
728  result.push_back(normalize_column_pair(condition->get_left_operand(),
729  condition->get_right_operand(),
730  cat,
731  temporary_tables,
732  condition->is_overlaps_oper()));
733  }
734 
735  return result;
736 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string cat(Ts &&...args)
const Expr * get_right_operand() const
Definition: Analyzer.h:443
InnerOuter normalize_column_pair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join)
Definition: HashJoin.cpp:570
#define CHECK(condition)
Definition: Logger.h:197
const Expr * get_left_operand() const
Definition: Analyzer.h:442
bool is_overlaps_oper() const
Definition: Analyzer.h:440

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::ostream& operator<< ( std::ostream &  os,
const DecodedJoinHashBufferEntry & e 
)

Definition at line 122 of file HashJoin.cpp.

References generate_TableFunctionsFactory_init::k, DecodedJoinHashBufferEntry::key, and DecodedJoinHashBufferEntry::payload.

122  {
123  os << " {{";
124  bool first = true;
125  for (auto k : e.key) {
126  if (!first) {
127  os << ",";
128  } else {
129  first = false;
130  }
131  os << k;
132  }
133  os << "}, ";
134  os << "{";
135  first = true;
136  for (auto p : e.payload) {
137  if (!first) {
138  os << ", ";
139  } else {
140  first = false;
141  }
142  os << p;
143  }
144  os << "}}";
145  return os;
146 }
std::set< int32_t > payload
Definition: HashTable.h:23
std::vector< int64_t > key
Definition: HashTable.h:22
std::ostream& operator<< ( std::ostream &  os,
const DecodedJoinHashBufferSet & s 
)

Definition at line 148 of file HashJoin.cpp.

148  {
149  os << "{\n";
150  bool first = true;
151  for (auto e : s) {
152  if (!first) {
153  os << ",\n";
154  } else {
155  first = false;
156  }
157  os << e;
158  }
159  if (!s.empty()) {
160  os << "\n";
161  }
162  os << "}\n";
163  return os;
164 }