OmniSciDB  0264ff685a
HashJoin.cpp File Reference
+ Include dependency graph for HashJoin.cpp:

Go to the source code of this file.

Classes

class  AllColumnVarsVisitor
 

Namespaces

 anonymous_namespace{HashJoin.cpp}
 

Functions

template<typename T >
std::string anonymous_namespace{HashJoin.cpp}::toStringFlat (const HashJoin *hash_table, const ExecutorDeviceType device_type, const int device_id)
 
std::ostream & operator<< (std::ostream &os, const DecodedJoinHashBufferEntry &e)
 
std::ostream & operator<< (std::ostream &os, const DecodedJoinHashBufferSet &s)
 
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar (std::string_view table, std::string_view column, int rte_idx, Executor *executor)
 
void setupSyntheticCaching (std::set< const Analyzer::ColumnVar *> cvs, Executor *executor)
 
std::vector< InputTableInfo > getSyntheticInputTableInfo (std::set< const Analyzer::ColumnVar *> cvs, Executor *executor)
 
InnerOuter anonymous_namespace{HashJoin.cpp}::get_cols (const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
size_t get_shard_count (const Analyzer::BinOper *join_condition, const Executor *executor)
 
InnerOuter normalize_column_pair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join)
 
std::vector< InnerOuter > normalize_column_pairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 

Variables

bool g_enable_overlaps_hashjoin
 

Function Documentation

◆ get_shard_count()

size_t get_shard_count ( const Analyzer::BinOper * join_condition,
const Executor * executor 
)

Definition at line 533 of file HashJoin.cpp.

References anonymous_namespace{HashJoin.cpp}::get_cols(), and get_shard_count().

Referenced by get_shard_count(), BaselineJoinHashTable::getShardCountForCondition(), and Executor::skipFragmentPair().

534  {
535  const Analyzer::ColumnVar* inner_col{nullptr};
536  const Analyzer::Expr* outer_col{nullptr};
537  std::shared_ptr<Analyzer::BinOper> redirected_bin_oper;
538  try {
539  std::tie(inner_col, outer_col) =
540  get_cols(join_condition, *executor->getCatalog(), executor->getTemporaryTables());
541  } catch (...) {
542  return 0;
543  }
544  if (!inner_col || !outer_col) {
545  return 0;
546  }
547  return get_shard_count({inner_col, outer_col}, executor);
548 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:533
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:523
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getSyntheticColumnVar()

std::shared_ptr<Analyzer::ColumnVar> getSyntheticColumnVar ( std::string_view  table,
std::string_view  column,
int  rte_idx,
Executor * executor 
)

Definition at line 336 of file HashJoin.cpp.

References CHECK, kLINESTRING, kMULTIPOLYGON, kPOINT, and kPOLYGON.

Referenced by HashJoin::getSyntheticInstance().

339  {
340  auto catalog = executor->getCatalog();
341  CHECK(catalog);
342 
343  auto tmeta = catalog->getMetadataForTable(std::string(table));
344  CHECK(tmeta);
345 
346  auto cmeta = catalog->getMetadataForColumn(tmeta->tableId, std::string(column));
347  CHECK(cmeta);
348 
349  auto ti = cmeta->columnType;
350 
351  if (ti.is_geometry() && ti.get_type() != kPOINT) {
352  int geoColumnId{0};
353  switch (ti.get_type()) {
354  case kLINESTRING: {
355  geoColumnId = cmeta->columnId + 2;
356  break;
357  }
358  case kPOLYGON: {
359  geoColumnId = cmeta->columnId + 3;
360  break;
361  }
362  case kMULTIPOLYGON: {
363  geoColumnId = cmeta->columnId + 4;
364  break;
365  }
366  default:
367  CHECK(false);
368  }
369  cmeta = catalog->getMetadataForColumn(tmeta->tableId, geoColumnId);
370  CHECK(cmeta);
371  ti = cmeta->columnType;
372  }
373 
374  auto cv =
375  std::make_shared<Analyzer::ColumnVar>(ti, tmeta->tableId, cmeta->columnId, rte_idx);
376  return cv;
377 }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ getSyntheticInputTableInfo()

std::vector<InputTableInfo> getSyntheticInputTableInfo ( std::set< const Analyzer::ColumnVar *>  cvs,
Executor * executor 
)

Definition at line 421 of file HashJoin.cpp.

References CHECK.

Referenced by HashJoin::getSyntheticInstance().

423  {
424  auto catalog = executor->getCatalog();
425  CHECK(catalog);
426 
427  std::unordered_set<int> phys_table_ids;
428  for (auto cv : cvs) {
429  phys_table_ids.insert(cv->get_table_id());
430  }
431 
432  // NOTE(sy): This vector ordering seems to work for now, but maybe we need to
433  // review how rte_idx is assigned for ColumnVars. See for example Analyzer.h
434  // and RelAlgExecutor.cpp and rte_idx there.
435  std::vector<InputTableInfo> query_infos(phys_table_ids.size());
436  size_t i = 0;
437  for (auto id : phys_table_ids) {
438  auto tmeta = catalog->getMetadataForTable(id);
439  query_infos[i].table_id = id;
440  query_infos[i].info = tmeta->fragmenter->getFragmentsForQuery();
441  ++i;
442  }
443 
444  return query_infos;
445 }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ normalize_column_pair()

InnerOuter normalize_column_pair ( const Analyzer::Expr * lhs,
const Analyzer::Expr * rhs,
const Catalog_Namespace::Catalog & cat,
const TemporaryTables * temporary_tables,
const bool  is_overlaps_join 
)

Definition at line 550 of file HashJoin.cpp.

References cat(), get_column_descriptor_maybe(), get_column_type(), Analyzer::Expr::get_type_info(), kCAST, kENCODING_DICT, kPOINT, gpu_enabled::swap(), and ScalarExprVisitor< T >::visit().

Referenced by anonymous_namespace{PerfectJoinHashTable.cpp}::get_cols(), anonymous_namespace{HashJoin.cpp}::get_cols(), and normalize_column_pairs().

554  {
555  const auto& lhs_ti = lhs->get_type_info();
556  const auto& rhs_ti = rhs->get_type_info();
557  if (!is_overlaps_join) {
558  if (lhs_ti.get_type() != rhs_ti.get_type()) {
559  throw HashJoinFail("Equijoin types must be identical, found: " +
560  lhs_ti.get_type_name() + ", " + rhs_ti.get_type_name());
561  }
562  if (!lhs_ti.is_integer() && !lhs_ti.is_time() && !lhs_ti.is_string() &&
563  !lhs_ti.is_decimal()) {
564  throw HashJoinFail("Cannot apply hash join to inner column type " +
565  lhs_ti.get_type_name());
566  }
567  // Decimal types should be identical.
568  if (lhs_ti.is_decimal() && (lhs_ti.get_scale() != rhs_ti.get_scale() ||
569  lhs_ti.get_precision() != rhs_ti.get_precision())) {
570  throw HashJoinFail("Equijoin with different decimal types");
571  }
572  }
573 
574  const auto lhs_cast = dynamic_cast<const Analyzer::UOper*>(lhs);
575  const auto rhs_cast = dynamic_cast<const Analyzer::UOper*>(rhs);
576  if (lhs_ti.is_string() && (static_cast<bool>(lhs_cast) != static_cast<bool>(rhs_cast) ||
577  (lhs_cast && lhs_cast->get_optype() != kCAST) ||
578  (rhs_cast && rhs_cast->get_optype() != kCAST))) {
579  throw HashJoinFail("Cannot use hash join for given expression");
580  }
581  // Casts to decimal are not supported.
582  if (lhs_ti.is_decimal() && (lhs_cast || rhs_cast)) {
583  throw HashJoinFail("Cannot use hash join for given expression");
584  }
585  const auto lhs_col =
586  lhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(lhs_cast->get_operand())
587  : dynamic_cast<const Analyzer::ColumnVar*>(lhs);
588  const auto rhs_col =
589  rhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(rhs_cast->get_operand())
590  : dynamic_cast<const Analyzer::ColumnVar*>(rhs);
591  if (!lhs_col && !rhs_col) {
592  throw HashJoinFail("Cannot use hash join for given expression");
593  }
594  const Analyzer::ColumnVar* inner_col{nullptr};
595  const Analyzer::ColumnVar* outer_col{nullptr};
596  auto outer_ti = lhs_ti;
597  auto inner_ti = rhs_ti;
598  const Analyzer::Expr* outer_expr{lhs};
599  if ((!lhs_col || (rhs_col && lhs_col->get_rte_idx() < rhs_col->get_rte_idx())) &&
600  (!rhs_col || (!lhs_col || lhs_col->get_rte_idx() < rhs_col->get_rte_idx()))) {
601  inner_col = rhs_col;
602  outer_col = lhs_col;
603  } else {
604  if (lhs_col && lhs_col->get_rte_idx() == 0) {
605  throw HashJoinFail("Cannot use hash join for given expression");
606  }
607  inner_col = lhs_col;
608  outer_col = rhs_col;
609  std::swap(outer_ti, inner_ti);
610  outer_expr = rhs;
611  }
612  if (!inner_col) {
613  throw HashJoinFail("Cannot use hash join for given expression");
614  }
615  if (!outer_col) {
616  MaxRangeTableIndexVisitor rte_idx_visitor;
617  int outer_rte_idx = rte_idx_visitor.visit(outer_expr);
618  // The inner column candidate is not actually inner; the outer
619  // expression contains columns which are at least as deep.
620  if (inner_col->get_rte_idx() <= outer_rte_idx) {
621  throw HashJoinFail("Cannot use hash join for given expression");
622  }
623  }
624  // We need to fetch the actual type information from the catalog since Analyzer
625  // always reports nullable as true for inner table columns in left joins.
626  const auto inner_col_cd = get_column_descriptor_maybe(
627  inner_col->get_column_id(), inner_col->get_table_id(), cat);
628  const auto inner_col_real_ti = get_column_type(inner_col->get_column_id(),
629  inner_col->get_table_id(),
630  inner_col_cd,
631  temporary_tables);
632  const auto& outer_col_ti =
633  !(dynamic_cast<const Analyzer::FunctionOper*>(lhs)) && outer_col
634  ? outer_col->get_type_info()
635  : outer_ti;
636  // Casts from decimal are not supported.
637  if ((inner_col_real_ti.is_decimal() || outer_col_ti.is_decimal()) &&
638  (lhs_cast || rhs_cast)) {
639  throw HashJoinFail("Cannot use hash join for given expression");
640  }
641  if (is_overlaps_join) {
642  if (!inner_col_real_ti.is_array()) {
643  throw HashJoinFail(
644  "Overlaps join only supported for inner columns with array type");
645  }
646  auto is_bounds_array = [](const auto ti) {
647  return ti.is_fixlen_array() && ti.get_size() == 32;
648  };
649  if (!is_bounds_array(inner_col_real_ti)) {
650  throw HashJoinFail(
651  "Overlaps join only supported for 4-element double fixed length arrays");
652  }
653  if (!(outer_col_ti.get_type() == kPOINT || is_bounds_array(outer_col_ti))) {
654  throw HashJoinFail(
655  "Overlaps join only supported for geometry outer columns of type point or "
656  "geometry columns with bounds");
657  }
658  } else {
659  if (!(inner_col_real_ti.is_integer() || inner_col_real_ti.is_time() ||
660  inner_col_real_ti.is_decimal() ||
661  (inner_col_real_ti.is_string() &&
662  inner_col_real_ti.get_compression() == kENCODING_DICT))) {
663  throw HashJoinFail(
664  "Can only apply hash join to integer-like types and dictionary encoded "
665  "strings");
666  }
667  }
668 
669  auto normalized_inner_col = inner_col;
670  auto normalized_outer_col = outer_col ? outer_col : outer_expr;
671 
672  const auto& normalized_inner_ti = normalized_inner_col->get_type_info();
673  const auto& normalized_outer_ti = normalized_outer_col->get_type_info();
674 
675  if (normalized_inner_ti.is_string() != normalized_outer_ti.is_string()) {
676  throw HashJoinFail(std::string("Could not build hash tables for incompatible types " +
677  normalized_inner_ti.get_type_name() + " and " +
678  normalized_outer_ti.get_type_name()));
679  }
680 
681  return {normalized_inner_col, normalized_outer_col};
682 }
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:232
Definition: sqldefs.h:49
std::string cat(Ts &&... args)
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:216
T visit(const Analyzer::Expr *expr) const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
DEVICE void swap(ARGS &&... args)
Definition: gpu_enabled.h:114
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ normalize_column_pairs()

std::vector<InnerOuter> normalize_column_pairs ( const Analyzer::BinOper * condition,
const Catalog_Namespace::Catalog & cat,
const TemporaryTables * temporary_tables 
)

Definition at line 684 of file HashJoin.cpp.

References cat(), CHECK, CHECK_EQ, Analyzer::BinOper::get_left_operand(), Analyzer::BinOper::get_right_operand(), Analyzer::BinOper::is_overlaps_oper(), normalize_column_pair(), and run_benchmark_import::result.

Referenced by anonymous_namespace{FromTableReordering.cpp}::get_join_qual_cost(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), and Executor::skipFragmentPair().

686  {
687  std::vector<InnerOuter> result;
688  const auto lhs_tuple_expr =
689  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
690  const auto rhs_tuple_expr =
691  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
692 
693  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
694  if (lhs_tuple_expr) {
695  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
696  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
697  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
698  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
699  result.push_back(normalize_column_pair(lhs_tuple[i].get(),
700  rhs_tuple[i].get(),
701  cat,
702  temporary_tables,
703  condition->is_overlaps_oper()));
704  }
705  } else {
706  CHECK(!lhs_tuple_expr && !rhs_tuple_expr);
707  result.push_back(normalize_column_pair(condition->get_left_operand(),
708  condition->get_right_operand(),
709  cat,
710  temporary_tables,
711  condition->is_overlaps_oper()));
712  }
713 
714  return result;
715 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string cat(Ts &&... args)
bool is_overlaps_oper() const
Definition: Analyzer.h:440
InnerOuter normalize_column_pair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join)
Definition: HashJoin.cpp:550
#define CHECK(condition)
Definition: Logger.h:197
const Expr * get_right_operand() const
Definition: Analyzer.h:443
const Expr * get_left_operand() const
Definition: Analyzer.h:442
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ operator<<() [1/2]

std::ostream& operator<< ( std::ostream &  os,
const DecodedJoinHashBufferEntry & e 
)

Definition at line 102 of file HashJoin.cpp.

References DecodedJoinHashBufferEntry::key, and DecodedJoinHashBufferEntry::payload.

102  {
103  os << " {{";
104  bool first = true;
105  for (auto k : e.key) {
106  if (!first) {
107  os << ",";
108  } else {
109  first = false;
110  }
111  os << k;
112  }
113  os << "}, ";
114  os << "{";
115  first = true;
116  for (auto p : e.payload) {
117  if (!first) {
118  os << ", ";
119  } else {
120  first = false;
121  }
122  os << p;
123  }
124  os << "}}";
125  return os;
126 }
std::set< int32_t > payload
Definition: HashTable.h:23
std::vector< int64_t > key
Definition: HashTable.h:22

◆ operator<<() [2/2]

std::ostream& operator<< ( std::ostream &  os,
const DecodedJoinHashBufferSet & s 
)

Definition at line 128 of file HashJoin.cpp.

128  {
129  os << "{\n";
130  bool first = true;
131  for (auto e : s) {
132  if (!first) {
133  os << ",\n";
134  } else {
135  first = false;
136  }
137  os << e;
138  }
139  if (!s.empty()) {
140  os << "\n";
141  }
142  os << "}\n";
143  return os;
144 }

◆ setupSyntheticCaching()

void setupSyntheticCaching ( std::set< const Analyzer::ColumnVar *>  cvs,
Executor * executor 
)

Definition at line 407 of file HashJoin.cpp.

Referenced by HashJoin::getSyntheticInstance().

407  {
408  std::unordered_set<int> phys_table_ids;
409  for (auto cv : cvs) {
410  phys_table_ids.insert(cv->get_table_id());
411  }
412 
413  std::unordered_set<PhysicalInput> phys_inputs;
414  for (auto cv : cvs) {
415  phys_inputs.emplace(PhysicalInput{cv->get_column_id(), cv->get_table_id()});
416  }
417 
418  executor->setupCaching(phys_inputs, phys_table_ids);
419 }
+ Here is the caller graph for this function:

Variable Documentation

◆ g_enable_overlaps_hashjoin

bool g_enable_overlaps_hashjoin

Definition at line 94 of file Execute.cpp.

Referenced by HashJoin::getInstance().