OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HashJoin.cpp File Reference
+ Include dependency graph for HashJoin.cpp:

Go to the source code of this file.

Classes

class  AllColumnVarsVisitor
 

Namespaces

 anonymous_namespace{HashJoin.cpp}
 

Functions

template<typename T >
std::string anonymous_namespace{HashJoin.cpp}::toStringFlat (const HashJoin *hash_table, const ExecutorDeviceType device_type, const int device_id)
 
std::ostream & operator<< (std::ostream &os, const DecodedJoinHashBufferEntry &e)
 
std::ostream & operator<< (std::ostream &os, const DecodedJoinHashBufferSet &s)
 
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar (std::string_view table, std::string_view column, int rte_idx, Executor *executor)
 
void setupSyntheticCaching (std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
 
std::vector< InputTableInfo > getSyntheticInputTableInfo (std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
 
InnerOuter anonymous_namespace{HashJoin.cpp}::get_cols (const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
size_t get_shard_count (const Analyzer::BinOper *join_condition, const Executor *executor)
 
InnerOuter normalize_column_pair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join)
 
std::vector< InnerOuter > normalize_column_pairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 

Variables

bool g_enable_overlaps_hashjoin
 

Function Documentation

size_t get_shard_count ( const Analyzer::BinOper * join_condition,
const Executor * executor 
)

Definition at line 553 of file HashJoin.cpp.

References anonymous_namespace{HashJoin.cpp}::get_cols(), and get_shard_count().

Referenced by get_shard_count(), BaselineJoinHashTable::getShardCountForCondition(), PerfectJoinHashTable::reify(), PerfectJoinHashTable::shardCount(), and Executor::skipFragmentPair().

554  {
555  const Analyzer::ColumnVar* inner_col{nullptr};
556  const Analyzer::Expr* outer_col{nullptr};
557  std::shared_ptr<Analyzer::BinOper> redirected_bin_oper;
558  try {
559  std::tie(inner_col, outer_col) =
560  get_cols(join_condition, *executor->getCatalog(), executor->getTemporaryTables());
561  } catch (...) {
562  return 0;
563  }
564  if (!inner_col || !outer_col) {
565  return 0;
566  }
567  return get_shard_count({inner_col, outer_col}, executor);
568 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:553
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:543

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr<Analyzer::ColumnVar> getSyntheticColumnVar ( std::string_view  table,
std::string_view  column,
int  rte_idx,
Executor * executor 
)

Definition at line 356 of file HashJoin.cpp.

References CHECK, kLINESTRING, kMULTIPOLYGON, kPOINT, and kPOLYGON.

Referenced by HashJoin::getSyntheticInstance().

359  {
360  auto catalog = executor->getCatalog();
361  CHECK(catalog);
362 
363  auto tmeta = catalog->getMetadataForTable(std::string(table));
364  CHECK(tmeta);
365 
366  auto cmeta = catalog->getMetadataForColumn(tmeta->tableId, std::string(column));
367  CHECK(cmeta);
368 
369  auto ti = cmeta->columnType;
370 
371  if (ti.is_geometry() && ti.get_type() != kPOINT) {
372  int geoColumnId{0};
373  switch (ti.get_type()) {
374  case kLINESTRING: {
375  geoColumnId = cmeta->columnId + 2;
376  break;
377  }
378  case kPOLYGON: {
379  geoColumnId = cmeta->columnId + 3;
380  break;
381  }
382  case kMULTIPOLYGON: {
383  geoColumnId = cmeta->columnId + 4;
384  break;
385  }
386  default:
387  CHECK(false);
388  }
389  cmeta = catalog->getMetadataForColumn(tmeta->tableId, geoColumnId);
390  CHECK(cmeta);
391  ti = cmeta->columnType;
392  }
393 
394  auto cv =
395  std::make_shared<Analyzer::ColumnVar>(ti, tmeta->tableId, cmeta->columnId, rte_idx);
396  return cv;
397 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

std::vector<InputTableInfo> getSyntheticInputTableInfo ( std::set< const Analyzer::ColumnVar * >  cvs,
Executor * executor 
)

Definition at line 441 of file HashJoin.cpp.

References CHECK, and i.

Referenced by HashJoin::getSyntheticInstance().

443  {
444  auto catalog = executor->getCatalog();
445  CHECK(catalog);
446 
447  std::unordered_set<int> phys_table_ids;
448  for (auto cv : cvs) {
449  phys_table_ids.insert(cv->get_table_id());
450  }
451 
452  // NOTE(sy): This vector ordering seems to work for now, but maybe we need to
453  // review how rte_idx is assigned for ColumnVars. See for example Analyzer.h
454  // and RelAlgExecutor.cpp and rte_idx there.
455  std::vector<InputTableInfo> query_infos(phys_table_ids.size());
456  size_t i = 0;
457  for (auto id : phys_table_ids) {
458  auto tmeta = catalog->getMetadataForTable(id);
459  query_infos[i].table_id = id;
460  query_infos[i].info = tmeta->fragmenter->getFragmentsForQuery();
461  ++i;
462  }
463 
464  return query_infos;
465 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

InnerOuter normalize_column_pair ( const Analyzer::Expr * lhs,
const Analyzer::Expr * rhs,
const Catalog_Namespace::Catalog & cat,
const TemporaryTables * temporary_tables,
const bool  is_overlaps_join 
)

Definition at line 570 of file HashJoin.cpp.

References cat(), get_column_descriptor_maybe(), get_column_type(), Analyzer::Expr::get_type_info(), is_constructed_point(), kCAST, kENCODING_DICT, kPOINT, gpu_enabled::swap(), and ScalarExprVisitor< T >::visit().

Referenced by anonymous_namespace{PerfectJoinHashTable.cpp}::get_cols(), anonymous_namespace{HashJoin.cpp}::get_cols(), and normalize_column_pairs().

574  {
575  const auto& lhs_ti = lhs->get_type_info();
576  const auto& rhs_ti = rhs->get_type_info();
577  if (!is_overlaps_join) {
578  if (lhs_ti.get_type() != rhs_ti.get_type()) {
579  throw HashJoinFail("Equijoin types must be identical, found: " +
580  lhs_ti.get_type_name() + ", " + rhs_ti.get_type_name());
581  }
582  if (!lhs_ti.is_integer() && !lhs_ti.is_time() && !lhs_ti.is_string() &&
583  !lhs_ti.is_decimal()) {
584  throw HashJoinFail("Cannot apply hash join to inner column type " +
585  lhs_ti.get_type_name());
586  }
587  // Decimal types should be identical.
588  if (lhs_ti.is_decimal() && (lhs_ti.get_scale() != rhs_ti.get_scale() ||
589  lhs_ti.get_precision() != rhs_ti.get_precision())) {
590  throw HashJoinFail("Equijoin with different decimal types");
591  }
592  }
593 
594  const auto lhs_cast = dynamic_cast<const Analyzer::UOper*>(lhs);
595  const auto rhs_cast = dynamic_cast<const Analyzer::UOper*>(rhs);
596  if (lhs_ti.is_string() && (static_cast<bool>(lhs_cast) != static_cast<bool>(rhs_cast) ||
597  (lhs_cast && lhs_cast->get_optype() != kCAST) ||
598  (rhs_cast && rhs_cast->get_optype() != kCAST))) {
599  throw HashJoinFail("Cannot use hash join for given expression");
600  }
601  // Casts to decimal are not supported.
602  if (lhs_ti.is_decimal() && (lhs_cast || rhs_cast)) {
603  throw HashJoinFail("Cannot use hash join for given expression");
604  }
605  const auto lhs_col =
606  lhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(lhs_cast->get_operand())
607  : dynamic_cast<const Analyzer::ColumnVar*>(lhs);
608  const auto rhs_col =
609  rhs_cast ? dynamic_cast<const Analyzer::ColumnVar*>(rhs_cast->get_operand())
610  : dynamic_cast<const Analyzer::ColumnVar*>(rhs);
611  if (!lhs_col && !rhs_col) {
612  throw HashJoinFail("Cannot use hash join for given expression");
613  }
614  const Analyzer::ColumnVar* inner_col{nullptr};
615  const Analyzer::ColumnVar* outer_col{nullptr};
616  auto outer_ti = lhs_ti;
617  auto inner_ti = rhs_ti;
618  const Analyzer::Expr* outer_expr{lhs};
619  if ((!lhs_col || (rhs_col && lhs_col->get_rte_idx() < rhs_col->get_rte_idx())) &&
620  (!rhs_col || (!lhs_col || lhs_col->get_rte_idx() < rhs_col->get_rte_idx()))) {
621  inner_col = rhs_col;
622  outer_col = lhs_col;
623  } else {
624  if (lhs_col && lhs_col->get_rte_idx() == 0) {
625  throw HashJoinFail("Cannot use hash join for given expression");
626  }
627  inner_col = lhs_col;
628  outer_col = rhs_col;
629  std::swap(outer_ti, inner_ti);
630  outer_expr = rhs;
631  }
632  if (!inner_col) {
633  throw HashJoinFail("Cannot use hash join for given expression");
634  }
635  if (!outer_col) {
636  MaxRangeTableIndexVisitor rte_idx_visitor;
637  int outer_rte_idx = rte_idx_visitor.visit(outer_expr);
638  // The inner column candidate is not actually inner; the outer
639  // expression contains columns which are at least as deep.
640  if (inner_col->get_rte_idx() <= outer_rte_idx) {
641  throw HashJoinFail("Cannot use hash join for given expression");
642  }
643  }
644  // We need to fetch the actual type information from the catalog since Analyzer
645  // always reports nullable as true for inner table columns in left joins.
646  const auto inner_col_cd = get_column_descriptor_maybe(
647  inner_col->get_column_id(), inner_col->get_table_id(), cat);
648  const auto inner_col_real_ti = get_column_type(inner_col->get_column_id(),
649  inner_col->get_table_id(),
650  inner_col_cd,
651  temporary_tables);
652  const auto& outer_col_ti =
653  !(dynamic_cast<const Analyzer::FunctionOper*>(lhs)) && outer_col
654  ? outer_col->get_type_info()
655  : outer_ti;
656  // Casts from decimal are not supported.
657  if ((inner_col_real_ti.is_decimal() || outer_col_ti.is_decimal()) &&
658  (lhs_cast || rhs_cast)) {
659  throw HashJoinFail("Cannot use hash join for given expression");
660  }
661  if (is_overlaps_join) {
662  if (!inner_col_real_ti.is_array()) {
663  throw HashJoinFail(
664  "Overlaps join only supported for inner columns with array type");
665  }
666  auto is_bounds_array = [](const auto ti) {
667  return ti.is_fixlen_array() && ti.get_size() == 32;
668  };
669  if (!is_bounds_array(inner_col_real_ti)) {
670  throw HashJoinFail(
671  "Overlaps join only supported for 4-element double fixed length arrays");
672  }
673  if (!(outer_col_ti.get_type() == kPOINT || is_bounds_array(outer_col_ti) ||
674  is_constructed_point(outer_expr))) {
675  throw HashJoinFail(
676  "Overlaps join only supported for geometry outer columns of type point, "
677  "geometry columns with bounds or constructed points");
678  }
679  } else {
680  if (!(inner_col_real_ti.is_integer() || inner_col_real_ti.is_time() ||
681  inner_col_real_ti.is_decimal() ||
682  (inner_col_real_ti.is_string() &&
683  inner_col_real_ti.get_compression() == kENCODING_DICT))) {
684  throw HashJoinFail(
685  "Can only apply hash join to integer-like types and dictionary encoded "
686  "strings");
687  }
688  }
689 
690  auto normalized_inner_col = inner_col;
691  auto normalized_outer_col = outer_col ? outer_col : outer_expr;
692 
693  const auto& normalized_inner_ti = normalized_inner_col->get_type_info();
694  const auto& normalized_outer_ti = normalized_outer_col->get_type_info();
695 
696  if (normalized_inner_ti.is_string() != normalized_outer_ti.is_string()) {
697  throw HashJoinFail(std::string("Could not build hash tables for incompatible types " +
698  normalized_inner_ti.get_type_name() + " and " +
699  normalized_outer_ti.get_type_name()));
700  }
701 
702  return {normalized_inner_col, normalized_outer_col};
703 }
std::string cat(Ts &&...args)
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1176
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:238
Definition: sqldefs.h:49
T visit(const Analyzer::Expr *expr) const
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:222
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<InnerOuter> normalize_column_pairs ( const Analyzer::BinOper * condition,
const Catalog_Namespace::Catalog & cat,
const TemporaryTables * temporary_tables 
)

Definition at line 705 of file HashJoin.cpp.

References cat(), CHECK, CHECK_EQ, Analyzer::BinOper::get_left_operand(), Analyzer::BinOper::get_right_operand(), i, Analyzer::BinOper::is_overlaps_oper(), normalize_column_pair(), and run_benchmark_import::result.

Referenced by anonymous_namespace{FromTableReordering.cpp}::get_join_qual_cost(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), and Executor::skipFragmentPair().

707  {
708  std::vector<InnerOuter> result;
709  const auto lhs_tuple_expr =
710  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
711  const auto rhs_tuple_expr =
712  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
713 
714  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
715  if (lhs_tuple_expr) {
716  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
717  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
718  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
719  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
720  result.push_back(normalize_column_pair(lhs_tuple[i].get(),
721  rhs_tuple[i].get(),
722  cat,
723  temporary_tables,
724  condition->is_overlaps_oper()));
725  }
726  } else {
727  CHECK(!lhs_tuple_expr && !rhs_tuple_expr);
728  result.push_back(normalize_column_pair(condition->get_left_operand(),
729  condition->get_right_operand(),
730  cat,
731  temporary_tables,
732  condition->is_overlaps_oper()));
733  }
734 
735  return result;
736 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string cat(Ts &&...args)
const Expr * get_right_operand() const
Definition: Analyzer.h:443
InnerOuter normalize_column_pair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join)
Definition: HashJoin.cpp:570
#define CHECK(condition)
Definition: Logger.h:197
const Expr * get_left_operand() const
Definition: Analyzer.h:442
bool is_overlaps_oper() const
Definition: Analyzer.h:440

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::ostream& operator<< ( std::ostream &  os,
const DecodedJoinHashBufferEntry & e 
)

Definition at line 122 of file HashJoin.cpp.

References generate_TableFunctionsFactory_init::k, DecodedJoinHashBufferEntry::key, and DecodedJoinHashBufferEntry::payload.

122  {
123  os << " {{";
124  bool first = true;
125  for (auto k : e.key) {
126  if (!first) {
127  os << ",";
128  } else {
129  first = false;
130  }
131  os << k;
132  }
133  os << "}, ";
134  os << "{";
135  first = true;
136  for (auto p : e.payload) {
137  if (!first) {
138  os << ", ";
139  } else {
140  first = false;
141  }
142  os << p;
143  }
144  os << "}}";
145  return os;
146 }
std::set< int32_t > payload
Definition: HashTable.h:23
std::vector< int64_t > key
Definition: HashTable.h:22
std::ostream& operator<< ( std::ostream &  os,
const DecodedJoinHashBufferSet & s 
)

Definition at line 148 of file HashJoin.cpp.

148  {
149  os << "{\n";
150  bool first = true;
151  for (auto e : s) {
152  if (!first) {
153  os << ",\n";
154  } else {
155  first = false;
156  }
157  os << e;
158  }
159  if (!s.empty()) {
160  os << "\n";
161  }
162  os << "}\n";
163  return os;
164 }
void setupSyntheticCaching ( std::set< const Analyzer::ColumnVar * >  cvs,
Executor * executor 
)

Definition at line 427 of file HashJoin.cpp.

Referenced by HashJoin::getSyntheticInstance().

427  {
428  std::unordered_set<int> phys_table_ids;
429  for (auto cv : cvs) {
430  phys_table_ids.insert(cv->get_table_id());
431  }
432 
433  std::unordered_set<PhysicalInput> phys_inputs;
434  for (auto cv : cvs) {
435  phys_inputs.emplace(PhysicalInput{cv->get_column_id(), cv->get_table_id()});
436  }
437 
438  executor->setupCaching(phys_inputs, phys_table_ids);
439 }

+ Here is the caller graph for this function:

Variable Documentation

bool g_enable_overlaps_hashjoin

Definition at line 96 of file Execute.cpp.