OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashJoin.cpp File Reference
+ Include dependency graph for HashJoin.cpp:

Go to the source code of this file.

Classes

class  AllColumnVarsVisitor
 

Namespaces

 anonymous_namespace{HashJoin.cpp}
 

Functions

template<typename T >
std::string anonymous_namespace{HashJoin.cpp}::toStringFlat (const HashJoin *hash_table, const ExecutorDeviceType device_type, const int device_id)
 
std::ostream & operator<< (std::ostream &os, const DecodedJoinHashBufferEntry &e)
 
std::ostream & operator<< (std::ostream &os, const DecodedJoinHashBufferSet &s)
 
std::ostream & operator<< (std::ostream &os, const InnerOuterStringOpInfos &inner_outer_string_op_infos)
 
std::string toString (const InnerOuterStringOpInfos &inner_outer_string_op_infos)
 
std::ostream & operator<< (std::ostream &os, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs)
 
std::string toString (const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs)
 
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar (std::string_view table, std::string_view column, int rte_idx, const Catalog_Namespace::Catalog &catalog)
 
void setupSyntheticCaching (std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
 
std::vector< InputTableInfo > getSyntheticInputTableInfo (std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
 
InnerOuter anonymous_namespace{HashJoin.cpp}::get_cols (const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
 
size_t get_shard_count (const Analyzer::BinOper *join_condition, const Executor *executor)
 

Variables

bool g_enable_overlaps_hashjoin
 

Function Documentation

size_t get_shard_count ( const Analyzer::BinOper * join_condition,
const Executor * executor
)

Definition at line 1048 of file HashJoin.cpp.

References anonymous_namespace{HashJoin.cpp}::get_cols(), and get_shard_count().

Referenced by get_shard_count(), BaselineJoinHashTable::getShardCountForCondition(), PerfectJoinHashTable::reify(), PerfectJoinHashTable::shardCount(), and Executor::skipFragmentPair().

1049  {
1050  const Analyzer::ColumnVar* inner_col{nullptr};
1051  const Analyzer::Expr* outer_col{nullptr};
1052  std::shared_ptr<Analyzer::BinOper> redirected_bin_oper;
1053  try {
1054  std::tie(inner_col, outer_col) =
1055  get_cols(join_condition, executor->getTemporaryTables());
1056  } catch (...) {
1057  return 0;
1058  }
1059  if (!inner_col || !outer_col) {
1060  return 0;
1061  }
1062  return get_shard_count({inner_col, outer_col}, executor);
1063 }
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:1039
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:1048

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr<Analyzer::ColumnVar> getSyntheticColumnVar ( std::string_view  table,
std::string_view  column,
int  rte_idx,
const Catalog_Namespace::Catalog & catalog
)

Definition at line 561 of file HashJoin.cpp.

References CHECK, Catalog_Namespace::Catalog::getDatabaseId(), Catalog_Namespace::Catalog::getMetadataForColumn(), Catalog_Namespace::Catalog::getMetadataForTable(), kLINESTRING, kMULTIPOLYGON, kPOINT, and kPOLYGON.

Referenced by HashJoin::getSyntheticInstance().

565  {
566  auto tmeta = catalog.getMetadataForTable(std::string(table));
567  CHECK(tmeta);
568 
569  auto cmeta = catalog.getMetadataForColumn(tmeta->tableId, std::string(column));
570  CHECK(cmeta);
571 
572  auto ti = cmeta->columnType;
573 
574  if (ti.is_geometry() && ti.get_type() != kPOINT) {
575  int geoColumnId{0};
576  switch (ti.get_type()) {
577  case kLINESTRING: {
578  geoColumnId = cmeta->columnId + 2;
579  break;
580  }
581  case kPOLYGON: {
582  geoColumnId = cmeta->columnId + 3;
583  break;
584  }
585  case kMULTIPOLYGON: {
586  geoColumnId = cmeta->columnId + 4;
587  break;
588  }
589  default:
590  CHECK(false);
591  }
592  cmeta = catalog.getMetadataForColumn(tmeta->tableId, geoColumnId);
593  CHECK(cmeta);
594  ti = cmeta->columnType;
595  }
596 
597  auto cv = std::make_shared<Analyzer::ColumnVar>(
598  ti,
599  shared::ColumnKey{catalog.getDatabaseId(), tmeta->tableId, cmeta->columnId},
600  rte_idx);
601  return cv;
602 }
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
int getDatabaseId() const
Definition: Catalog.h:304
#define CHECK(condition)
Definition: Logger.h:291
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<InputTableInfo> getSyntheticInputTableInfo ( std::set< const Analyzer::ColumnVar * >  cvs,
Executor * executor
)

Definition at line 648 of file HashJoin.cpp.

References CHECK, and Catalog_Namespace::get_metadata_for_table().

Referenced by HashJoin::getSyntheticInstance().

650  {
651  std::unordered_set<shared::TableKey> phys_table_ids;
652  for (auto cv : cvs) {
653  phys_table_ids.insert(cv->getTableKey());
654  }
655 
656  // NOTE(sy): This vector ordering seems to work for now, but maybe we need to
657  // review how rte_idx is assigned for ColumnVars. See for example Analyzer.h
658  // and RelAlgExecutor.cpp and rte_idx there.
659  std::vector<InputTableInfo> query_infos;
660  query_infos.reserve(phys_table_ids.size());
661  size_t i = 0;
662  for (const auto& table_key : phys_table_ids) {
663  auto td = Catalog_Namespace::get_metadata_for_table(table_key);
664  CHECK(td);
665  query_infos.push_back({table_key, td->fragmenter->getFragmentsForQuery()});
666  ++i;
667  }
668 
669  return query_infos;
670 }
const TableDescriptor * get_metadata_for_table(const ::shared::TableKey &table_key, bool populate_fragmenter)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::ostream& operator<< ( std::ostream &  os,
const DecodedJoinHashBufferEntry & e
)

Definition at line 126 of file HashJoin.cpp.

References DecodedJoinHashBufferEntry::key, and DecodedJoinHashBufferEntry::payload.

126  {
127  os << " {{";
128  bool first = true;
129  for (auto k : e.key) {
130  if (!first) {
131  os << ",";
132  } else {
133  first = false;
134  }
135  os << k;
136  }
137  os << "}, ";
138  os << "{";
139  first = true;
140  for (auto p : e.payload) {
141  if (!first) {
142  os << ", ";
143  } else {
144  first = false;
145  }
146  os << p;
147  }
148  os << "}}";
149  return os;
150 }
std::set< int32_t > payload
Definition: HashTable.h:23
std::vector< int64_t > key
Definition: HashTable.h:22
std::ostream& operator<< ( std::ostream &  os,
const DecodedJoinHashBufferSet & s
)

Definition at line 152 of file HashJoin.cpp.

152  {
153  os << "{\n";
154  bool first = true;
155  for (auto e : s) {
156  if (!first) {
157  os << ",\n";
158  } else {
159  first = false;
160  }
161  os << e;
162  }
163  if (!s.empty()) {
164  os << "\n";
165  }
166  os << "}\n";
167  return os;
168 }
std::ostream& operator<< ( std::ostream &  os,
const InnerOuterStringOpInfos & inner_outer_string_op_infos
)

Definition at line 170 of file HashJoin.cpp.

171  {
172  os << "(" << inner_outer_string_op_infos.first << ", "
173  << inner_outer_string_op_infos.second << ")";
174  return os;
175 }
std::ostream& operator<< ( std::ostream &  os,
const std::vector< InnerOuterStringOpInfos > &  inner_outer_string_op_infos_pairs 
)

Definition at line 183 of file HashJoin.cpp.

185  {
186  os << "[";
187  bool first_elem = true;
188  for (const auto& inner_outer_string_op_infos : inner_outer_string_op_infos_pairs) {
189  if (!first_elem) {
190  os << ", ";
191  }
192  first_elem = false;
193  os << inner_outer_string_op_infos;
194  }
195  os << "]";
196  return os;
197 }
void setupSyntheticCaching ( std::set< const Analyzer::ColumnVar * >  cvs,
Executor * executor
)

Definition at line 632 of file HashJoin.cpp.

References PhysicalInput::table_id.

Referenced by HashJoin::getSyntheticInstance().

632  {
633  std::unordered_set<shared::TableKey> phys_table_ids;
634  for (auto cv : cvs) {
635  phys_table_ids.insert(cv->getTableKey());
636  }
637 
638  std::unordered_set<PhysicalInput> phys_inputs;
639  for (auto cv : cvs) {
640  const auto& column_key = cv->getColumnKey();
641  phys_inputs.emplace(
642  PhysicalInput{column_key.column_id, column_key.table_id, column_key.db_id});
643  }
644 
645  executor->setupCaching(phys_inputs, phys_table_ids);
646 }

+ Here is the caller graph for this function:

std::string toString ( const InnerOuterStringOpInfos & inner_outer_string_op_infos)

Definition at line 177 of file HashJoin.cpp.

177  {
178  std::ostringstream os;
179  os << inner_outer_string_op_infos;
180  return os.str();
181 }
std::string toString ( const std::vector< InnerOuterStringOpInfos > &  inner_outer_string_op_infos_pairs)

Definition at line 199 of file HashJoin.cpp.

200  {
201  std::ostringstream os;
202  os << inner_outer_string_op_infos_pairs;
203  return os.str();
204 }

Variable Documentation

bool g_enable_overlaps_hashjoin

Definition at line 102 of file Execute.cpp.