#include "JoinFilterPushDown.h"

#include <utility>

#include "DeepCopyVisitor.h"
#include "RelAlgExecutor.h"

Include dependency graph for JoinFilterPushDown.cpp:

Classes
class	anonymous_namespace{JoinFilterPushDown.cpp}::BindFilterToOutermostVisitor

class	anonymous_namespace{JoinFilterPushDown.cpp}::CollectInputColumnsVisitor

Namespaces
	anonymous_namespace{JoinFilterPushDown.cpp}

Functions
bool	to_gather_info_for_filter_selectivity (const std::vector< InputTableInfo > &table_infos)

std::vector< PushedDownFilterInfo >	find_push_down_filters (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< size_t > &input_permutation, const std::vector< size_t > &left_deep_join_input_sizes)

Function Documentation

std::vector<PushedDownFilterInfo> find_push_down_filters	(	const RelAlgExecutionUnit &	ra_exe_unit,
		const std::vector< size_t > &	input_permutation,
		const std::vector< size_t > &	left_deep_join_input_sizes
	)

Go through all tables involved in the relational algebra plan, and select potential candidates to be pushed down by calcite. For each filter we store a set of intermediate indices (previous, current, and next table) based on the column indices in their query string.

Definition at line 215 of file JoinFilterPushDown.cpp.

References CHECK_EQ, CHECK_GE, CHECK_LT, RelAlgExecutionUnit::input_descs, gpu_enabled::iota(), RelAlgExecutionUnit::join_quals, gpu_enabled::partial_sum(), run_benchmark_import::result, and ScalarExprVisitor< T >::visit().

Referenced by RelAlgExecutor::selectFiltersToBePushedDown().

                                                          {
   std::vector<PushedDownFilterInfo> result;
   if (left_deep_join_input_sizes.empty()) {
     return result;
   }
   std::vector<size_t> input_size_prefix_sums(left_deep_join_input_sizes.size());
   std::partial_sum(left_deep_join_input_sizes.begin(),
                    left_deep_join_input_sizes.end(),
                    input_size_prefix_sums.begin());
   std::vector<int> to_original_rte_idx(ra_exe_unit.input_descs.size(),
                                        ra_exe_unit.input_descs.size());
   if (!input_permutation.empty()) {
     CHECK_EQ(to_original_rte_idx.size(), input_permutation.size());
     for (size_t i = 0; i < input_permutation.size(); ++i) {
       CHECK_LT(input_permutation[i], to_original_rte_idx.size());
       CHECK_EQ(static_cast<size_t>(to_original_rte_idx[input_permutation[i]]),
                to_original_rte_idx.size());
       to_original_rte_idx[input_permutation[i]] = i;
     }
   } else {
     std::iota(to_original_rte_idx.begin(), to_original_rte_idx.end(), 0);
   }
   std::unordered_map<int, std::vector<std::shared_ptr<Analyzer::Expr>>>
       filters_per_nesting_level;
   for (const auto& level_conditions : ra_exe_unit.join_quals) {
     AllRangeTableIndexVisitor visitor;
     for (const auto& cond : level_conditions.quals) {
       const auto rte_indices = visitor.visit(cond.get());
       if (rte_indices.size() > 1) {
         continue;
       }
       const int rte_idx = (!rte_indices.empty()) ? *rte_indices.cbegin() : 0;
       if (!rte_idx) {
         continue;
       }
       CHECK_GE(rte_idx, 0);
       CHECK_LT(static_cast<size_t>(rte_idx), to_original_rte_idx.size());
       filters_per_nesting_level[to_original_rte_idx[rte_idx]].push_back(cond);
     }
   }
   for (const auto& kv : filters_per_nesting_level) {
     CHECK_GE(kv.first, 0);
     CHECK_LT(static_cast<size_t>(kv.first), input_size_prefix_sums.size());
     size_t input_prev = (kv.first > 1) ? input_size_prefix_sums[kv.first - 2] : 0;
     size_t input_start = kv.first ? input_size_prefix_sums[kv.first - 1] : 0;
     size_t input_next = input_size_prefix_sums[kv.first];
     result.emplace_back(
         PushedDownFilterInfo{kv.second, input_prev, input_start, input_next});
   }
   return result;
 }

Here is the call graph for this function:

Here is the caller graph for this function:

bool to_gather_info_for_filter_selectivity ( const std::vector< InputTableInfo > & table_infos )

The main purpose of this function is to avoid the overhead of computing the statistics needed to find push-down candidates, and of performing the push-down itself, unless the problem is large enough for that effort to be potentially helpful.

Definition at line 188 of file JoinFilterPushDown.cpp.

Referenced by RelAlgExecutor::selectFiltersToBePushedDown().

                                                   {
   if (table_infos.size() < 2) {
     return false;
   }
   // we currently do not support filter push down when there is a self-join involved:
   // TODO(Saman): prevent Calcite from optimizing self-joins to remove this exclusion
   std::unordered_set<shared::TableKey> table_keys;
   for (auto ti : table_infos) {
     if (table_keys.find(ti.table_key) == table_keys.end()) {
       table_keys.insert(ti.table_key);
     } else {
       // a self-join is involved
       return false;
     }
   }
   // TODO(Saman): add some extra heuristics to avoid preflight count and push down if it
   // is not going to be helpful.
   return true;
 }

Here is the caller graph for this function:

Classes

Namespaces

Functions

Function Documentation