OmniSciDB  16c4e035a1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryRewriter Class Reference

#include <QueryRewrite.h>

+ Collaboration diagram for QueryRewriter:

Public Member Functions

 QueryRewriter (const std::vector< InputTableInfo > &query_infos, Executor *executor)
 
RelAlgExecutionUnit rewrite (const RelAlgExecutionUnit &ra_exe_unit_in) const
 
RelAlgExecutionUnit rewriteColumnarUpdate (const RelAlgExecutionUnit &ra_exe_unit_in, std::shared_ptr< Analyzer::Expr > column_to_update) const
 
RelAlgExecutionUnit rewriteColumnarDelete (const RelAlgExecutionUnit &ra_exe_unit_in, std::shared_ptr< Analyzer::ColumnVar > delete_column) const
 
RelAlgExecutionUnit rewriteAggregateOnGroupByColumn (const RelAlgExecutionUnit &ra_exe_unit_in) const
 

Private Member Functions

RelAlgExecutionUnit rewriteOverlapsJoin (const RelAlgExecutionUnit &ra_exe_unit_in) const
 
RelAlgExecutionUnit rewriteConstrainedByIn (const RelAlgExecutionUnit &ra_exe_unit_in) const
 
RelAlgExecutionUnit rewriteConstrainedByInImpl (const RelAlgExecutionUnit &ra_exe_unit_in, const std::shared_ptr< Analyzer::CaseExpr >, const Analyzer::InValues *) const
 
std::pair< bool, std::set< size_t > > is_all_groupby_exprs_are_col_var (const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs) const
 
std::shared_ptr< Analyzer::CaseExpr > generateCaseExprForCountDistinctOnGroupByCol (std::shared_ptr< Analyzer::Expr > expr) const
 

Static Private Member Functions

static std::shared_ptr< Analyzer::CaseExpr > generateCaseForDomainValues (const Analyzer::InValues *)
 

Private Attributes

const std::vector< InputTableInfo > & query_infos_
 
Executor * executor_
 
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
 

Detailed Description

Definition at line 28 of file QueryRewrite.h.

Constructor & Destructor Documentation

QueryRewriter::QueryRewriter ( const std::vector< InputTableInfo > &  query_infos,
Executor *  executor 
)
inline

Definition at line 30 of file QueryRewrite.h.

31  : query_infos_(query_infos), executor_(executor) {}
Executor * executor_
Definition: QueryRewrite.h:67
const std::vector< InputTableInfo > & query_infos_
Definition: QueryRewrite.h:66

Member Function Documentation

std::shared_ptr< Analyzer::CaseExpr > QueryRewriter::generateCaseExprForCountDistinctOnGroupByCol ( std::shared_ptr< Analyzer::Expr > expr ) const
private

Definition at line 209 of file QueryRewrite.cpp.

References Datum::bigintval, is_null(), kBIGINT, kBOOLEAN, kISNULL, and kNOT.

Referenced by rewriteAggregateOnGroupByColumn().

210  {
211  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
212  case_expr_list;
213  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, expr);
214  auto is_not_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
215  Datum then_d;
216  then_d.bigintval = 1;
217  const auto then_constant = makeExpr<Analyzer::Constant>(kBIGINT, false, then_d);
218  case_expr_list.emplace_back(is_not_null, then_constant);
219  Datum else_d;
220  else_d.bigintval = 0;
221  const auto else_constant = makeExpr<Analyzer::Constant>(kBIGINT, false, else_d);
222  auto case_expr = makeExpr<Analyzer::CaseExpr>(
223  then_constant->get_type_info(), false, case_expr_list, else_constant);
224  return case_expr;
225 }
CONSTEXPR DEVICE bool is_null(const T &value)
int64_t bigintval
Definition: sqltypes.h:215
Definition: sqldefs.h:39

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< Analyzer::CaseExpr > QueryRewriter::generateCaseForDomainValues ( const Analyzer::InValues * in_vals )
static private

Definition at line 185 of file QueryRewrite.cpp.

References Analyzer::Expr::deep_copy(), Analyzer::InValues::get_arg(), Analyzer::InValues::get_value_list(), kBOOLEAN, kENCODING_DICT, kEQ, and kONE.

Referenced by rewriteConstrainedByIn().

186  {
187  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
188  case_expr_list;
189  auto in_val_arg = in_vals->get_arg()->deep_copy();
190  for (const auto& in_val : in_vals->get_value_list()) {
191  auto case_cond = makeExpr<Analyzer::BinOper>(
192  SQLTypeInfo(kBOOLEAN, true), false, kEQ, kONE, in_val_arg, in_val);
193  auto in_val_copy = in_val->deep_copy();
194  auto ti = in_val_copy->get_type_info();
195  if (ti.is_string() && ti.get_compression() == kENCODING_DICT) {
196  ti.set_comp_param(0);
197  }
198  in_val_copy->set_type_info(ti);
199  case_expr_list.emplace_back(case_cond, in_val_copy);
200  }
201  // TODO(alex): refine the expression range for case with empty else expression;
202  // for now, add a dummy else which should never be taken
203  auto else_expr = case_expr_list.front().second;
204  return makeExpr<Analyzer::CaseExpr>(
205  case_expr_list.front().second->get_type_info(), false, case_expr_list, else_expr);
206 }
Definition: sqldefs.h:30
Definition: sqldefs.h:69
const std::list< std::shared_ptr< Analyzer::Expr > > & get_value_list() const
Definition: Analyzer.h:632
virtual std::shared_ptr< Analyzer::Expr > deep_copy() const =0
const Expr * get_arg() const
Definition: Analyzer.h:630

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< bool, std::set< size_t > > QueryRewriter::is_all_groupby_exprs_are_col_var ( const std::list< std::shared_ptr< Analyzer::Expr >> &  groupby_exprs) const
private

Definition at line 518 of file QueryRewrite.cpp.

Referenced by rewriteAggregateOnGroupByColumn().

519  {
520  std::set<size_t> gby_col_exprs_hash;
521  for (auto gby_expr : groupby_exprs) {
522  if (auto gby_col_var = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gby_expr)) {
523  gby_col_exprs_hash.insert(boost::hash_value(gby_col_var->toString()));
524  } else {
525  return {false, {}};
526  }
527  }
528  return {true, gby_col_exprs_hash};
529 }

+ Here is the caller graph for this function:

RelAlgExecutionUnit QueryRewriter::rewrite ( const RelAlgExecutionUnit & ra_exe_unit_in ) const

Definition at line 29 of file QueryRewrite.cpp.

References rewriteAggregateOnGroupByColumn(), rewriteConstrainedByIn(), and rewriteOverlapsJoin().

Referenced by rewriteConstrainedByInImpl().

30  {
31  auto rewritten_exe_unit = rewriteConstrainedByIn(ra_exe_unit_in);
32  auto rewritten_exe_unit_for_agg_on_gby_col =
33  rewriteAggregateOnGroupByColumn(rewritten_exe_unit);
34  return rewriteOverlapsJoin(rewritten_exe_unit_for_agg_on_gby_col);
35 }
RelAlgExecutionUnit rewriteOverlapsJoin(const RelAlgExecutionUnit &ra_exe_unit_in) const
RelAlgExecutionUnit rewriteConstrainedByIn(const RelAlgExecutionUnit &ra_exe_unit_in) const
RelAlgExecutionUnit rewriteAggregateOnGroupByColumn(const RelAlgExecutionUnit &ra_exe_unit_in) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteAggregateOnGroupByColumn ( const RelAlgExecutionUnit & ra_exe_unit_in ) const

Definition at line 531 of file QueryRewrite.cpp.

References CHECK, CHECK_GT, RelAlgExecutionUnit::estimator, generateCaseExprForCountDistinctOnGroupByCol(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::hash_table_build_plan_dag, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, is_all_groupby_exprs_are_col_var(), RelAlgExecutionUnit::join_quals, kAPPROX_COUNT_DISTINCT, kAPPROX_QUANTILE, kAVG, kCOUNT, kFLOAT, kMAX, kMIN, kSAMPLE, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::query_plan_dag, RelAlgExecutionUnit::query_state, RelAlgExecutionUnit::scan_limit, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::table_id_to_node_map, RelAlgExecutionUnit::target_exprs, target_exprs_owned_, RelAlgExecutionUnit::union_all, and RelAlgExecutionUnit::use_bump_allocator.

Referenced by rewrite().

532  {
533  auto check_precond = is_all_groupby_exprs_are_col_var(ra_exe_unit_in.groupby_exprs);
534  auto is_expr_on_gby_col = [&check_precond](const Analyzer::AggExpr* agg_expr) {
535  CHECK(agg_expr);
536  if (agg_expr->get_arg()) {
537  // some expr does not have its own arg, i.e., count(*)
538  auto agg_expr_hash = boost::hash_value(agg_expr->get_arg()->toString());
539  // a valid expr should have hashed value > 0
540  CHECK_GT(agg_expr_hash, 0u);
541  if (check_precond.second.count(agg_expr_hash)) {
542  return true;
543  }
544  }
545  return false;
546  };
547  if (!check_precond.first) {
548  // return the input ra_exe_unit if we have gby expr which is not col_var
549  // i.e., group by x+1, y instead of group by x, y
550  // todo (yoonmin) : can we relax this with a simple analysis of groupby / agg exprs?
551  return ra_exe_unit_in;
552  }
553 
554  std::vector<Analyzer::Expr*> new_target_exprs;
555  for (auto expr : ra_exe_unit_in.target_exprs) {
556  bool rewritten = false;
557  if (auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
558  if (is_expr_on_gby_col(agg_expr)) {
559  auto target_expr = agg_expr->get_arg();
560  // we have some issues when this rewriting is applied to float_type groupby column
561  // in subquery, i.e., SELECT MIN(v1) FROM (SELECT v1, AGG(v1) FROM T GROUP BY v1);
562  if (target_expr && target_expr->get_type_info().get_type() != SQLTypes::kFLOAT) {
563  switch (agg_expr->get_aggtype()) {
564  case SQLAgg::kCOUNT:
565  case SQLAgg::kAPPROX_COUNT_DISTINCT: {
566  if (agg_expr->get_aggtype() == SQLAgg::kCOUNT &&
567  !agg_expr->get_is_distinct()) {
568  break;
569  }
570  auto case_expr =
571  generateCaseExprForCountDistinctOnGroupByCol(agg_expr->get_own_arg());
572  new_target_exprs.push_back(case_expr.get());
573  target_exprs_owned_.emplace_back(case_expr);
574  rewritten = true;
575  break;
576  }
577  case SQLAgg::kAPPROX_QUANTILE:
578  case SQLAgg::kAVG:
579  case SQLAgg::kSAMPLE:
580  case SQLAgg::kMAX:
581  case SQLAgg::kMIN: {
582  // we just replace the agg_expr into a plain expr
583  // i.e, avg(x1) --> x1
584  auto agg_expr_ti = agg_expr->get_type_info();
585  auto target_expr = agg_expr->get_own_arg();
586  if (agg_expr_ti != target_expr->get_type_info()) {
587  target_expr = target_expr->add_cast(agg_expr_ti);
588  }
589  new_target_exprs.push_back(target_expr.get());
590  target_exprs_owned_.emplace_back(target_expr);
591  rewritten = true;
592  break;
593  }
594  default:
595  break;
596  }
597  }
598  }
599  }
600  if (!rewritten) {
601  new_target_exprs.push_back(expr);
602  }
603  }
604 
605  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
606  ra_exe_unit_in.input_col_descs,
607  ra_exe_unit_in.simple_quals,
608  ra_exe_unit_in.quals,
609  ra_exe_unit_in.join_quals,
610  ra_exe_unit_in.groupby_exprs,
611  new_target_exprs,
612  ra_exe_unit_in.estimator,
613  ra_exe_unit_in.sort_info,
614  ra_exe_unit_in.scan_limit,
615  ra_exe_unit_in.query_hint,
616  ra_exe_unit_in.query_plan_dag,
617  ra_exe_unit_in.hash_table_build_plan_dag,
618  ra_exe_unit_in.table_id_to_node_map,
619  ra_exe_unit_in.use_bump_allocator,
620  ra_exe_unit_in.union_all,
621  ra_exe_unit_in.query_state};
622  return rewritten_exe_unit;
623 }
std::vector< Analyzer::Expr * > target_exprs
const std::optional< bool > union_all
std::vector< InputDescriptor > input_descs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
#define CHECK_GT(x, y)
Definition: Logger.h:223
Definition: sqldefs.h:73
const JoinQualsPerNestingLevel join_quals
std::pair< bool, std::set< size_t > > is_all_groupby_exprs_are_col_var(const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs) const
TableIdToNodeMap table_id_to_node_map
const std::shared_ptr< Analyzer::Estimator > estimator
Definition: sqldefs.h:76
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:211
std::shared_ptr< Analyzer::CaseExpr > generateCaseExprForCountDistinctOnGroupByCol(std::shared_ptr< Analyzer::Expr > expr) const
std::shared_ptr< const query_state::QueryState > query_state
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
Definition: sqldefs.h:74
Definition: sqldefs.h:72
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
Definition: QueryRewrite.h:68
HashTableBuildDagMap hash_table_build_plan_dag

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteColumnarDelete ( const RelAlgExecutionUnit & ra_exe_unit_in,
std::shared_ptr< Analyzer::ColumnVar > delete_column 
) const

Definition at line 418 of file QueryRewrite.cpp.

References anonymous_namespace{Utm.h}::a, gpu_enabled::accumulate(), Datum::boolval, CHECK, CHECK_EQ, RelAlgExecutionUnit::estimator, RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::hash_table_build_plan_dag, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, RelAlgExecutionUnit::join_quals, kAND, kONE, Parser::CaseExpr::normalize(), RelAlgExecutionUnit::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::query_plan_dag, RelAlgExecutionUnit::query_state, RelAlgExecutionUnit::scan_limit, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::table_id_to_node_map, RelAlgExecutionUnit::target_exprs, target_exprs_owned_, RelAlgExecutionUnit::union_all, and RelAlgExecutionUnit::use_bump_allocator.

420  {
421  CHECK_EQ(ra_exe_unit_in.target_exprs.size(), size_t(1));
422  CHECK(ra_exe_unit_in.groupby_exprs.size() == 1 &&
423  !ra_exe_unit_in.groupby_exprs.front());
424 
425  // TODO(adb): is this possible?
426  if (ra_exe_unit_in.join_quals.size() > 0) {
427  throw std::runtime_error("Delete via join not yet supported for temporary tables.");
428  }
429 
430  Datum true_datum;
431  true_datum.boolval = true;
432  const auto deleted_constant =
433  makeExpr<Analyzer::Constant>(delete_column->get_type_info(), false, true_datum);
434 
435  auto input_col_descs = ra_exe_unit_in.input_col_descs;
436 
437  std::shared_ptr<Analyzer::Expr> filter;
438  std::vector<std::shared_ptr<Analyzer::Expr>> filter_exprs;
439  filter_exprs.insert(filter_exprs.end(),
440  ra_exe_unit_in.simple_quals.begin(),
441  ra_exe_unit_in.simple_quals.end());
442  filter_exprs.insert(
443  filter_exprs.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
444 
445  if (filter_exprs.size() > 0) {
446  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
447  case_expr_list;
448  if (filter_exprs.size() == 1) {
449  filter = filter_exprs.front();
450  } else {
451  filter = std::accumulate(
452  std::next(filter_exprs.begin()),
453  filter_exprs.end(),
454  filter_exprs.front(),
455  [](const std::shared_ptr<Analyzer::Expr> a,
456  const std::shared_ptr<Analyzer::Expr> b) {
457  CHECK_EQ(a->get_type_info().get_type(), b->get_type_info().get_type());
458  return makeExpr<Analyzer::BinOper>(a->get_type_info().get_type(),
459  SQLOps::kAND,
460  SQLQualifier::kONE,
461  a->deep_copy(),
462  b->deep_copy());
463  });
464  }
465  std::shared_ptr<Analyzer::Expr> column_to_update{nullptr};
466  auto when_expr = filter; // only one filter, will be a BinOper if multiple filters
467  case_expr_list.emplace_back(std::make_pair(when_expr, deleted_constant));
468  auto case_expr = Parser::CaseExpr::normalize(case_expr_list, delete_column);
469 
470  // the delete column should not be projected, but check anyway
471  auto delete_col_desc_it = std::find_if(
472  input_col_descs.begin(),
473  input_col_descs.end(),
474  [&delete_column](const std::shared_ptr<const InputColDescriptor>& in) {
475  return in->getColId() == delete_column->get_column_id();
476  });
477  CHECK(delete_col_desc_it == input_col_descs.end());
478  auto delete_col_desc =
479  std::make_shared<const InputColDescriptor>(delete_column->get_column_id(),
480  delete_column->get_table_id(),
481  delete_column->get_rte_idx());
482  input_col_descs.push_back(delete_col_desc);
483  target_exprs_owned_.emplace_back(case_expr);
484  } else {
485  // no filters, simply project the deleted=true column value for all rows
486  auto delete_col_desc =
487  std::make_shared<const InputColDescriptor>(delete_column->get_column_id(),
488  delete_column->get_table_id(),
489  delete_column->get_rte_idx());
490  input_col_descs.push_back(delete_col_desc);
491  target_exprs_owned_.emplace_back(deleted_constant);
492  }
493 
494  std::vector<Analyzer::Expr*> target_exprs;
495  CHECK_EQ(target_exprs_owned_.size(), size_t(1));
496  target_exprs.emplace_back(target_exprs_owned_.front().get());
497 
498  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
499  input_col_descs,
500  {},
501  {},
502  ra_exe_unit_in.join_quals,
503  ra_exe_unit_in.groupby_exprs,
504  target_exprs,
505  ra_exe_unit_in.estimator,
506  ra_exe_unit_in.sort_info,
507  ra_exe_unit_in.scan_limit,
508  ra_exe_unit_in.query_hint,
509  ra_exe_unit_in.query_plan_dag,
510  ra_exe_unit_in.hash_table_build_plan_dag,
511  ra_exe_unit_in.table_id_to_node_map,
512  ra_exe_unit_in.use_bump_allocator,
513  ra_exe_unit_in.union_all,
514  ra_exe_unit_in.query_state};
515  return rewritten_exe_unit;
516 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:219
const std::optional< bool > union_all
int8_t boolval
Definition: sqltypes.h:211
std::vector< InputDescriptor > input_descs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
constexpr double a
Definition: Utm.h:32
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >)
Definition: ParserNode.cpp:979
const JoinQualsPerNestingLevel join_quals
TableIdToNodeMap table_id_to_node_map
Definition: sqldefs.h:37
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
const std::shared_ptr< Analyzer::Estimator > estimator
Definition: sqldefs.h:69
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:211
std::shared_ptr< const query_state::QueryState > query_state
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
Definition: QueryRewrite.h:68
HashTableBuildDagMap hash_table_build_plan_dag

+ Here is the call graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteColumnarUpdate ( const RelAlgExecutionUnit & ra_exe_unit_in,
std::shared_ptr< Analyzer::Expr > column_to_update 
) const

Definition at line 251 of file QueryRewrite.cpp.

References anonymous_namespace{Utm.h}::a, gpu_enabled::accumulate(), cat(), CHECK, CHECK_EQ, anonymous_namespace{QueryRewrite.cpp}::check_string_id_overflow(), RelAlgExecutionUnit::estimator, executor_, RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::hash_table_build_plan_dag, inline_fixed_encoding_null_val(), RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, RelAlgExecutionUnit::join_quals, kAND, kCAST, kONE, Parser::CaseExpr::normalize(), RelAlgExecutionUnit::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::query_plan_dag, RelAlgExecutionUnit::query_state, RelAlgExecutionUnit::scan_limit, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, Datum::stringval, RelAlgExecutionUnit::table_id_to_node_map, RelAlgExecutionUnit::target_exprs, target_exprs_owned_, to_string(), RelAlgExecutionUnit::union_all, and RelAlgExecutionUnit::use_bump_allocator.

253  {
254  CHECK_EQ(ra_exe_unit_in.target_exprs.size(), size_t(2));
255  CHECK(ra_exe_unit_in.groupby_exprs.size() == 1 &&
256  !ra_exe_unit_in.groupby_exprs.front());
257 
258  if (ra_exe_unit_in.join_quals.size() > 0) {
259  throw std::runtime_error("Update via join not yet supported for temporary tables.");
260  }
261 
262  auto new_column_value = ra_exe_unit_in.target_exprs.front()->deep_copy();
263  const auto& new_column_ti = new_column_value->get_type_info();
264  if (column_to_update->get_type_info().is_dict_encoded_string()) {
265  CHECK(new_column_ti.is_dict_encoded_string());
266  if (new_column_ti.get_comp_param() > 0 &&
267  new_column_ti.get_comp_param() !=
268  column_to_update->get_type_info().get_comp_param()) {
269  throw std::runtime_error(
270  "Updating a dictionary encoded string using another dictionary encoded string "
271  "column is not yet supported, unless both columns share dictionaries.");
272  }
273  if (auto uoper = dynamic_cast<Analyzer::UOper*>(new_column_value.get())) {
274  if (uoper->get_optype() == kCAST &&
275  dynamic_cast<const Analyzer::Constant*>(uoper->get_operand())) {
276  const auto original_constant_expr =
277  dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
278  CHECK(original_constant_expr);
279  CHECK(original_constant_expr->get_type_info().is_string());
280  // extract the string, insert it into the dict for the table we are updating,
281  // and place the dictionary ID in the oper
282  auto cat = executor_->getCatalog();
283  CHECK(cat);
284 
285  CHECK(column_to_update->get_type_info().is_dict_encoded_string());
286  const auto dict_id = column_to_update->get_type_info().get_comp_param();
287  std::map<int, StringDictionary*> string_dicts;
288  const auto dd = cat->getMetadataForDict(dict_id, /*load_dict=*/true);
289  CHECK(dd);
290  auto string_dict = dd->stringDict;
291  CHECK(string_dict);
292 
293  auto string_id =
294  string_dict->getOrAdd(*original_constant_expr->get_constval().stringval);
295  if (check_string_id_overflow(string_id, column_to_update->get_type_info())) {
296  throw std::runtime_error(
297  "Ran out of space in dictionary, cannot update column with dictionary "
298  "encoded string value. Dictionary ID: " +
299  std::to_string(dict_id));
300  }
301  if (string_id == inline_int_null_value<int32_t>()) {
302  string_id = inline_fixed_encoding_null_val(column_to_update->get_type_info());
303  }
304 
305  // Codegen expects a string value. The string will be
306  // resolved to its ID during Constant codegen. Copy the string from the
307  // original expr
308  Datum datum;
309  datum.stringval =
310  new std::string(*original_constant_expr->get_constval().stringval);
311  Datum new_string_datum{datum};
312 
313  new_column_value =
314  makeExpr<Analyzer::Constant>(column_to_update->get_type_info(),
315  original_constant_expr->get_is_null(),
316  new_string_datum);
317 
318  // Roll the string dict generation forward, as we have added a string
319  auto row_set_mem_owner = executor_->getRowSetMemoryOwner();
320  CHECK(row_set_mem_owner);
321  auto& str_dict_generations = row_set_mem_owner->getStringDictionaryGenerations();
322  if (str_dict_generations.getGeneration(dict_id) > -1) {
323  str_dict_generations.updateGeneration(dict_id,
324  string_dict->storageEntryCount());
325  } else {
326  // Simple update with no filters does not use a CASE, and therefore does not add
327  // a valid generation
328  str_dict_generations.setGeneration(dict_id, string_dict->storageEntryCount());
329  }
330  }
331  }
332  }
333 
334  auto input_col_descs = ra_exe_unit_in.input_col_descs;
335 
336  std::shared_ptr<Analyzer::Expr> filter;
337  std::vector<std::shared_ptr<Analyzer::Expr>> filter_exprs;
338  filter_exprs.insert(filter_exprs.end(),
339  ra_exe_unit_in.simple_quals.begin(),
340  ra_exe_unit_in.simple_quals.end());
341  filter_exprs.insert(
342  filter_exprs.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
343 
344  if (filter_exprs.size() > 0) {
345  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
346  case_expr_list;
347  if (filter_exprs.size() == 1) {
348  filter = filter_exprs.front();
349  } else {
350  filter = std::accumulate(
351  std::next(filter_exprs.begin()),
352  filter_exprs.end(),
353  filter_exprs.front(),
354  [](const std::shared_ptr<Analyzer::Expr> a,
355  const std::shared_ptr<Analyzer::Expr> b) {
356  CHECK_EQ(a->get_type_info().get_type(), b->get_type_info().get_type());
357  return makeExpr<Analyzer::BinOper>(a->get_type_info().get_type(),
358  SQLOps::kAND,
359  SQLQualifier::kONE,
360  a->deep_copy(),
361  b->deep_copy());
362  });
363  }
364  auto when_expr = filter; // only one filter, will be a BinOper if multiple filters
365  case_expr_list.emplace_back(std::make_pair(when_expr, new_column_value));
366  auto case_expr = Parser::CaseExpr::normalize(case_expr_list, column_to_update);
367 
368  auto col_to_update_var =
369  std::dynamic_pointer_cast<Analyzer::ColumnVar>(column_to_update);
370  CHECK(col_to_update_var);
371  auto col_to_update_desc =
372  std::make_shared<const InputColDescriptor>(col_to_update_var->get_column_id(),
373  col_to_update_var->get_table_id(),
374  col_to_update_var->get_rte_idx());
375  auto existing_col_desc_it = std::find_if(
376  input_col_descs.begin(),
377  input_col_descs.end(),
378  [&col_to_update_desc](const std::shared_ptr<const InputColDescriptor>& in) {
379  return *in == *col_to_update_desc;
380  });
381  if (existing_col_desc_it == input_col_descs.end()) {
382  input_col_descs.push_back(col_to_update_desc);
383  }
384  target_exprs_owned_.emplace_back(case_expr);
385  } else {
386  // no filters, simply project the update value
387  target_exprs_owned_.emplace_back(new_column_value);
388  }
389 
390  std::vector<Analyzer::Expr*> target_exprs;
391  CHECK_EQ(target_exprs_owned_.size(), size_t(1));
392  target_exprs.emplace_back(target_exprs_owned_.front().get());
393 
394  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
395  input_col_descs,
396  {},
397  {},
398  ra_exe_unit_in.join_quals,
399  ra_exe_unit_in.groupby_exprs,
400  target_exprs,
401  ra_exe_unit_in.estimator,
402  ra_exe_unit_in.sort_info,
403  ra_exe_unit_in.scan_limit,
404  ra_exe_unit_in.query_hint,
405  ra_exe_unit_in.query_plan_dag,
406  ra_exe_unit_in.hash_table_build_plan_dag,
407  ra_exe_unit_in.table_id_to_node_map,
408  ra_exe_unit_in.use_bump_allocator,
409  ra_exe_unit_in.union_all,
410  ra_exe_unit_in.query_state};
411  return rewritten_exe_unit;
412 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:219
std::string cat(Ts &&...args)
const std::optional< bool > union_all
std::vector< InputDescriptor > input_descs
Definition: sqldefs.h:49
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::string to_string(char const *&&v)
constexpr double a
Definition: Utm.h:32
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >)
Definition: ParserNode.cpp:979
const JoinQualsPerNestingLevel join_quals
Executor * executor_
Definition: QueryRewrite.h:67
TableIdToNodeMap table_id_to_node_map
Definition: sqldefs.h:37
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
const std::shared_ptr< Analyzer::Estimator > estimator
std::string * stringval
Definition: sqltypes.h:220
Definition: sqldefs.h:69
bool check_string_id_overflow(const int32_t string_id, const SQLTypeInfo &ti)
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:211
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
std::shared_ptr< const query_state::QueryState > query_state
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
Definition: QueryRewrite.h:68
HashTableBuildDagMap hash_table_build_plan_dag

+ Here is the call graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteConstrainedByIn ( const RelAlgExecutionUnit & ra_exe_unit_in ) const
private

Definition at line 90 of file QueryRewrite.cpp.

References generateCaseForDomainValues(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::quals, rewrite_expr(), rewriteConstrainedByInImpl(), and RelAlgExecutionUnit::simple_quals.

Referenced by rewrite().

91  {
92  if (ra_exe_unit_in.groupby_exprs.empty()) {
93  return ra_exe_unit_in;
94  }
95  if (ra_exe_unit_in.groupby_exprs.size() == 1 && !ra_exe_unit_in.groupby_exprs.front()) {
96  return ra_exe_unit_in;
97  }
98  if (!ra_exe_unit_in.simple_quals.empty()) {
99  return ra_exe_unit_in;
100  }
101  if (ra_exe_unit_in.quals.size() != 1) {
102  return ra_exe_unit_in;
103  }
104  auto in_vals =
105  std::dynamic_pointer_cast<Analyzer::InValues>(ra_exe_unit_in.quals.front());
106  if (!in_vals) {
107  in_vals = std::dynamic_pointer_cast<Analyzer::InValues>(
108  rewrite_expr(ra_exe_unit_in.quals.front().get()));
109  }
110  if (!in_vals || in_vals->get_value_list().empty()) {
111  return ra_exe_unit_in;
112  }
113  for (const auto& in_val : in_vals->get_value_list()) {
114  if (!std::dynamic_pointer_cast<Analyzer::Constant>(in_val)) {
115  break;
116  }
117  }
118  if (dynamic_cast<const Analyzer::CaseExpr*>(in_vals->get_arg())) {
119  return ra_exe_unit_in;
120  }
121  auto case_expr = generateCaseForDomainValues(in_vals.get());
122  return rewriteConstrainedByInImpl(ra_exe_unit_in, case_expr, in_vals.get());
123 }
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
std::list< std::shared_ptr< Analyzer::Expr > > quals
static std::shared_ptr< Analyzer::CaseExpr > generateCaseForDomainValues(const Analyzer::InValues *)
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
RelAlgExecutionUnit rewriteConstrainedByInImpl(const RelAlgExecutionUnit &ra_exe_unit_in, const std::shared_ptr< Analyzer::CaseExpr >, const Analyzer::InValues *) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteConstrainedByInImpl ( const RelAlgExecutionUnit & ra_exe_unit_in,
const std::shared_ptr< Analyzer::CaseExpr > case_expr,
const Analyzer::InValues * in_vals 
) const
private

Definition at line 125 of file QueryRewrite.cpp.

References CHECK, executor_, g_constrained_by_in_threshold, Analyzer::InValues::get_arg(), getExpressionRange(), ExpressionRange::getIntMax(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::hash_table_build_plan_dag, i, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, Integer, RelAlgExecutionUnit::join_quals, Analyzer::Var::kGROUPBY, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::query_hint, query_infos_, RelAlgExecutionUnit::query_plan_dag, rewrite(), RelAlgExecutionUnit::scan_limit, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::table_id_to_node_map, RelAlgExecutionUnit::target_exprs, and target_exprs_owned_.

Referenced by rewriteConstrainedByIn().

128  {
129  std::list<std::shared_ptr<Analyzer::Expr>> new_groupby_list;
130  std::vector<Analyzer::Expr*> new_target_exprs;
131  bool rewrite{false};
132  size_t groupby_idx{0};
133  auto it = ra_exe_unit_in.groupby_exprs.begin();
134  for (const auto& group_expr : ra_exe_unit_in.groupby_exprs) {
135  CHECK(group_expr);
136  ++groupby_idx;
137  if (*group_expr == *in_vals->get_arg()) {
138  const auto expr_range = getExpressionRange(it->get(), query_infos_, executor_);
139  if (expr_range.getType() != ExpressionRangeType::Integer) {
140  ++it;
141  continue;
142  }
143  const size_t range_sz = expr_range.getIntMax() - expr_range.getIntMin() + 1;
144  if (range_sz <= in_vals->get_value_list().size() * g_constrained_by_in_threshold) {
145  ++it;
146  continue;
147  }
148  new_groupby_list.push_back(case_expr);
149  for (size_t i = 0; i < ra_exe_unit_in.target_exprs.size(); ++i) {
150  const auto target = ra_exe_unit_in.target_exprs[i];
151  if (*target == *in_vals->get_arg()) {
152  auto var_case_expr = makeExpr<Analyzer::Var>(
153  case_expr->get_type_info(), Analyzer::Var::kGROUPBY, groupby_idx);
154  target_exprs_owned_.push_back(var_case_expr);
155  new_target_exprs.push_back(var_case_expr.get());
156  } else {
157  new_target_exprs.push_back(target);
158  }
159  }
160  rewrite = true;
161  } else {
162  new_groupby_list.push_back(group_expr);
163  }
164  ++it;
165  }
166  if (!rewrite) {
167  return ra_exe_unit_in;
168  }
169  return {ra_exe_unit_in.input_descs,
170  ra_exe_unit_in.input_col_descs,
171  ra_exe_unit_in.simple_quals,
172  ra_exe_unit_in.quals,
173  ra_exe_unit_in.join_quals,
174  new_groupby_list,
175  new_target_exprs,
176  nullptr,
177  ra_exe_unit_in.sort_info,
178  ra_exe_unit_in.scan_limit,
179  ra_exe_unit_in.query_hint,
180  ra_exe_unit_in.query_plan_dag,
181  ra_exe_unit_in.hash_table_build_plan_dag,
182  ra_exe_unit_in.table_id_to_node_map};
183 }
std::vector< Analyzer::Expr * > target_exprs
size_t g_constrained_by_in_threshold
Definition: Execute.cpp:104
std::vector< InputDescriptor > input_descs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
RelAlgExecutionUnit rewrite(const RelAlgExecutionUnit &ra_exe_unit_in) const
const JoinQualsPerNestingLevel join_quals
Executor * executor_
Definition: QueryRewrite.h:67
TableIdToNodeMap table_id_to_node_map
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
const std::vector< InputTableInfo > & query_infos_
Definition: QueryRewrite.h:66
std::list< std::shared_ptr< Analyzer::Expr > > quals
int64_t getIntMax() const
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:211
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
const Expr * get_arg() const
Definition: Analyzer.h:630
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
Definition: QueryRewrite.h:68
HashTableBuildDagMap hash_table_build_plan_dag

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteOverlapsJoin ( const RelAlgExecutionUnit & ra_exe_unit_in ) const
private

Definition at line 37 of file QueryRewrite.cpp.

References RelAlgExecutionUnit::estimator, g_enable_overlaps_hashjoin, RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::hash_table_build_plan_dag, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, RelAlgExecutionUnit::join_quals, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::query_plan_dag, rewrite_overlaps_conjunction(), RelAlgExecutionUnit::scan_limit, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::table_id_to_node_map, RelAlgExecutionUnit::target_exprs, JoinCondition::type, and RelAlgExecutionUnit::use_bump_allocator.

Referenced by rewrite().

38  {
39  if (!g_enable_overlaps_hashjoin) {
40  return ra_exe_unit_in;
41  }
42  if (ra_exe_unit_in.join_quals.empty()) {
43  return ra_exe_unit_in;
44  }
45 
46  std::list<std::shared_ptr<Analyzer::Expr>> quals;
47  quals.insert(quals.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
48 
49  JoinQualsPerNestingLevel join_condition_per_nesting_level;
50  for (const auto& join_condition_in : ra_exe_unit_in.join_quals) {
51  JoinCondition join_condition{{}, join_condition_in.type};
52 
53  for (const auto& join_qual_expr_in : join_condition_in.quals) {
54  auto new_overlaps_quals = rewrite_overlaps_conjunction(join_qual_expr_in);
55  if (new_overlaps_quals) {
56  const auto& overlaps_quals = *new_overlaps_quals;
57 
58  // Add overlaps qual
59  join_condition.quals.insert(join_condition.quals.end(),
60  overlaps_quals.join_quals.begin(),
61  overlaps_quals.join_quals.end());
62 
63  // Add original quals
64  join_condition.quals.insert(join_condition.quals.end(),
65  overlaps_quals.quals.begin(),
66  overlaps_quals.quals.end());
67  } else {
68  join_condition.quals.push_back(join_qual_expr_in);
69  }
70  }
71  join_condition_per_nesting_level.push_back(join_condition);
72  }
73  return {ra_exe_unit_in.input_descs,
74  ra_exe_unit_in.input_col_descs,
75  ra_exe_unit_in.simple_quals,
76  quals,
77  join_condition_per_nesting_level,
78  ra_exe_unit_in.groupby_exprs,
79  ra_exe_unit_in.target_exprs,
80  ra_exe_unit_in.estimator,
81  ra_exe_unit_in.sort_info,
82  ra_exe_unit_in.scan_limit,
83  ra_exe_unit_in.query_hint,
84  ra_exe_unit_in.query_plan_dag,
85  ra_exe_unit_in.hash_table_build_plan_dag,
86  ra_exe_unit_in.table_id_to_node_map,
87  ra_exe_unit_in.use_bump_allocator};
88 }
std::vector< Analyzer::Expr * > target_exprs
std::vector< InputDescriptor > input_descs
std::vector< JoinCondition > JoinQualsPerNestingLevel
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:98
const JoinQualsPerNestingLevel join_quals
TableIdToNodeMap table_id_to_node_map
boost::optional< OverlapsJoinConjunction > rewrite_overlaps_conjunction(const std::shared_ptr< Analyzer::Expr > expr)
const std::shared_ptr< Analyzer::Estimator > estimator
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
HashTableBuildDagMap hash_table_build_plan_dag

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

Executor* QueryRewriter::executor_
private

Definition at line 67 of file QueryRewrite.h.

Referenced by rewriteColumnarUpdate(), and rewriteConstrainedByInImpl().

const std::vector<InputTableInfo>& QueryRewriter::query_infos_
private

Definition at line 66 of file QueryRewrite.h.

Referenced by rewriteConstrainedByInImpl().

std::vector<std::shared_ptr<Analyzer::Expr> > QueryRewriter::target_exprs_owned_
mutable private

Definition at line 68 of file QueryRewrite.h.

The documentation for this class was generated from the following files: