OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryRewriter Class Reference

#include <QueryRewrite.h>

+ Collaboration diagram for QueryRewriter:

Public Member Functions

 QueryRewriter (const std::vector< InputTableInfo > &query_infos, Executor *executor)
 
RelAlgExecutionUnit rewrite (const RelAlgExecutionUnit &ra_exe_unit_in) const
 
RelAlgExecutionUnit rewriteColumnarUpdate (const RelAlgExecutionUnit &ra_exe_unit_in, std::shared_ptr< Analyzer::ColumnVar > column_to_update) const
 
RelAlgExecutionUnit rewriteColumnarDelete (const RelAlgExecutionUnit &ra_exe_unit_in, std::shared_ptr< Analyzer::ColumnVar > delete_column) const
 
RelAlgExecutionUnit rewriteAggregateOnGroupByColumn (const RelAlgExecutionUnit &ra_exe_unit_in) const
 

Private Member Functions

RelAlgExecutionUnit rewriteConstrainedByIn (const RelAlgExecutionUnit &ra_exe_unit_in) const
 
RelAlgExecutionUnit rewriteConstrainedByInImpl (const RelAlgExecutionUnit &ra_exe_unit_in, const std::shared_ptr< Analyzer::CaseExpr >, const Analyzer::InValues *) const
 
std::pair< bool, std::set
< size_t > > 
is_all_groupby_exprs_are_col_var (const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs) const
 
std::shared_ptr
< Analyzer::CaseExpr > 
generateCaseExprForCountDistinctOnGroupByCol (std::shared_ptr< Analyzer::Expr > expr) const
 

Static Private Member Functions

static std::shared_ptr
< Analyzer::CaseExpr > 
generateCaseForDomainValues (const Analyzer::InValues *)
 

Private Attributes

const std::vector
< InputTableInfo > & 
query_infos_
 
Executor * executor_
 
std::vector< std::shared_ptr
< Analyzer::Expr > > 
target_exprs_owned_
 

Detailed Description

Definition at line 28 of file QueryRewrite.h.

Constructor & Destructor Documentation

QueryRewriter::QueryRewriter ( const std::vector< InputTableInfo > &  query_infos,
Executor *  executor 
)
inline

Definition at line 30 of file QueryRewrite.h.

31  : query_infos_(query_infos), executor_(executor) {}
Executor * executor_
Definition: QueryRewrite.h:64
const std::vector< InputTableInfo > & query_infos_
Definition: QueryRewrite.h:63

Member Function Documentation

std::shared_ptr< Analyzer::CaseExpr > QueryRewriter::generateCaseExprForCountDistinctOnGroupByCol ( std::shared_ptr< Analyzer::Expr >  expr) const
private

Definition at line 174 of file QueryRewrite.cpp.

References Datum::bigintval, is_null(), kBIGINT, kBOOLEAN, kISNULL, and kNOT.

Referenced by rewriteAggregateOnGroupByColumn().

175  {
176  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
177  case_expr_list;
178  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, expr);
179  auto is_not_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
180  Datum then_d;
181  then_d.bigintval = 1;
182  const auto then_constant = makeExpr<Analyzer::Constant>(kBIGINT, false, then_d);
183  case_expr_list.emplace_back(is_not_null, then_constant);
184  Datum else_d;
185  else_d.bigintval = 0;
186  const auto else_constant = makeExpr<Analyzer::Constant>(kBIGINT, false, else_d);
187  auto case_expr = makeExpr<Analyzer::CaseExpr>(
188  then_constant->get_type_info(), false, case_expr_list, else_constant);
189  return case_expr;
190 }
CONSTEXPR DEVICE bool is_null(const T &value)
int64_t bigintval
Definition: Datum.h:74
Definition: Datum.h:69
Definition: sqldefs.h:38

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< Analyzer::CaseExpr > QueryRewriter::generateCaseForDomainValues ( const Analyzer::InValues *  in_vals)
static private

Definition at line 149 of file QueryRewrite.cpp.

References Analyzer::Expr::deep_copy(), Analyzer::InValues::get_arg(), Analyzer::InValues::get_value_list(), kBOOLEAN, kENCODING_DICT, kEQ, kONE, shared::StringDictKey::kTransientDictKey, and TRANSIENT_DICT_ID.

Referenced by rewriteConstrainedByIn().

150  {
151  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
152  case_expr_list;
153  auto in_val_arg = in_vals->get_arg()->deep_copy();
154  for (const auto& in_val : in_vals->get_value_list()) {
155  auto case_cond = makeExpr<Analyzer::BinOper>(
156  SQLTypeInfo(kBOOLEAN, true), false, kEQ, kONE, in_val_arg, in_val);
157  auto in_val_copy = in_val->deep_copy();
158  auto ti = in_val_copy->get_type_info();
159  if (ti.is_string() && ti.get_compression() == kENCODING_DICT) {
160  ti.set_comp_param(TRANSIENT_DICT_ID);
161  ti.setStringDictKey(shared::StringDictKey::kTransientDictKey);
162  }
163  in_val_copy->set_type_info(ti);
164  case_expr_list.emplace_back(case_cond, in_val_copy);
165  }
166  // TODO(alex): refine the expression range for case with empty else expression;
167  // for now, add a dummy else which should never be taken
168  auto else_expr = case_expr_list.front().second;
169  return makeExpr<Analyzer::CaseExpr>(
170  case_expr_list.front().second->get_type_info(), false, case_expr_list, else_expr);
171 }
Definition: sqldefs.h:29
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
Definition: sqldefs.h:71
const std::list< std::shared_ptr< Analyzer::Expr > > & get_value_list() const
Definition: Analyzer.h:646
virtual std::shared_ptr< Analyzer::Expr > deep_copy() const =0
static const StringDictKey kTransientDictKey
Definition: DbObjectKeys.h:45
const Expr * get_arg() const
Definition: Analyzer.h:644

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< bool, std::set< size_t > > QueryRewriter::is_all_groupby_exprs_are_col_var ( const std::list< std::shared_ptr< Analyzer::Expr >> &  groupby_exprs) const
private

Definition at line 492 of file QueryRewrite.cpp.

References hash_value().

Referenced by rewriteAggregateOnGroupByColumn().

493  {
494  std::set<size_t> gby_col_exprs_hash;
495  for (auto gby_expr : groupby_exprs) {
496  if (auto gby_col_var = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gby_expr)) {
497  gby_col_exprs_hash.insert(boost::hash_value(gby_col_var->toString()));
498  } else {
499  return {false, {}};
500  }
501  }
502  return {true, gby_col_exprs_hash};
503 }
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3525

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RelAlgExecutionUnit QueryRewriter::rewrite ( const RelAlgExecutionUnit &  ra_exe_unit_in) const

Definition at line 29 of file QueryRewrite.cpp.

References rewriteAggregateOnGroupByColumn(), and rewriteConstrainedByIn().

Referenced by rewriteConstrainedByInImpl().

30  {
31  auto rewritten_exe_unit = rewriteConstrainedByIn(ra_exe_unit_in);
32  return rewriteAggregateOnGroupByColumn(rewritten_exe_unit);
33 }
RelAlgExecutionUnit rewriteConstrainedByIn(const RelAlgExecutionUnit &ra_exe_unit_in) const
RelAlgExecutionUnit rewriteAggregateOnGroupByColumn(const RelAlgExecutionUnit &ra_exe_unit_in) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteAggregateOnGroupByColumn ( const RelAlgExecutionUnit &  ra_exe_unit_in) const

Definition at line 505 of file QueryRewrite.cpp.

References CHECK, CHECK_GT, RelAlgExecutionUnit::estimator, generateCaseExprForCountDistinctOnGroupByCol(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::hash_table_build_plan_dag, hash_value(), RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, is_all_groupby_exprs_are_col_var(), RelAlgExecutionUnit::join_quals, kAPPROX_COUNT_DISTINCT, kAPPROX_QUANTILE, kAVG, kCOUNT, kCOUNT_IF, kFLOAT, kMAX, kMIN, kSAMPLE, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::query_plan_dag_hash, RelAlgExecutionUnit::query_state, RelAlgExecutionUnit::scan_limit, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::table_id_to_node_map, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_original_type_infos, target_exprs_owned_, RelAlgExecutionUnit::union_all, and RelAlgExecutionUnit::use_bump_allocator.

Referenced by rewrite().

506  {
507  auto check_precond = is_all_groupby_exprs_are_col_var(ra_exe_unit_in.groupby_exprs);
508  auto is_expr_on_gby_col = [&check_precond](const Analyzer::AggExpr* agg_expr) {
509  CHECK(agg_expr);
510  if (agg_expr->get_arg()) {
511  // some expr does not have its own arg, i.e., count(*)
512  auto agg_expr_hash = boost::hash_value(agg_expr->get_arg()->toString());
513  // a valid expr should have hashed value > 0
514  CHECK_GT(agg_expr_hash, 0u);
515  if (check_precond.second.count(agg_expr_hash)) {
516  return true;
517  }
518  }
519  return false;
520  };
521  if (!check_precond.first) {
522  // return the input ra_exe_unit if we have gby expr which is not col_var
523  // i.e., group by x+1, y instead of group by x, y
524  // todo (yoonmin) : can we relax this with a simple analysis of groupby / agg exprs?
525  return ra_exe_unit_in;
526  }
527 
528  std::vector<Analyzer::Expr*> new_target_exprs;
529  for (auto expr : ra_exe_unit_in.target_exprs) {
530  bool rewritten = false;
531  if (auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
532  if (is_expr_on_gby_col(agg_expr)) {
533  auto target_expr = agg_expr->get_arg();
534  // we have some issues when this rewriting is applied to float_type groupby column
535  // in subquery, i.e., SELECT MIN(v1) FROM (SELECT v1, AGG(v1) FROM T GROUP BY v1);
536  if (target_expr && target_expr->get_type_info().get_type() != SQLTypes::kFLOAT) {
537  switch (agg_expr->get_aggtype()) {
538  case SQLAgg::kCOUNT:
539  case SQLAgg::kCOUNT_IF:
540  case SQLAgg::kAPPROX_COUNT_DISTINCT: {
541  if (agg_expr->get_aggtype() == SQLAgg::kCOUNT &&
542  !agg_expr->get_is_distinct()) {
543  break;
544  }
545  auto case_expr =
546  generateCaseExprForCountDistinctOnGroupByCol(agg_expr->get_own_arg());
547  new_target_exprs.push_back(case_expr.get());
548  target_exprs_owned_.emplace_back(case_expr);
549  rewritten = true;
550  break;
551  }
552  case SQLAgg::kAPPROX_QUANTILE:
553  case SQLAgg::kAVG:
554  case SQLAgg::kSAMPLE:
555  case SQLAgg::kMAX:
556  case SQLAgg::kMIN: {
557  // we just replace the agg_expr into a plain expr
558  // i.e, avg(x1) --> x1
559  auto agg_expr_ti = agg_expr->get_type_info();
560  auto target_expr = agg_expr->get_own_arg();
561  if (agg_expr_ti != target_expr->get_type_info()) {
562  target_expr = target_expr->add_cast(agg_expr_ti);
563  }
564  new_target_exprs.push_back(target_expr.get());
565  target_exprs_owned_.emplace_back(target_expr);
566  rewritten = true;
567  break;
568  }
569  default:
570  break;
571  }
572  }
573  }
574  }
575  if (!rewritten) {
576  new_target_exprs.push_back(expr);
577  }
578  }
579 
580  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
581  ra_exe_unit_in.input_col_descs,
582  ra_exe_unit_in.simple_quals,
583  ra_exe_unit_in.quals,
584  ra_exe_unit_in.join_quals,
585  ra_exe_unit_in.groupby_exprs,
586  new_target_exprs,
587  ra_exe_unit_in.target_exprs_original_type_infos,
588  ra_exe_unit_in.estimator,
589  ra_exe_unit_in.sort_info,
590  ra_exe_unit_in.scan_limit,
591  ra_exe_unit_in.query_hint,
592  ra_exe_unit_in.query_plan_dag_hash,
593  ra_exe_unit_in.hash_table_build_plan_dag,
594  ra_exe_unit_in.table_id_to_node_map,
595  ra_exe_unit_in.use_bump_allocator,
596  ra_exe_unit_in.union_all,
597  ra_exe_unit_in.query_state};
598  return rewritten_exe_unit;
599 }
std::vector< Analyzer::Expr * > target_exprs
QueryPlanHash query_plan_dag_hash
const std::optional< bool > union_all
std::vector< InputDescriptor > input_descs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
#define CHECK_GT(x, y)
Definition: Logger.h:305
Definition: sqldefs.h:75
const JoinQualsPerNestingLevel join_quals
std::pair< bool, std::set< size_t > > is_all_groupby_exprs_are_col_var(const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs) const
TableIdToNodeMap table_id_to_node_map
const std::shared_ptr< Analyzer::Estimator > estimator
std::unordered_map< size_t, SQLTypeInfo > target_exprs_original_type_infos
Definition: sqldefs.h:78
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3525
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:291
std::shared_ptr< Analyzer::CaseExpr > generateCaseExprForCountDistinctOnGroupByCol(std::shared_ptr< Analyzer::Expr > expr) const
std::shared_ptr< const query_state::QueryState > query_state
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
Definition: sqldefs.h:76
Definition: sqldefs.h:74
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
Definition: QueryRewrite.h:65
HashTableBuildDagMap hash_table_build_plan_dag

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteColumnarDelete ( const RelAlgExecutionUnit &  ra_exe_unit_in,
std::shared_ptr< Analyzer::ColumnVar >  delete_column 
) const

Definition at line 387 of file QueryRewrite.cpp.

References anonymous_namespace{Utm.h}::a, gpu_enabled::accumulate(), Datum::boolval, CHECK, CHECK_EQ, RelAlgExecutionUnit::estimator, RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::hash_table_build_plan_dag, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, RelAlgExecutionUnit::join_quals, kAND, kONE, Parser::CaseExpr::normalize(), RelAlgExecutionUnit::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::query_plan_dag_hash, RelAlgExecutionUnit::query_state, RelAlgExecutionUnit::scan_limit, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::table_id_to_node_map, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_original_type_infos, target_exprs_owned_, RelAlgExecutionUnit::union_all, and RelAlgExecutionUnit::use_bump_allocator.

389  {
390  CHECK_EQ(ra_exe_unit_in.target_exprs.size(), size_t(1));
391  CHECK(ra_exe_unit_in.groupby_exprs.size() == 1 &&
392  !ra_exe_unit_in.groupby_exprs.front());
393 
394  // TODO(adb): is this possible?
395  if (ra_exe_unit_in.join_quals.size() > 0) {
396  throw std::runtime_error("Delete via join not yet supported for temporary tables.");
397  }
398 
399  Datum true_datum;
400  true_datum.boolval = true;
401  const auto deleted_constant =
402  makeExpr<Analyzer::Constant>(delete_column->get_type_info(), false, true_datum);
403 
404  auto input_col_descs = ra_exe_unit_in.input_col_descs;
405 
406  std::shared_ptr<Analyzer::Expr> filter;
407  std::vector<std::shared_ptr<Analyzer::Expr>> filter_exprs;
408  filter_exprs.insert(filter_exprs.end(),
409  ra_exe_unit_in.simple_quals.begin(),
410  ra_exe_unit_in.simple_quals.end());
411  filter_exprs.insert(
412  filter_exprs.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
413 
414  if (filter_exprs.size() > 0) {
415  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
416  case_expr_list;
417  if (filter_exprs.size() == 1) {
418  filter = filter_exprs.front();
419  } else {
420  filter = std::accumulate(
421  std::next(filter_exprs.begin()),
422  filter_exprs.end(),
423  filter_exprs.front(),
424  [](const std::shared_ptr<Analyzer::Expr> a,
425  const std::shared_ptr<Analyzer::Expr> b) {
426  CHECK_EQ(a->get_type_info().get_type(), b->get_type_info().get_type());
427  return makeExpr<Analyzer::BinOper>(a->get_type_info().get_type(),
428  SQLOps::kAND,
429  SQLQualifier::kONE,
430  a->deep_copy(),
431  b->deep_copy());
432  });
433  }
434  std::shared_ptr<Analyzer::Expr> column_to_update{nullptr};
435  auto when_expr = filter; // only one filter, will be a BinOper if multiple filters
436  case_expr_list.emplace_back(std::make_pair(when_expr, deleted_constant));
437  auto case_expr = Parser::CaseExpr::normalize(case_expr_list, delete_column);
438 
439  // the delete column should not be projected, but check anyway
440  auto delete_col_desc_it = std::find_if(
441  input_col_descs.begin(),
442  input_col_descs.end(),
443  [&delete_column](const std::shared_ptr<const InputColDescriptor>& in) {
444  return in->getColId() == delete_column->getColumnKey().column_id;
445  });
446  CHECK(delete_col_desc_it == input_col_descs.end());
447  const auto& column_key = delete_column->getColumnKey();
448  auto delete_col_desc =
449  std::make_shared<const InputColDescriptor>(column_key.column_id,
450  column_key.table_id,
451  column_key.db_id,
452  delete_column->get_rte_idx());
453  input_col_descs.push_back(delete_col_desc);
454  target_exprs_owned_.emplace_back(case_expr);
455  } else {
456  // no filters, simply project the deleted=true column value for all rows
457  const auto& column_key = delete_column->getColumnKey();
458  auto delete_col_desc =
459  std::make_shared<const InputColDescriptor>(column_key.column_id,
460  column_key.table_id,
461  column_key.db_id,
462  delete_column->get_rte_idx());
463  input_col_descs.push_back(delete_col_desc);
464  target_exprs_owned_.emplace_back(deleted_constant);
465  }
466 
467  std::vector<Analyzer::Expr*> target_exprs;
468  CHECK_EQ(target_exprs_owned_.size(), size_t(1));
469  target_exprs.emplace_back(target_exprs_owned_.front().get());
470 
471  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
472  input_col_descs,
473  {},
474  {},
475  ra_exe_unit_in.join_quals,
476  ra_exe_unit_in.groupby_exprs,
477  target_exprs,
478  ra_exe_unit_in.target_exprs_original_type_infos,
479  ra_exe_unit_in.estimator,
480  ra_exe_unit_in.sort_info,
481  ra_exe_unit_in.scan_limit,
482  ra_exe_unit_in.query_hint,
483  ra_exe_unit_in.query_plan_dag_hash,
484  ra_exe_unit_in.hash_table_build_plan_dag,
485  ra_exe_unit_in.table_id_to_node_map,
486  ra_exe_unit_in.use_bump_allocator,
487  ra_exe_unit_in.union_all,
488  ra_exe_unit_in.query_state};
489  return rewritten_exe_unit;
490 }
std::vector< Analyzer::Expr * > target_exprs
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >, const Executor *executor=nullptr)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
QueryPlanHash query_plan_dag_hash
const std::optional< bool > union_all
int8_t boolval
Definition: Datum.h:70
std::vector< InputDescriptor > input_descs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
constexpr double a
Definition: Utm.h:32
const JoinQualsPerNestingLevel join_quals
TableIdToNodeMap table_id_to_node_map
Definition: sqldefs.h:36
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
const std::shared_ptr< Analyzer::Estimator > estimator
Definition: sqldefs.h:71
std::unordered_map< size_t, SQLTypeInfo > target_exprs_original_type_infos
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:291
std::shared_ptr< const query_state::QueryState > query_state
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
Definition: Datum.h:69
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
Definition: QueryRewrite.h:65
HashTableBuildDagMap hash_table_build_plan_dag

+ Here is the call graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteColumnarUpdate ( const RelAlgExecutionUnit &  ra_exe_unit_in,
std::shared_ptr< Analyzer::ColumnVar >  column_to_update 
) const

Definition at line 216 of file QueryRewrite.cpp.

References anonymous_namespace{Utm.h}::a, gpu_enabled::accumulate(), CHECK, CHECK_EQ, anonymous_namespace{QueryRewrite.cpp}::check_string_id_overflow(), RelAlgExecutionUnit::estimator, executor_, Catalog_Namespace::SysCatalog::getCatalog(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::hash_table_build_plan_dag, inline_fixed_encoding_null_val(), RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, Catalog_Namespace::SysCatalog::instance(), RelAlgExecutionUnit::join_quals, kAND, kCAST, kONE, Parser::CaseExpr::normalize(), RelAlgExecutionUnit::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::query_plan_dag_hash, RelAlgExecutionUnit::query_state, RelAlgExecutionUnit::scan_limit, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, Datum::stringval, RelAlgExecutionUnit::table_id_to_node_map, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_original_type_infos, target_exprs_owned_, to_string(), RelAlgExecutionUnit::union_all, and RelAlgExecutionUnit::use_bump_allocator.

218  {
219  CHECK_EQ(ra_exe_unit_in.target_exprs.size(), size_t(2));
220  CHECK(ra_exe_unit_in.groupby_exprs.size() == 1 &&
221  !ra_exe_unit_in.groupby_exprs.front());
222 
223  if (ra_exe_unit_in.join_quals.size() > 0) {
224  throw std::runtime_error("Update via join not yet supported for temporary tables.");
225  }
226 
227  auto new_column_value = ra_exe_unit_in.target_exprs.front()->deep_copy();
228  const auto& new_column_ti = new_column_value->get_type_info();
229  if (column_to_update->get_type_info().is_dict_encoded_string()) {
230  CHECK(new_column_ti.is_dict_encoded_string());
231  if (new_column_ti.getStringDictKey().dict_id > 0 &&
232  new_column_ti.getStringDictKey() !=
233  column_to_update->get_type_info().getStringDictKey()) {
234  throw std::runtime_error(
235  "Updating a dictionary encoded string using another dictionary encoded string "
236  "column is not yet supported, unless both columns share dictionaries.");
237  }
238  if (auto uoper = dynamic_cast<Analyzer::UOper*>(new_column_value.get())) {
239  if (uoper->get_optype() == kCAST &&
240  dynamic_cast<const Analyzer::Constant*>(uoper->get_operand())) {
241  const auto original_constant_expr =
242  dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
243  CHECK(original_constant_expr);
244  CHECK(original_constant_expr->get_type_info().is_string());
245  // extract the string, insert it into the dict for the table we are updating,
246  // and place the dictionary ID in the oper
247 
248  CHECK(column_to_update->get_type_info().is_dict_encoded_string());
249  const auto& dict_key = column_to_update->get_type_info().getStringDictKey();
250  std::map<int, StringDictionary*> string_dicts;
251  const auto catalog =
252  Catalog_Namespace::SysCatalog::instance().getCatalog(dict_key.db_id);
253  CHECK(catalog);
254  const auto dd = catalog->getMetadataForDict(dict_key.dict_id, /*load_dict=*/true);
255  CHECK(dd);
256  auto string_dict = dd->stringDict;
257  CHECK(string_dict);
258 
259  auto string_id =
260  string_dict->getOrAdd(*original_constant_expr->get_constval().stringval);
261  if (check_string_id_overflow(string_id, column_to_update->get_type_info())) {
262  throw std::runtime_error(
263  "Ran out of space in dictionary, cannot update column with dictionary "
264  "encoded string value. Dictionary ID: " +
265  std::to_string(dict_key.dict_id));
266  }
267  if (string_id == inline_int_null_value<int32_t>()) {
268  string_id = inline_fixed_encoding_null_val(column_to_update->get_type_info());
269  }
270 
271  // Codegen expects a string value. The string will be
272  // resolved to its ID during Constant codegen. Copy the string from the
273  // original expr
274  Datum datum;
275  datum.stringval =
276  new std::string(*original_constant_expr->get_constval().stringval);
277  Datum new_string_datum{datum};
278 
279  new_column_value =
280  makeExpr<Analyzer::Constant>(column_to_update->get_type_info(),
281  original_constant_expr->get_is_null(),
282  new_string_datum);
283 
284  // Roll the string dict generation forward, as we have added a string
285  auto row_set_mem_owner = executor_->getRowSetMemoryOwner();
286  CHECK(row_set_mem_owner);
287  auto& str_dict_generations = row_set_mem_owner->getStringDictionaryGenerations();
288  if (str_dict_generations.getGeneration(dict_key) > -1) {
289  str_dict_generations.updateGeneration(dict_key,
290  string_dict->storageEntryCount());
291  } else {
292  // Simple update with no filters does not use a CASE, and therefore does not add
293  // a valid generation
294  str_dict_generations.setGeneration(dict_key, string_dict->storageEntryCount());
295  }
296  }
297  }
298  }
299 
300  auto input_col_descs = ra_exe_unit_in.input_col_descs;
301 
302  std::shared_ptr<Analyzer::Expr> filter;
303  std::vector<std::shared_ptr<Analyzer::Expr>> filter_exprs;
304  filter_exprs.insert(filter_exprs.end(),
305  ra_exe_unit_in.simple_quals.begin(),
306  ra_exe_unit_in.simple_quals.end());
307  filter_exprs.insert(
308  filter_exprs.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
309 
310  if (filter_exprs.size() > 0) {
311  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
312  case_expr_list;
313  if (filter_exprs.size() == 1) {
314  filter = filter_exprs.front();
315  } else {
316  filter = std::accumulate(
317  std::next(filter_exprs.begin()),
318  filter_exprs.end(),
319  filter_exprs.front(),
320  [](const std::shared_ptr<Analyzer::Expr> a,
321  const std::shared_ptr<Analyzer::Expr> b) {
322  CHECK_EQ(a->get_type_info().get_type(), b->get_type_info().get_type());
323  return makeExpr<Analyzer::BinOper>(a->get_type_info().get_type(),
324  SQLOps::kAND,
325  SQLQualifier::kONE,
326  a->deep_copy(),
327  b->deep_copy());
328  });
329  }
330  auto when_expr = filter; // only one filter, will be a BinOper if multiple filters
331  case_expr_list.emplace_back(std::make_pair(when_expr, new_column_value));
332  auto case_expr = Parser::CaseExpr::normalize(case_expr_list, column_to_update);
333 
334  auto col_to_update_var =
335  std::dynamic_pointer_cast<Analyzer::ColumnVar>(column_to_update);
336  CHECK(col_to_update_var);
337  const auto& column_key = col_to_update_var->getColumnKey();
338  auto col_to_update_desc =
339  std::make_shared<const InputColDescriptor>(column_key.column_id,
340  column_key.table_id,
341  column_key.db_id,
342  col_to_update_var->get_rte_idx());
343  auto existing_col_desc_it = std::find_if(
344  input_col_descs.begin(),
345  input_col_descs.end(),
346  [&col_to_update_desc](const std::shared_ptr<const InputColDescriptor>& in) {
347  return *in == *col_to_update_desc;
348  });
349  if (existing_col_desc_it == input_col_descs.end()) {
350  input_col_descs.push_back(col_to_update_desc);
351  }
352  target_exprs_owned_.emplace_back(case_expr);
353  } else {
354  // no filters, simply project the update value
355  target_exprs_owned_.emplace_back(new_column_value);
356  }
357 
358  std::vector<Analyzer::Expr*> target_exprs;
359  CHECK_EQ(target_exprs_owned_.size(), size_t(1));
360  target_exprs.emplace_back(target_exprs_owned_.front().get());
361 
362  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
363  input_col_descs,
364  {},
365  {},
366  ra_exe_unit_in.join_quals,
367  ra_exe_unit_in.groupby_exprs,
368  target_exprs,
369  ra_exe_unit_in.target_exprs_original_type_infos,
370  ra_exe_unit_in.estimator,
371  ra_exe_unit_in.sort_info,
372  ra_exe_unit_in.scan_limit,
373  ra_exe_unit_in.query_hint,
374  ra_exe_unit_in.query_plan_dag_hash,
375  ra_exe_unit_in.hash_table_build_plan_dag,
376  ra_exe_unit_in.table_id_to_node_map,
377  ra_exe_unit_in.use_bump_allocator,
378  ra_exe_unit_in.union_all,
379  ra_exe_unit_in.query_state};
380  return rewritten_exe_unit;
381 }
std::vector< Analyzer::Expr * > target_exprs
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >, const Executor *executor=nullptr)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
QueryPlanHash query_plan_dag_hash
const std::optional< bool > union_all
std::vector< InputDescriptor > input_descs
Definition: sqldefs.h:48
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::string to_string(char const *&&v)
constexpr double a
Definition: Utm.h:32
static SysCatalog & instance()
Definition: SysCatalog.h:343
const JoinQualsPerNestingLevel join_quals
Executor * executor_
Definition: QueryRewrite.h:64
TableIdToNodeMap table_id_to_node_map
Definition: sqldefs.h:36
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
const std::shared_ptr< Analyzer::Estimator > estimator
std::string * stringval
Definition: Datum.h:79
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
Definition: sqldefs.h:71
bool check_string_id_overflow(const int32_t string_id, const SQLTypeInfo &ti)
std::unordered_map< size_t, SQLTypeInfo > target_exprs_original_type_infos
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
std::shared_ptr< const query_state::QueryState > query_state
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
Definition: Datum.h:69
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
Definition: QueryRewrite.h:65
HashTableBuildDagMap hash_table_build_plan_dag

+ Here is the call graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteConstrainedByIn ( const RelAlgExecutionUnit &  ra_exe_unit_in) const
private

Definition at line 35 of file QueryRewrite.cpp.

References generateCaseForDomainValues(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::quals, rewrite_expr(), rewriteConstrainedByInImpl(), and RelAlgExecutionUnit::simple_quals.

Referenced by rewrite().

36  {
37  if (ra_exe_unit_in.groupby_exprs.empty()) {
38  return ra_exe_unit_in;
39  }
40  if (ra_exe_unit_in.groupby_exprs.size() == 1 && !ra_exe_unit_in.groupby_exprs.front()) {
41  return ra_exe_unit_in;
42  }
43  if (!ra_exe_unit_in.simple_quals.empty()) {
44  return ra_exe_unit_in;
45  }
46  if (ra_exe_unit_in.quals.size() != 1) {
47  return ra_exe_unit_in;
48  }
49  auto in_vals =
50  std::dynamic_pointer_cast<Analyzer::InValues>(ra_exe_unit_in.quals.front());
51  if (!in_vals) {
52  in_vals = std::dynamic_pointer_cast<Analyzer::InValues>(
53  rewrite_expr(ra_exe_unit_in.quals.front().get()));
54  }
55  if (!in_vals || in_vals->get_value_list().empty()) {
56  return ra_exe_unit_in;
57  }
58  for (const auto& in_val : in_vals->get_value_list()) {
59  if (!std::dynamic_pointer_cast<Analyzer::Constant>(in_val)) {
60  break;
61  }
62  }
63  if (dynamic_cast<const Analyzer::CaseExpr*>(in_vals->get_arg())) {
64  return ra_exe_unit_in;
65  }
66  auto in_val_cv = dynamic_cast<const Analyzer::ColumnVar*>(in_vals->get_arg());
67  if (in_val_cv) {
68  auto it = std::find_if(
69  ra_exe_unit_in.groupby_exprs.begin(),
70  ra_exe_unit_in.groupby_exprs.end(),
71  [&in_val_cv](std::shared_ptr<Analyzer::Expr> groupby_expr) {
72  if (auto groupby_cv =
73  std::dynamic_pointer_cast<Analyzer::ColumnVar>(groupby_expr)) {
74  return *in_val_cv == *groupby_cv.get();
75  }
76  return false;
77  });
78  if (it != ra_exe_unit_in.groupby_exprs.end()) {
79  // we do not need to deploy case-when rewriting when in_val cv is listed as groupby
80  // col i.e., ... WHERE v IN (SELECT DISTINCT v FROM ...)
81  return ra_exe_unit_in;
82  }
83  }
84  auto case_expr = generateCaseForDomainValues(in_vals.get());
85  return rewriteConstrainedByInImpl(ra_exe_unit_in, case_expr, in_vals.get());
86 }
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
std::list< std::shared_ptr< Analyzer::Expr > > quals
static std::shared_ptr< Analyzer::CaseExpr > generateCaseForDomainValues(const Analyzer::InValues *)
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
RelAlgExecutionUnit rewriteConstrainedByInImpl(const RelAlgExecutionUnit &ra_exe_unit_in, const std::shared_ptr< Analyzer::CaseExpr >, const Analyzer::InValues *) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RelAlgExecutionUnit QueryRewriter::rewriteConstrainedByInImpl ( const RelAlgExecutionUnit &  ra_exe_unit_in,
const std::shared_ptr< Analyzer::CaseExpr >  case_expr,
const Analyzer::InValues *  in_vals 
) const
private

Definition at line 88 of file QueryRewrite.cpp.

References CHECK, executor_, g_constrained_by_in_threshold, Analyzer::InValues::get_arg(), getExpressionRange(), ExpressionRange::getIntMax(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::hash_table_build_plan_dag, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, Integer, RelAlgExecutionUnit::join_quals, Analyzer::Var::kGROUPBY, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::query_hint, query_infos_, RelAlgExecutionUnit::query_plan_dag_hash, rewrite(), RelAlgExecutionUnit::scan_limit, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::table_id_to_node_map, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_original_type_infos, and target_exprs_owned_.

Referenced by rewriteConstrainedByIn().

91  {
92  std::list<std::shared_ptr<Analyzer::Expr>> new_groupby_list;
93  std::vector<Analyzer::Expr*> new_target_exprs;
94  bool rewrite{false};
95  size_t groupby_idx{0};
96  auto it = ra_exe_unit_in.groupby_exprs.begin();
97  for (const auto& group_expr : ra_exe_unit_in.groupby_exprs) {
98  CHECK(group_expr);
99  ++groupby_idx;
100  if (*group_expr == *in_vals->get_arg()) {
101  const auto expr_range = getExpressionRange(it->get(), query_infos_, executor_);
102  if (expr_range.getType() != ExpressionRangeType::Integer) {
103  ++it;
104  continue;
105  }
106  const size_t range_sz = expr_range.getIntMax() - expr_range.getIntMin() + 1;
107  if (range_sz <= in_vals->get_value_list().size() * g_constrained_by_in_threshold) {
108  ++it;
109  continue;
110  }
111  new_groupby_list.push_back(case_expr);
112  for (size_t i = 0; i < ra_exe_unit_in.target_exprs.size(); ++i) {
113  const auto target = ra_exe_unit_in.target_exprs[i];
114  if (*target == *in_vals->get_arg()) {
115  auto var_case_expr = makeExpr<Analyzer::Var>(
116  case_expr->get_type_info(), Analyzer::Var::kGROUPBY, groupby_idx);
117  target_exprs_owned_.push_back(var_case_expr);
118  new_target_exprs.push_back(var_case_expr.get());
119  } else {
120  new_target_exprs.push_back(target);
121  }
122  }
123  rewrite = true;
124  } else {
125  new_groupby_list.push_back(group_expr);
126  }
127  ++it;
128  }
129  if (!rewrite) {
130  return ra_exe_unit_in;
131  }
132  return {ra_exe_unit_in.input_descs,
133  ra_exe_unit_in.input_col_descs,
134  ra_exe_unit_in.simple_quals,
135  ra_exe_unit_in.quals,
136  ra_exe_unit_in.join_quals,
137  new_groupby_list,
138  new_target_exprs,
139  ra_exe_unit_in.target_exprs_original_type_infos,
140  nullptr,
141  ra_exe_unit_in.sort_info,
142  ra_exe_unit_in.scan_limit,
143  ra_exe_unit_in.query_hint,
144  ra_exe_unit_in.query_plan_dag_hash,
145  ra_exe_unit_in.hash_table_build_plan_dag,
146  ra_exe_unit_in.table_id_to_node_map};
147 }
std::vector< Analyzer::Expr * > target_exprs
size_t g_constrained_by_in_threshold
Definition: Execute.cpp:113
QueryPlanHash query_plan_dag_hash
std::vector< InputDescriptor > input_descs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
RelAlgExecutionUnit rewrite(const RelAlgExecutionUnit &ra_exe_unit_in) const
const JoinQualsPerNestingLevel join_quals
Executor * executor_
Definition: QueryRewrite.h:64
TableIdToNodeMap table_id_to_node_map
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
std::unordered_map< size_t, SQLTypeInfo > target_exprs_original_type_infos
const std::vector< InputTableInfo > & query_infos_
Definition: QueryRewrite.h:63
std::list< std::shared_ptr< Analyzer::Expr > > quals
int64_t getIntMax() const
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:291
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
const Expr * get_arg() const
Definition: Analyzer.h:644
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
Definition: QueryRewrite.h:65
HashTableBuildDagMap hash_table_build_plan_dag

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

Executor* QueryRewriter::executor_
private

Definition at line 64 of file QueryRewrite.h.

Referenced by rewriteColumnarUpdate(), and rewriteConstrainedByInImpl().

const std::vector<InputTableInfo>& QueryRewriter::query_infos_
private

Definition at line 63 of file QueryRewrite.h.

Referenced by rewriteConstrainedByInImpl().

std::vector<std::shared_ptr<Analyzer::Expr> > QueryRewriter::target_exprs_owned_
mutable private

The documentation for this class was generated from the following files: