OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryRewrite.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "QueryRewrite.h"
18 
19 #include <algorithm>
20 #include <memory>
21 #include <vector>
22 
23 #include "ExpressionRange.h"
24 #include "ExpressionRewrite.h"
25 #include "Logger/Logger.h"
26 #include "Parser/ParserNode.h"
27 #include "Shared/sqltypes.h"
28 
30  const RelAlgExecutionUnit& ra_exe_unit_in) const {
  // Runs three rewrite passes in sequence, feeding each result into the next:
  // rewriteConstrainedByIn -> rewriteAggregateOnGroupByColumn -> rewriteOverlapsJoin.
  // The execution unit produced by the last pass is returned.
  // NOTE(review): the line carrying the return type and function name (listing line 29)
  // is absent from this listing -- confirm against the original source.
31  auto rewritten_exe_unit = rewriteConstrainedByIn(ra_exe_unit_in);
32  auto rewritten_exe_unit_for_agg_on_gby_col =
33  rewriteAggregateOnGroupByColumn(rewritten_exe_unit);
34  return rewriteOverlapsJoin(rewritten_exe_unit_for_agg_on_gby_col);
35 }
36 
38  const RelAlgExecutionUnit& ra_exe_unit_in) const {
  // Rebuilds the join qualifiers of the input unit: every join qual that
  // rewrite_overlaps_conjunction() can convert is replaced by the converted quals,
  // all other quals are carried over untouched.
  // NOTE(review): the condition guarding the first early return (listing line 39) is
  // absent from this listing; presumably it tests g_enable_overlaps_hashjoin -- confirm.
40  return ra_exe_unit_in;
41  }
  // Nothing to rewrite when the unit has no join conditions.
42  if (ra_exe_unit_in.join_quals.empty()) {
43  return ra_exe_unit_in;
44  }
45 
  // Plain copy of the non-join quals; they are passed through unchanged.
46  std::list<std::shared_ptr<Analyzer::Expr>> quals;
47  quals.insert(quals.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
48 
  // One output JoinCondition per input nesting level, keeping the level's join type.
49  JoinQualsPerNestingLevel join_condition_per_nesting_level;
50  for (const auto& join_condition_in : ra_exe_unit_in.join_quals) {
51  JoinCondition join_condition{{}, join_condition_in.type};
52 
53  for (const auto& join_qual_expr_in : join_condition_in.quals) {
  // A non-empty result means the qual was rewritten into an overlaps conjunction.
54  auto new_overlaps_quals = rewrite_overlaps_conjunction(join_qual_expr_in);
55  if (new_overlaps_quals) {
56  const auto& overlaps_quals = *new_overlaps_quals;
57 
58  // Add overlaps qual
59  join_condition.quals.insert(join_condition.quals.end(),
60  overlaps_quals.join_quals.begin(),
61  overlaps_quals.join_quals.end());
62 
63  // Add original quals
64  join_condition.quals.insert(join_condition.quals.end(),
65  overlaps_quals.quals.begin(),
66  overlaps_quals.quals.end());
67  } else {
  // Not rewritable: keep the original join qual as is.
68  join_condition.quals.push_back(join_qual_expr_in);
69  }
70  }
71  join_condition_per_nesting_level.push_back(join_condition);
72  }
  // Same unit as the input except for the rebuilt join conditions.
  // NOTE(review): the initializer stops at use_bump_allocator, whereas
  // rewriteColumnarUpdate in this file also passes union_all and query_state --
  // confirm that leaving those two members at their defaults is intended.
73  return {ra_exe_unit_in.input_descs,
74  ra_exe_unit_in.input_col_descs,
75  ra_exe_unit_in.simple_quals,
76  quals,
77  join_condition_per_nesting_level,
78  ra_exe_unit_in.groupby_exprs,
79  ra_exe_unit_in.target_exprs,
80  ra_exe_unit_in.estimator,
81  ra_exe_unit_in.sort_info,
82  ra_exe_unit_in.scan_limit,
83  ra_exe_unit_in.query_hint,
84  ra_exe_unit_in.query_plan_dag_hash,
85  ra_exe_unit_in.hash_table_build_plan_dag,
86  ra_exe_unit_in.table_id_to_node_map,
87  ra_exe_unit_in.use_bump_allocator};
88 }
89 
91  const RelAlgExecutionUnit& ra_exe_unit_in) const {
  // Decides whether a grouped query filtered by a single IN (...) predicate qualifies
  // for the CASE-based group-by rewrite and, if so, delegates to
  // rewriteConstrainedByInImpl. Every bail-out path returns the input unit unchanged.
  // The rewrite is skipped when there is no group by at all ...
92  if (ra_exe_unit_in.groupby_exprs.empty()) {
93  return ra_exe_unit_in;
94  }
  // ... when the only group-by entry is a null expression ...
95  if (ra_exe_unit_in.groupby_exprs.size() == 1 && !ra_exe_unit_in.groupby_exprs.front()) {
96  return ra_exe_unit_in;
97  }
  // ... when any simple qualifier is present ...
98  if (!ra_exe_unit_in.simple_quals.empty()) {
99  return ra_exe_unit_in;
100  }
  // ... or when there is not exactly one regular qualifier.
101  if (ra_exe_unit_in.quals.size() != 1) {
102  return ra_exe_unit_in;
103  }
  // The single qual must be an InValues, either directly or after rewrite_expr().
104  auto in_vals =
105  std::dynamic_pointer_cast<Analyzer::InValues>(ra_exe_unit_in.quals.front());
106  if (!in_vals) {
107  in_vals = std::dynamic_pointer_cast<Analyzer::InValues>(
108  rewrite_expr(ra_exe_unit_in.quals.front().get()));
109  }
110  if (!in_vals || in_vals->get_value_list().empty()) {
111  return ra_exe_unit_in;
112  }
  // NOTE(review): this loop stops at the first non-constant IN value but has no effect
  // on what follows (the break only leaves the loop). Possibly an early return of
  // ra_exe_unit_in was intended -- confirm.
113  for (const auto& in_val : in_vals->get_value_list()) {
114  if (!std::dynamic_pointer_cast<Analyzer::Constant>(in_val)) {
115  break;
116  }
117  }
  // No rewrite when the IN argument is itself a CASE expression.
118  if (dynamic_cast<const Analyzer::CaseExpr*>(in_vals->get_arg())) {
119  return ra_exe_unit_in;
120  }
  // When the IN argument is a plain column, look for an equal column among the
  // group-by expressions.
121  auto in_val_cv = dynamic_cast<const Analyzer::ColumnVar*>(in_vals->get_arg());
122  if (in_val_cv) {
123  auto it = std::find_if(
124  ra_exe_unit_in.groupby_exprs.begin(),
125  ra_exe_unit_in.groupby_exprs.end(),
126  [&in_val_cv](std::shared_ptr<Analyzer::Expr> groupby_expr) {
127  if (auto groupby_cv =
128  std::dynamic_pointer_cast<Analyzer::ColumnVar>(groupby_expr)) {
129  return *in_val_cv == *groupby_cv.get();
130  }
131  return false;
132  });
133  if (it != ra_exe_unit_in.groupby_exprs.end()) {
134  // we do not need to deploy case-when rewriting when in_val cv is listed as groupby
135  // col i.e., ... WHERE v IN (SELECT DISTINCT v FROM ...)
136  return ra_exe_unit_in;
137  }
138  }
  // Build the CASE over the IN values and let the Impl decide whether to apply it.
139  auto case_expr = generateCaseForDomainValues(in_vals.get());
140  return rewriteConstrainedByInImpl(ra_exe_unit_in, case_expr, in_vals.get());
141 }
142 
144  const RelAlgExecutionUnit& ra_exe_unit_in,
145  const std::shared_ptr<Analyzer::CaseExpr> case_expr,
146  const Analyzer::InValues* in_vals) const {
  // Replaces a group-by expression equal to the IN argument with case_expr and makes
  // matching targets refer to that group-by slot. Returns the input unit unchanged
  // when no replacement took place.
147  std::list<std::shared_ptr<Analyzer::Expr>> new_groupby_list;
148  std::vector<Analyzer::Expr*> new_target_exprs;
149  bool rewrite{false};
150  size_t groupby_idx{0};
  // `it` is advanced in lockstep with the range-for below, so it always refers to the
  // same element as group_expr.
151  auto it = ra_exe_unit_in.groupby_exprs.begin();
152  for (const auto& group_expr : ra_exe_unit_in.groupby_exprs) {
153  CHECK(group_expr);
  // Incremented before use, so the index stored in the Var below is 1-based.
154  ++groupby_idx;
155  if (*group_expr == *in_vals->get_arg()) {
  // Only integer-ranged expressions are rewritten.
  // NOTE(review): on both `continue` paths below the matching group_expr is not added
  // to new_groupby_list. That is harmless while `rewrite` stays false (the input unit
  // is returned), but would drop the expression if another iteration sets `rewrite`
  // -- confirm this cannot happen.
156  const auto expr_range = getExpressionRange(it->get(), query_infos_, executor_);
157  if (expr_range.getType() != ExpressionRangeType::Integer) {
158  ++it;
159  continue;
160  }
  // Skip the rewrite when the value range is not larger than
  // (number of IN values) * g_constrained_by_in_threshold.
161  const size_t range_sz = expr_range.getIntMax() - expr_range.getIntMin() + 1;
162  if (range_sz <= in_vals->get_value_list().size() * g_constrained_by_in_threshold) {
163  ++it;
164  continue;
165  }
166  new_groupby_list.push_back(case_expr);
  // Rebuild the target list: targets equal to the IN argument become a Var of kind
  // kGROUPBY pointing at this group-by slot; the new Var is kept alive through
  // target_exprs_owned_. All other targets are carried over.
  // NOTE(review): this loop appends the full target list each time the enclosing
  // branch runs; presumably at most one group-by expression matches -- confirm.
167  for (size_t i = 0; i < ra_exe_unit_in.target_exprs.size(); ++i) {
168  const auto target = ra_exe_unit_in.target_exprs[i];
169  if (*target == *in_vals->get_arg()) {
170  auto var_case_expr = makeExpr<Analyzer::Var>(
171  case_expr->get_type_info(), Analyzer::Var::kGROUPBY, groupby_idx);
172  target_exprs_owned_.push_back(var_case_expr);
173  new_target_exprs.push_back(var_case_expr.get());
174  } else {
175  new_target_exprs.push_back(target);
176  }
177  }
178  rewrite = true;
179  } else {
180  new_groupby_list.push_back(group_expr);
181  }
182  ++it;
183  }
184  if (!rewrite) {
185  return ra_exe_unit_in;
186  }
  // Rewritten unit: new group-by list and targets, and a null estimator in place of
  // the input's. The initializer stops at table_id_to_node_map, so the members after
  // it are not taken from the input.
187  return {ra_exe_unit_in.input_descs,
188  ra_exe_unit_in.input_col_descs,
189  ra_exe_unit_in.simple_quals,
190  ra_exe_unit_in.quals,
191  ra_exe_unit_in.join_quals,
192  new_groupby_list,
193  new_target_exprs,
194  nullptr,
195  ra_exe_unit_in.sort_info,
196  ra_exe_unit_in.scan_limit,
197  ra_exe_unit_in.query_hint,
198  ra_exe_unit_in.query_plan_dag_hash,
199  ra_exe_unit_in.hash_table_build_plan_dag,
200  ra_exe_unit_in.table_id_to_node_map};
201 }
202 
203 std::shared_ptr<Analyzer::CaseExpr> QueryRewriter::generateCaseForDomainValues(
204  const Analyzer::InValues* in_vals) {
205  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
206  case_expr_list;
207  auto in_val_arg = in_vals->get_arg()->deep_copy();
208  for (const auto& in_val : in_vals->get_value_list()) {
209  auto case_cond = makeExpr<Analyzer::BinOper>(
210  SQLTypeInfo(kBOOLEAN, true), false, kEQ, kONE, in_val_arg, in_val);
211  auto in_val_copy = in_val->deep_copy();
212  auto ti = in_val_copy->get_type_info();
213  if (ti.is_string() && ti.get_compression() == kENCODING_DICT) {
214  ti.set_comp_param(0);
215  }
216  in_val_copy->set_type_info(ti);
217  case_expr_list.emplace_back(case_cond, in_val_copy);
218  }
219  // TODO(alex): refine the expression range for case with empty else expression;
220  // for now, add a dummy else which should never be taken
221  auto else_expr = case_expr_list.front().second;
222  return makeExpr<Analyzer::CaseExpr>(
223  case_expr_list.front().second->get_type_info(), false, case_expr_list, else_expr);
224 }
225 
226 std::shared_ptr<Analyzer::CaseExpr>
228  std::shared_ptr<Analyzer::Expr> expr) const {
  // Builds `CASE WHEN NOT (expr IS NULL) THEN 1 ELSE 0 END`, with both constants typed
  // kBIGINT; the CASE itself takes the type of the THEN constant.
  // NOTE(review): the line carrying the function name (listing line 227) is absent
  // from this listing -- confirm against the original source.
229  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
230  case_expr_list;
  // WHEN condition: NOT (expr IS NULL).
231  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, expr);
232  auto is_not_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
  // THEN 1
233  Datum then_d;
234  then_d.bigintval = 1;
235  const auto then_constant = makeExpr<Analyzer::Constant>(kBIGINT, false, then_d);
236  case_expr_list.emplace_back(is_not_null, then_constant);
  // ELSE 0
237  Datum else_d;
238  else_d.bigintval = 0;
239  const auto else_constant = makeExpr<Analyzer::Constant>(kBIGINT, false, else_d);
240  auto case_expr = makeExpr<Analyzer::CaseExpr>(
241  then_constant->get_type_info(), false, case_expr_list, else_constant);
242  return case_expr;
243 }
244 
245 namespace {
246 
247 // TODO(adb): centralize and share (e..g with insert_one_dict_str)
248 bool check_string_id_overflow(const int32_t string_id, const SQLTypeInfo& ti) {
249  switch (ti.get_size()) {
250  case 1:
251  return string_id > max_valid_int_value<int8_t>();
252  case 2:
253  return string_id > max_valid_int_value<int16_t>();
254  case 4:
255  return string_id > max_valid_int_value<int32_t>();
256  default:
257  UNREACHABLE();
258  }
259  UNREACHABLE();
260  return false;
261 }
262 
263 } // namespace
264 
265 /* Rewrites an update query of the form `SELECT new_value, OFFSET_IN_FRAGMENT() FROM t
266  * WHERE <update_filter_condition>` to `SELECT CASE WHEN <update_filter_condition> THEN
267  * new_value ELSE existing value END FROM t`
268  */
270  const RelAlgExecutionUnit& ra_exe_unit_in,
271  std::shared_ptr<Analyzer::Expr> column_to_update) const {
  // Preconditions: exactly two targets (the first is used as the new column value) and
  // a single, null group-by entry. Join-based updates are rejected with runtime_error.
  // NOTE(review): the line carrying the return type and function name (listing line
  // 269) is absent from this listing -- confirm against the original source.
272  CHECK_EQ(ra_exe_unit_in.target_exprs.size(), size_t(2));
273  CHECK(ra_exe_unit_in.groupby_exprs.size() == 1 &&
274  !ra_exe_unit_in.groupby_exprs.front());
275 
276  if (ra_exe_unit_in.join_quals.size() > 0) {
277  throw std::runtime_error("Update via join not yet supported for temporary tables.");
278  }
279 
  // Work on a deep copy of the first target; it may be replaced below.
280  auto new_column_value = ra_exe_unit_in.target_exprs.front()->deep_copy();
281  const auto& new_column_ti = new_column_value->get_type_info();
  // Extra handling when the updated column is a dictionary-encoded string.
282  if (column_to_update->get_type_info().is_dict_encoded_string()) {
283  CHECK(new_column_ti.is_dict_encoded_string());
  // A positive comp_param on the new value must match the updated column's comp_param;
  // otherwise the update is rejected.
284  if (new_column_ti.get_comp_param() > 0 &&
285  new_column_ti.get_comp_param() !=
286  column_to_update->get_type_info().get_comp_param()) {
287  throw std::runtime_error(
288  "Updating a dictionary encoded string using another dictionary encoded string "
289  "column is not yet supported, unless both columns share dictionaries.");
290  }
  // Special case: the new value is a CAST applied to a string constant.
291  if (auto uoper = dynamic_cast<Analyzer::UOper*>(new_column_value.get())) {
292  if (uoper->get_optype() == kCAST &&
293  dynamic_cast<const Analyzer::Constant*>(uoper->get_operand())) {
294  const auto original_constant_expr =
295  dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
296  CHECK(original_constant_expr);
297  CHECK(original_constant_expr->get_type_info().is_string());
298  // extract the string, insert it into the dict for the table we are updating,
299  // and place the dictionary ID in the oper
300  auto cat = executor_->getCatalog();
301  CHECK(cat);
302 
303  CHECK(column_to_update->get_type_info().is_dict_encoded_string());
304  const auto dict_id = column_to_update->get_type_info().get_comp_param();
  // NOTE(review): string_dicts is declared but never used in this function.
305  std::map<int, StringDictionary*> string_dicts;
306  const auto dd = cat->getMetadataForDict(dict_id, /*load_dict=*/true);
307  CHECK(dd);
308  auto string_dict = dd->stringDict;
309  CHECK(string_dict);
310 
  // Add the literal to the target dictionary and make sure the resulting id fits the
  // column's encoded width.
311  auto string_id =
312  string_dict->getOrAdd(*original_constant_expr->get_constval().stringval);
313  if (check_string_id_overflow(string_id, column_to_update->get_type_info())) {
314  throw std::runtime_error(
315  "Ran out of space in dictionary, cannot update column with dictionary "
316  "encoded string value. Dictionary ID: " +
317  std::to_string(dict_id));
318  }
  // NOTE(review): string_id is reassigned here but not read again in this function.
319  if (string_id == inline_int_null_value<int32_t>()) {
320  string_id = inline_fixed_encoding_null_val(column_to_update->get_type_info());
321  }
322 
323  // Codegen expects a string value. The string will be
324  // resolved to its ID during Constant codegen. Copy the string from the
325  // original expr
  // NOTE(review): the std::string is allocated with raw new; presumably the Constant
  // created below takes ownership of datum.stringval -- confirm.
326  Datum datum;
327  datum.stringval =
328  new std::string(*original_constant_expr->get_constval().stringval);
329  Datum new_string_datum{datum};
330 
  // Replace the CAST expression by a constant typed like the updated column.
331  new_column_value =
332  makeExpr<Analyzer::Constant>(column_to_update->get_type_info(),
333  original_constant_expr->get_is_null(),
334  new_string_datum);
335 
336  // Roll the string dict generation forward, as we have added a string
337  auto row_set_mem_owner = executor_->getRowSetMemoryOwner();
338  CHECK(row_set_mem_owner);
339  auto& str_dict_generations = row_set_mem_owner->getStringDictionaryGenerations();
340  if (str_dict_generations.getGeneration(dict_id) > -1) {
341  str_dict_generations.updateGeneration(dict_id,
342  string_dict->storageEntryCount());
343  } else {
344  // Simple update with no filters does not use a CASE, and therefore does not add
345  // a valid generation
346  str_dict_generations.setGeneration(dict_id, string_dict->storageEntryCount());
347  }
348  }
349  }
350  }
351 
  // Local copy; the updated column may be appended to it below.
352  auto input_col_descs = ra_exe_unit_in.input_col_descs;
353 
  // Collect all filters: simple quals first, then regular quals.
354  std::shared_ptr<Analyzer::Expr> filter;
355  std::vector<std::shared_ptr<Analyzer::Expr>> filter_exprs;
356  filter_exprs.insert(filter_exprs.end(),
357  ra_exe_unit_in.simple_quals.begin(),
358  ra_exe_unit_in.simple_quals.end());
359  filter_exprs.insert(
360  filter_exprs.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
361 
362  if (filter_exprs.size() > 0) {
363  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
364  case_expr_list;
365  if (filter_exprs.size() == 1) {
366  filter = filter_exprs.front();
367  } else {
  // Fold all filters into one left-nested kAND BinOper over deep copies; the two
  // operands of each step must have the same type.
  // NOTE(review): one constructor argument (listing line 377) is absent from this
  // listing -- confirm against the original source.
368  filter = std::accumulate(
369  std::next(filter_exprs.begin()),
370  filter_exprs.end(),
371  filter_exprs.front(),
372  [](const std::shared_ptr<Analyzer::Expr> a,
373  const std::shared_ptr<Analyzer::Expr> b) {
374  CHECK_EQ(a->get_type_info().get_type(), b->get_type_info().get_type());
375  return makeExpr<Analyzer::BinOper>(a->get_type_info().get_type(),
376  SQLOps::kAND,
378  a->deep_copy(),
379  b->deep_copy());
380  });
381  }
  // CASE WHEN <filter> THEN <new value> ELSE <column_to_update> END
382  auto when_expr = filter; // only one filter, will be a BinOper if multiple filters
383  case_expr_list.emplace_back(std::make_pair(when_expr, new_column_value));
384  auto case_expr = Parser::CaseExpr::normalize(case_expr_list, column_to_update);
385 
  // The ELSE branch reads the updated column, so make sure it is among the inputs.
386  auto col_to_update_var =
387  std::dynamic_pointer_cast<Analyzer::ColumnVar>(column_to_update);
388  CHECK(col_to_update_var);
389  auto col_to_update_desc =
390  std::make_shared<const InputColDescriptor>(col_to_update_var->get_column_id(),
391  col_to_update_var->get_table_id(),
392  col_to_update_var->get_rte_idx());
393  auto existing_col_desc_it = std::find_if(
394  input_col_descs.begin(),
395  input_col_descs.end(),
396  [&col_to_update_desc](const std::shared_ptr<const InputColDescriptor>& in) {
397  return *in == *col_to_update_desc;
398  });
399  if (existing_col_desc_it == input_col_descs.end()) {
400  input_col_descs.push_back(col_to_update_desc);
401  }
402  target_exprs_owned_.emplace_back(case_expr);
403  } else {
404  // no filters, simply project the update value
405  target_exprs_owned_.emplace_back(new_column_value);
406  }
407 
  // The rewritten unit has a single target. The CHECK_EQ requires
  // target_exprs_owned_ to hold exactly one entry here, i.e. it must have been empty
  // when this function was entered.
408  std::vector<Analyzer::Expr*> target_exprs;
409  CHECK_EQ(target_exprs_owned_.size(), size_t(1));
410  target_exprs.emplace_back(target_exprs_owned_.front().get());
411 
  // Both qualifier lists are emptied: the filter now lives inside the CASE target.
412  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
413  input_col_descs,
414  {},
415  {},
416  ra_exe_unit_in.join_quals,
417  ra_exe_unit_in.groupby_exprs,
418  target_exprs,
419  ra_exe_unit_in.estimator,
420  ra_exe_unit_in.sort_info,
421  ra_exe_unit_in.scan_limit,
422  ra_exe_unit_in.query_hint,
423  ra_exe_unit_in.query_plan_dag_hash,
424  ra_exe_unit_in.hash_table_build_plan_dag,
425  ra_exe_unit_in.table_id_to_node_map,
426  ra_exe_unit_in.use_bump_allocator,
427  ra_exe_unit_in.union_all,
428  ra_exe_unit_in.query_state};
429  return rewritten_exe_unit;
430 }
431 
432 /* Rewrites a delete query of the form `SELECT OFFSET_IN_FRAGMENT() FROM t
433  * WHERE <delete_filter_condition>` to `SELECT CASE WHEN <delete_filter_condition> THEN
434  * true ELSE existing value END FROM t`
435  */
437  const RelAlgExecutionUnit& ra_exe_unit_in,
438  std::shared_ptr<Analyzer::ColumnVar> delete_column) const {
  // Preconditions: exactly one target and a single, null group-by entry. Join-based
  // deletes are rejected with runtime_error.
  // NOTE(review): the line carrying the return type and function name (listing line
  // 436) is absent from this listing -- confirm against the original source.
439  CHECK_EQ(ra_exe_unit_in.target_exprs.size(), size_t(1));
440  CHECK(ra_exe_unit_in.groupby_exprs.size() == 1 &&
441  !ra_exe_unit_in.groupby_exprs.front());
442 
443  // TODO(adb): is this possible?
444  if (ra_exe_unit_in.join_quals.size() > 0) {
445  throw std::runtime_error("Delete via join not yet supported for temporary tables.");
446  }
447 
  // Constant `true`, typed like the delete column; it is the value projected for rows
  // selected by the filter.
448  Datum true_datum;
449  true_datum.boolval = true;
450  const auto deleted_constant =
451  makeExpr<Analyzer::Constant>(delete_column->get_type_info(), false, true_datum);
452 
  // Local copy; the delete column is appended to it in both branches below.
453  auto input_col_descs = ra_exe_unit_in.input_col_descs;
454 
  // Collect all filters: simple quals first, then regular quals.
455  std::shared_ptr<Analyzer::Expr> filter;
456  std::vector<std::shared_ptr<Analyzer::Expr>> filter_exprs;
457  filter_exprs.insert(filter_exprs.end(),
458  ra_exe_unit_in.simple_quals.begin(),
459  ra_exe_unit_in.simple_quals.end());
460  filter_exprs.insert(
461  filter_exprs.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
462 
463  if (filter_exprs.size() > 0) {
464  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
465  case_expr_list;
466  if (filter_exprs.size() == 1) {
467  filter = filter_exprs.front();
468  } else {
  // Fold all filters into one left-nested kAND BinOper over deep copies; the two
  // operands of each step must have the same type.
  // NOTE(review): one constructor argument (listing line 478) is absent from this
  // listing -- confirm against the original source.
469  filter = std::accumulate(
470  std::next(filter_exprs.begin()),
471  filter_exprs.end(),
472  filter_exprs.front(),
473  [](const std::shared_ptr<Analyzer::Expr> a,
474  const std::shared_ptr<Analyzer::Expr> b) {
475  CHECK_EQ(a->get_type_info().get_type(), b->get_type_info().get_type());
476  return makeExpr<Analyzer::BinOper>(a->get_type_info().get_type(),
477  SQLOps::kAND,
479  a->deep_copy(),
480  b->deep_copy());
481  });
482  }
  // NOTE(review): column_to_update is declared but never used in this function.
483  std::shared_ptr<Analyzer::Expr> column_to_update{nullptr};
  // CASE WHEN <filter> THEN true ELSE <delete_column> END
484  auto when_expr = filter; // only one filter, will be a BinOper if multiple filters
485  case_expr_list.emplace_back(std::make_pair(when_expr, deleted_constant));
486  auto case_expr = Parser::CaseExpr::normalize(case_expr_list, delete_column);
487 
488  // the delete column should not be projected, but check anyway
  // The lookup compares column ids only (table id and rte index are not compared).
489  auto delete_col_desc_it = std::find_if(
490  input_col_descs.begin(),
491  input_col_descs.end(),
492  [&delete_column](const std::shared_ptr<const InputColDescriptor>& in) {
493  return in->getColId() == delete_column->get_column_id();
494  });
495  CHECK(delete_col_desc_it == input_col_descs.end());
496  auto delete_col_desc =
497  std::make_shared<const InputColDescriptor>(delete_column->get_column_id(),
498  delete_column->get_table_id(),
499  delete_column->get_rte_idx());
500  input_col_descs.push_back(delete_col_desc);
501  target_exprs_owned_.emplace_back(case_expr);
502  } else {
503  // no filters, simply project the deleted=true column value for all rows
  // Unlike the filtered branch, no check for an already present descriptor is done.
504  auto delete_col_desc =
505  std::make_shared<const InputColDescriptor>(delete_column->get_column_id(),
506  delete_column->get_table_id(),
507  delete_column->get_rte_idx());
508  input_col_descs.push_back(delete_col_desc);
509  target_exprs_owned_.emplace_back(deleted_constant);
510  }
511 
  // The rewritten unit has a single target. The CHECK_EQ requires
  // target_exprs_owned_ to hold exactly one entry here, i.e. it must have been empty
  // when this function was entered.
512  std::vector<Analyzer::Expr*> target_exprs;
513  CHECK_EQ(target_exprs_owned_.size(), size_t(1));
514  target_exprs.emplace_back(target_exprs_owned_.front().get());
515 
  // Both qualifier lists are emptied: the filter now lives inside the CASE target.
516  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
517  input_col_descs,
518  {},
519  {},
520  ra_exe_unit_in.join_quals,
521  ra_exe_unit_in.groupby_exprs,
522  target_exprs,
523  ra_exe_unit_in.estimator,
524  ra_exe_unit_in.sort_info,
525  ra_exe_unit_in.scan_limit,
526  ra_exe_unit_in.query_hint,
527  ra_exe_unit_in.query_plan_dag_hash,
528  ra_exe_unit_in.hash_table_build_plan_dag,
529  ra_exe_unit_in.table_id_to_node_map,
530  ra_exe_unit_in.use_bump_allocator,
531  ra_exe_unit_in.union_all,
532  ra_exe_unit_in.query_state};
533  return rewritten_exe_unit;
534 }
535 
536 std::pair<bool, std::set<size_t>> QueryRewriter::is_all_groupby_exprs_are_col_var(
537  const std::list<std::shared_ptr<Analyzer::Expr>>& groupby_exprs) const {
538  std::set<size_t> gby_col_exprs_hash;
539  for (auto gby_expr : groupby_exprs) {
540  if (auto gby_col_var = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gby_expr)) {
541  gby_col_exprs_hash.insert(boost::hash_value(gby_col_var->toString()));
542  } else {
543  return {false, {}};
544  }
545  }
546  return {true, gby_col_exprs_hash};
547 }
548 
550  const RelAlgExecutionUnit& ra_exe_unit_in) const {
  // Simplifies aggregate targets whose argument is one of the group-by columns; all
  // other targets are carried over unchanged.
  // NOTE(review): the line carrying the return type and function name (listing line
  // 549) is absent from this listing -- confirm against the original source.
  // check_precond.first: all group-by expressions are column variables;
  // check_precond.second: hashes of those columns' toString().
551  auto check_precond = is_all_groupby_exprs_are_col_var(ra_exe_unit_in.groupby_exprs);
  // True when the aggregate has an argument whose toString() hash matches one of the
  // group-by column hashes.
552  auto is_expr_on_gby_col = [&check_precond](const Analyzer::AggExpr* agg_expr) {
553  CHECK(agg_expr);
554  if (agg_expr->get_arg()) {
555  // some expr does not have its own arg, i.e., count(*)
556  auto agg_expr_hash = boost::hash_value(agg_expr->get_arg()->toString());
557  // a valid expr should have hashed value > 0
558  CHECK_GT(agg_expr_hash, 0u);
559  if (check_precond.second.count(agg_expr_hash)) {
560  return true;
561  }
562  }
563  return false;
564  };
565  if (!check_precond.first) {
566  // return the input ra_exe_unit if we have gby expr which is not col_var
567  // i.e., group by x+1, y instead of group by x, y
568  // todo (yoonmin) : can we relax this with a simple analysis of groupby / agg exprs?
569  return ra_exe_unit_in;
570  }
571 
572  std::vector<Analyzer::Expr*> new_target_exprs;
573  for (auto expr : ra_exe_unit_in.target_exprs) {
574  bool rewritten = false;
575  if (auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
576  if (is_expr_on_gby_col(agg_expr)) {
577  auto target_expr = agg_expr->get_arg();
578  // we have some issues when this rewriting is applied to float_type groupby column
579  // in subquery, i.e., SELECT MIN(v1) FROM (SELECT v1, AGG(v1) FROM T GROUP BY v1);
580  if (target_expr && target_expr->get_type_info().get_type() != SQLTypes::kFLOAT) {
  // NOTE(review): listing lines 583 and 595 are absent from this listing; presumably
  // each holds a further `case` label (583 also the opening brace of the scope closed
  // at listing line 594) -- confirm against the original source.
  // In the first case group a non-distinct COUNT is left as is; otherwise the target
  // is replaced by the CASE built by generateCaseExprForCountDistinctOnGroupByCol.
581  switch (agg_expr->get_aggtype()) {
582  case SQLAgg::kCOUNT:
584  if (agg_expr->get_aggtype() == SQLAgg::kCOUNT &&
585  !agg_expr->get_is_distinct()) {
586  break;
587  }
588  auto case_expr =
589  generateCaseExprForCountDistinctOnGroupByCol(agg_expr->get_own_arg());
590  new_target_exprs.push_back(case_expr.get());
591  target_exprs_owned_.emplace_back(case_expr);
592  rewritten = true;
593  break;
594  }
596  case SQLAgg::kAVG:
597  case SQLAgg::kSAMPLE:
598  case SQLAgg::kMAX:
599  case SQLAgg::kMIN: {
600  // we just replace the agg_expr into a plain expr
601  // i.e, avg(x1) --> x1
  // A cast is added when the aggregate's type differs from its argument's type.
602  auto agg_expr_ti = agg_expr->get_type_info();
603  auto target_expr = agg_expr->get_own_arg();
604  if (agg_expr_ti != target_expr->get_type_info()) {
605  target_expr = target_expr->add_cast(agg_expr_ti);
606  }
607  new_target_exprs.push_back(target_expr.get());
608  target_exprs_owned_.emplace_back(target_expr);
609  rewritten = true;
610  break;
611  }
612  default:
613  break;
614  }
615  }
616  }
617  }
  // Targets that were not rewritten are passed through as they are.
618  if (!rewritten) {
619  new_target_exprs.push_back(expr);
620  }
621  }
622 
  // Same unit as the input except for the target list.
623  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
624  ra_exe_unit_in.input_col_descs,
625  ra_exe_unit_in.simple_quals,
626  ra_exe_unit_in.quals,
627  ra_exe_unit_in.join_quals,
628  ra_exe_unit_in.groupby_exprs,
629  new_target_exprs,
630  ra_exe_unit_in.estimator,
631  ra_exe_unit_in.sort_info,
632  ra_exe_unit_in.scan_limit,
633  ra_exe_unit_in.query_hint,
634  ra_exe_unit_in.query_plan_dag_hash,
635  ra_exe_unit_in.hash_table_build_plan_dag,
636  ra_exe_unit_in.table_id_to_node_map,
637  ra_exe_unit_in.use_bump_allocator,
638  ra_exe_unit_in.union_all,
639  ra_exe_unit_in.query_state};
640  return rewritten_exe_unit;
641 }
std::vector< Analyzer::Expr * > target_exprs
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >, const Executor *executor=nullptr)
#define CHECK_EQ(x, y)
Definition: Logger.h:231
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
size_t g_constrained_by_in_threshold
Definition: Execute.cpp:108
std::string cat(Ts &&...args)
QueryPlanHash query_plan_dag_hash
const std::optional< bool > union_all
int8_t boolval
Definition: sqltypes.h:211
std::vector< InputDescriptor > input_descs
#define UNREACHABLE()
Definition: Logger.h:267
Definition: sqldefs.h:49
Definition: sqldefs.h:30
Constants for Builtin SQL Types supported by OmniSci.
std::vector< JoinCondition > JoinQualsPerNestingLevel
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
#define CHECK_GT(x, y)
Definition: Logger.h:235
std::string to_string(char const *&&v)
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
constexpr double a
Definition: Utm.h:32
Definition: sqldefs.h:75
CONSTEXPR DEVICE bool is_null(const T &value)
Classes representing a parse tree.
RelAlgExecutionUnit rewrite(const RelAlgExecutionUnit &ra_exe_unit_in) const
const JoinQualsPerNestingLevel join_quals
std::pair< bool, std::set< size_t > > is_all_groupby_exprs_are_col_var(const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs) const
Executor * executor_
Definition: QueryRewrite.h:67
int64_t bigintval
Definition: sqltypes.h:215
TableIdToNodeMap table_id_to_node_map
RelAlgExecutionUnit rewriteOverlapsJoin(const RelAlgExecutionUnit &ra_exe_unit_in) const
Definition: sqldefs.h:37
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
boost::optional< OverlapsJoinConjunction > rewrite_overlaps_conjunction(const std::shared_ptr< Analyzer::Expr > expr)
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
const std::shared_ptr< Analyzer::Estimator > estimator
std::string * stringval
Definition: sqltypes.h:220
Definition: sqldefs.h:71
bool check_string_id_overflow(const int32_t string_id, const SQLTypeInfo &ti)
RelAlgExecutionUnit rewriteColumnarUpdate(const RelAlgExecutionUnit &ra_exe_unit_in, std::shared_ptr< Analyzer::Expr > column_to_update) const
Definition: sqldefs.h:78
const std::list< std::shared_ptr< Analyzer::Expr > > & get_value_list() const
Definition: Analyzer.h:640
virtual std::shared_ptr< Analyzer::Expr > deep_copy() const =0
const std::vector< InputTableInfo > & query_infos_
Definition: QueryRewrite.h:66
std::list< std::shared_ptr< Analyzer::Expr > > quals
int64_t getIntMax() const
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:223
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
std::shared_ptr< Analyzer::CaseExpr > generateCaseExprForCountDistinctOnGroupByCol(std::shared_ptr< Analyzer::Expr > expr) const
std::shared_ptr< const query_state::QueryState > query_state
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
RelAlgExecutionUnit rewriteColumnarDelete(const RelAlgExecutionUnit &ra_exe_unit_in, std::shared_ptr< Analyzer::ColumnVar > delete_column) const
Definition: sqldefs.h:76
static std::shared_ptr< Analyzer::CaseExpr > generateCaseForDomainValues(const Analyzer::InValues *)
Definition: sqldefs.h:74
RelAlgExecutionUnit rewriteConstrainedByIn(const RelAlgExecutionUnit &ra_exe_unit_in) const
Definition: sqldefs.h:39
RelAlgExecutionUnit rewriteAggregateOnGroupByColumn(const RelAlgExecutionUnit &ra_exe_unit_in) const
const Expr * get_arg() const
Definition: Analyzer.h:638
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
RelAlgExecutionUnit rewriteConstrainedByInImpl(const RelAlgExecutionUnit &ra_exe_unit_in, const std::shared_ptr< Analyzer::CaseExpr >, const Analyzer::InValues *) const
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
Definition: QueryRewrite.h:68
HashTableBuildDagMap hash_table_build_plan_dag