OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryRewrite.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "QueryRewrite.h"
18 
19 #include <algorithm>
20 #include <memory>
21 #include <vector>
22 
23 #include "ExpressionRange.h"
24 #include "ExpressionRewrite.h"
25 #include "Logger/Logger.h"
26 #include "Parser/ParserNode.h"
27 #include "Shared/sqltypes.h"
28 
// Runs three rewrites in sequence, each one consuming the execution unit produced by
// the previous step: rewriteConstrainedByIn, then rewriteAggregateOnGroupByColumn,
// then rewriteOverlapsJoin. The result of the last step is returned.
// NOTE(review): the declarator line that names this function is absent from this
// listing; the symbol index at the end of the file lists it as
// `RelAlgExecutionUnit rewrite(const RelAlgExecutionUnit &ra_exe_unit_in) const`.
30  const RelAlgExecutionUnit& ra_exe_unit_in) const {
31  auto rewritten_exe_unit = rewriteConstrainedByIn(ra_exe_unit_in);
32  auto rewritten_exe_unit_for_agg_on_gby_col =
33  rewriteAggregateOnGroupByColumn(rewritten_exe_unit);
34  return rewriteOverlapsJoin(rewritten_exe_unit_for_agg_on_gby_col);
35 }
36 
// Walks every join qualifier of the input unit, tries to turn eligible ones into an
// overlaps-join conjunction via rewrite_overlaps_conjunction(), and returns a new
// execution unit that carries the rebuilt join conditions.
// NOTE(review): this listing is missing listing lines 37, 39, 55, 59, 61 and 71 of the
// function: the declarator that names it, the condition of the first early return, and
// the declaration/assignments of `rewrite_type`. The symbol index at the end of the
// file lists `RelAlgExecutionUnit rewriteOverlapsJoin(const RelAlgExecutionUnit
// &ra_exe_unit_in) const`. Restore the missing lines from upstream before building.
38  const RelAlgExecutionUnit& ra_exe_unit_in) const {
// Early return of the unchanged input. Its guarding `if` is not visible here;
// presumably it tests g_enable_overlaps_hashjoin (named in the symbol index) - confirm.
40  return ra_exe_unit_in;
41  }
// Without join qualifiers there is nothing to rewrite.
42  if (ra_exe_unit_in.join_quals.empty()) {
43  return ra_exe_unit_in;
44  }
45 
// Plain copy of the non-join qualifiers; it is not modified before being returned.
46  std::list<std::shared_ptr<Analyzer::Expr>> quals;
47  quals.insert(quals.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
48 
// Rebuild the join condition of each nesting level, preserving its join type.
49  JoinQualsPerNestingLevel join_condition_per_nesting_level;
50  for (const auto& join_condition_in : ra_exe_unit_in.join_quals) {
51  JoinCondition join_condition{{}, join_condition_in.type};
52 
53  for (const auto& join_qual_expr_in : join_condition_in.quals) {
54  bool try_to_rewrite_expr_to_overlaps_join = false;
// Candidate 1: the qualifier is a function call. The test applied to `func_name` sits
// on a line missing from this listing (presumably is_overlaps_supported_func) - confirm.
56  auto func_oper = dynamic_cast<Analyzer::FunctionOper*>(join_qual_expr_in.get());
57  if (func_oper) {
58  const auto func_name = func_oper->getName();
60  try_to_rewrite_expr_to_overlaps_join = true;
62  }
63  }
// Candidate 2: a `<` or `<=` comparison whose left operand is a GeoOperator and whose
// right operand is a Constant, accepted only while g_enable_distance_rangejoin is set.
64  auto bin_oper = dynamic_cast<Analyzer::BinOper*>(join_qual_expr_in.get());
65  if (bin_oper && (bin_oper->get_optype() == kLE || bin_oper->get_optype() == kLT)) {
66  auto lhs =
67  dynamic_cast<const Analyzer::GeoOperator*>(bin_oper->get_left_operand());
68  auto rhs = dynamic_cast<const Analyzer::Constant*>(bin_oper->get_right_operand());
69  if (g_enable_distance_rangejoin && lhs && rhs) {
70  try_to_rewrite_expr_to_overlaps_join = true;
72  }
73  }
// Only candidates are handed to the conjunction rewriter; the optional stays empty for
// everything else (and when the rewriter itself returns no result).
74  boost::optional<OverlapsJoinConjunction> new_overlaps_quals = boost::none;
75  if (try_to_rewrite_expr_to_overlaps_join) {
76  new_overlaps_quals = rewrite_overlaps_conjunction(
77  join_qual_expr_in, ra_exe_unit_in.input_descs, rewrite_type, executor_);
78  }
// On success the conjunction's join_quals are appended first, followed by its quals;
// otherwise the original qualifier is kept as is.
79  if (new_overlaps_quals) {
80  const auto& overlaps_quals = *new_overlaps_quals;
81  // Add overlaps qual
82  join_condition.quals.insert(join_condition.quals.end(),
83  overlaps_quals.join_quals.begin(),
84  overlaps_quals.join_quals.end());
85  // Add original quals
86  join_condition.quals.insert(join_condition.quals.end(),
87  overlaps_quals.quals.begin(),
88  overlaps_quals.quals.end());
89  } else {
90  join_condition.quals.push_back(join_qual_expr_in);
91  }
92  }
93  join_condition_per_nesting_level.push_back(join_condition);
94  }
// Same fields as the input except for the rebuilt join conditions.
// NOTE(review): the initializer stops at use_bump_allocator; union_all and query_state,
// which other rewrites in this file forward, are not passed here - confirm intended.
95  return {ra_exe_unit_in.input_descs,
96  ra_exe_unit_in.input_col_descs,
97  ra_exe_unit_in.simple_quals,
98  quals,
99  join_condition_per_nesting_level,
100  ra_exe_unit_in.groupby_exprs,
101  ra_exe_unit_in.target_exprs,
102  ra_exe_unit_in.target_exprs_original_type_infos,
103  ra_exe_unit_in.estimator,
104  ra_exe_unit_in.sort_info,
105  ra_exe_unit_in.scan_limit,
106  ra_exe_unit_in.query_hint,
107  ra_exe_unit_in.query_plan_dag_hash,
108  ra_exe_unit_in.hash_table_build_plan_dag,
109  ra_exe_unit_in.table_id_to_node_map,
110  ra_exe_unit_in.use_bump_allocator};
111 }
112 
// Decides whether a GROUP BY query filtered by a single IN (...) predicate qualifies
// for the CASE-based rewrite and, if so, delegates to rewriteConstrainedByInImpl with
// the CASE expression built by generateCaseForDomainValues. The input unit is returned
// unchanged as soon as any precondition below fails.
// NOTE(review): the declarator line naming this function is absent from this listing;
// the symbol index at the end of the file lists `RelAlgExecutionUnit
// rewriteConstrainedByIn(const RelAlgExecutionUnit &ra_exe_unit_in) const`.
114  const RelAlgExecutionUnit& ra_exe_unit_in) const {
// Precondition: there is at least one group-by expression ...
115  if (ra_exe_unit_in.groupby_exprs.empty()) {
116  return ra_exe_unit_in;
117  }
// ... and the group-by list is not just a single null entry.
118  if (ra_exe_unit_in.groupby_exprs.size() == 1 && !ra_exe_unit_in.groupby_exprs.front()) {
119  return ra_exe_unit_in;
120  }
// Precondition: no simple qualifiers and exactly one regular qualifier.
121  if (!ra_exe_unit_in.simple_quals.empty()) {
122  return ra_exe_unit_in;
123  }
124  if (ra_exe_unit_in.quals.size() != 1) {
125  return ra_exe_unit_in;
126  }
// Precondition: that qualifier is an InValues expression, either directly or after
// passing it through rewrite_expr(), and its value list is not empty.
127  auto in_vals =
128  std::dynamic_pointer_cast<Analyzer::InValues>(ra_exe_unit_in.quals.front());
129  if (!in_vals) {
130  in_vals = std::dynamic_pointer_cast<Analyzer::InValues>(
131  rewrite_expr(ra_exe_unit_in.quals.front().get()));
132  }
133  if (!in_vals || in_vals->get_value_list().empty()) {
134  return ra_exe_unit_in;
135  }
// NOTE(review): this loop only `break`s on the first non-Constant value and changes no
// state, so it has no effect on the outcome; a non-constant IN value does not stop the
// rewrite. Possibly `return ra_exe_unit_in;` was intended - confirm.
136  for (const auto& in_val : in_vals->get_value_list()) {
137  if (!std::dynamic_pointer_cast<Analyzer::Constant>(in_val)) {
138  break;
139  }
140  }
// Precondition: the expression tested by IN is not itself a CASE expression.
141  if (dynamic_cast<const Analyzer::CaseExpr*>(in_vals->get_arg())) {
142  return ra_exe_unit_in;
143  }
// When the tested expression is a column that already appears as a group-by column,
// the unit is returned unchanged.
144  auto in_val_cv = dynamic_cast<const Analyzer::ColumnVar*>(in_vals->get_arg());
145  if (in_val_cv) {
146  auto it = std::find_if(
147  ra_exe_unit_in.groupby_exprs.begin(),
148  ra_exe_unit_in.groupby_exprs.end(),
149  [&in_val_cv](std::shared_ptr<Analyzer::Expr> groupby_expr) {
150  if (auto groupby_cv =
151  std::dynamic_pointer_cast<Analyzer::ColumnVar>(groupby_expr)) {
152  return *in_val_cv == *groupby_cv.get();
153  }
154  return false;
155  });
156  if (it != ra_exe_unit_in.groupby_exprs.end()) {
157  // we do not need to deploy case-when rewriting when in_val cv is listed as groupby
158  // col i.e., ... WHERE v IN (SELECT DISTINCT v FROM ...)
159  return ra_exe_unit_in;
160  }
161  }
// All preconditions hold: build the CASE over the IN values and attempt the rewrite.
162  auto case_expr = generateCaseForDomainValues(in_vals.get());
163  return rewriteConstrainedByInImpl(ra_exe_unit_in, case_expr, in_vals.get());
164 }
165 
// Replaces the group-by expression that equals the IN argument with `case_expr` and
// redirects matching targets to a Var that refers to that group-by slot. The rewrite
// is only applied when the expression has an integer range that is larger than
// (number of IN values * g_constrained_by_in_threshold); otherwise the input unit is
// returned unchanged.
// NOTE(review): the declarator line naming this function is absent from this listing;
// the symbol index at the end of the file lists `RelAlgExecutionUnit
// rewriteConstrainedByInImpl(const RelAlgExecutionUnit &ra_exe_unit_in, const
// std::shared_ptr< Analyzer::CaseExpr >, const Analyzer::InValues *) const`.
167  const RelAlgExecutionUnit& ra_exe_unit_in,
168  const std::shared_ptr<Analyzer::CaseExpr> case_expr,
169  const Analyzer::InValues* in_vals) const {
170  std::list<std::shared_ptr<Analyzer::Expr>> new_groupby_list;
171  std::vector<Analyzer::Expr*> new_target_exprs;
172  bool rewrite{false};
173  size_t groupby_idx{0};
// `it` is advanced by hand on every path so that it stays aligned with `group_expr`.
174  auto it = ra_exe_unit_in.groupby_exprs.begin();
175  for (const auto& group_expr : ra_exe_unit_in.groupby_exprs) {
176  CHECK(group_expr);
// Incremented before use, so the index handed to Analyzer::Var below is 1-based.
177  ++groupby_idx;
178  if (*group_expr == *in_vals->get_arg()) {
// Skip the rewrite for non-integer ranges and for ranges that are not large enough
// relative to the number of IN values.
// NOTE(review): on both skip paths the expression is not appended to
// new_groupby_list; that only matters if another entry later sets `rewrite` - confirm
// this cannot happen.
179  const auto expr_range = getExpressionRange(it->get(), query_infos_, executor_);
180  if (expr_range.getType() != ExpressionRangeType::Integer) {
181  ++it;
182  continue;
183  }
184  const size_t range_sz = expr_range.getIntMax() - expr_range.getIntMin() + 1;
185  if (range_sz <= in_vals->get_value_list().size() * g_constrained_by_in_threshold) {
186  ++it;
187  continue;
188  }
// Group by the CASE expression instead of the original expression.
189  new_groupby_list.push_back(case_expr);
// Targets equal to the IN argument become a group-by Var typed like the CASE
// expression; the Var is kept alive through target_exprs_owned_. All other targets
// are passed through untouched.
190  for (size_t i = 0; i < ra_exe_unit_in.target_exprs.size(); ++i) {
191  const auto target = ra_exe_unit_in.target_exprs[i];
192  if (*target == *in_vals->get_arg()) {
193  auto var_case_expr = makeExpr<Analyzer::Var>(
194  case_expr->get_type_info(), Analyzer::Var::kGROUPBY, groupby_idx);
195  target_exprs_owned_.push_back(var_case_expr);
196  new_target_exprs.push_back(var_case_expr.get());
197  } else {
198  new_target_exprs.push_back(target);
199  }
200  }
201  rewrite = true;
202  } else {
203  new_groupby_list.push_back(group_expr);
204  }
205  ++it;
206  }
207  if (!rewrite) {
208  return ra_exe_unit_in;
209  }
// The rewritten unit uses the new group-by list and targets and passes nullptr in the
// estimator position.
// NOTE(review): the initializer stops at table_id_to_node_map; use_bump_allocator,
// union_all and query_state are not forwarded here - confirm intended.
210  return {ra_exe_unit_in.input_descs,
211  ra_exe_unit_in.input_col_descs,
212  ra_exe_unit_in.simple_quals,
213  ra_exe_unit_in.quals,
214  ra_exe_unit_in.join_quals,
215  new_groupby_list,
216  new_target_exprs,
217  ra_exe_unit_in.target_exprs_original_type_infos,
218  nullptr,
219  ra_exe_unit_in.sort_info,
220  ra_exe_unit_in.scan_limit,
221  ra_exe_unit_in.query_hint,
222  ra_exe_unit_in.query_plan_dag_hash,
223  ra_exe_unit_in.hash_table_build_plan_dag,
224  ra_exe_unit_in.table_id_to_node_map};
225 }
226 
227 std::shared_ptr<Analyzer::CaseExpr> QueryRewriter::generateCaseForDomainValues(
228  const Analyzer::InValues* in_vals) {
229  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
230  case_expr_list;
231  auto in_val_arg = in_vals->get_arg()->deep_copy();
232  for (const auto& in_val : in_vals->get_value_list()) {
233  auto case_cond = makeExpr<Analyzer::BinOper>(
234  SQLTypeInfo(kBOOLEAN, true), false, kEQ, kONE, in_val_arg, in_val);
235  auto in_val_copy = in_val->deep_copy();
236  auto ti = in_val_copy->get_type_info();
237  if (ti.is_string() && ti.get_compression() == kENCODING_DICT) {
238  ti.set_comp_param(0);
239  }
240  in_val_copy->set_type_info(ti);
241  case_expr_list.emplace_back(case_cond, in_val_copy);
242  }
243  // TODO(alex): refine the expression range for case with empty else expression;
244  // for now, add a dummy else which should never be taken
245  auto else_expr = case_expr_list.front().second;
246  return makeExpr<Analyzer::CaseExpr>(
247  case_expr_list.front().second->get_type_info(), false, case_expr_list, else_expr);
248 }
249 
// Builds `CASE WHEN NOT (expr IS NULL) THEN 1 ELSE 0 END`, with both constants typed
// as BIGINT and the CASE typed after the THEN constant.
// NOTE(review): the line carrying the qualified function name (between the return type
// and the parameter list) is absent from this listing; the symbol index at the end of
// the file lists `generateCaseExprForCountDistinctOnGroupByCol(
// std::shared_ptr< Analyzer::Expr > expr) const`.
250 std::shared_ptr<Analyzer::CaseExpr>
252  std::shared_ptr<Analyzer::Expr> expr) const {
253  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
254  case_expr_list;
// WHEN condition: NOT (expr IS NULL).
255  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, expr);
256  auto is_not_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
// THEN branch: BIGINT constant 1.
257  Datum then_d;
258  then_d.bigintval = 1;
259  const auto then_constant = makeExpr<Analyzer::Constant>(kBIGINT, false, then_d);
260  case_expr_list.emplace_back(is_not_null, then_constant);
// ELSE branch: BIGINT constant 0.
261  Datum else_d;
262  else_d.bigintval = 0;
263  const auto else_constant = makeExpr<Analyzer::Constant>(kBIGINT, false, else_d);
264  auto case_expr = makeExpr<Analyzer::CaseExpr>(
265  then_constant->get_type_info(), false, case_expr_list, else_constant);
266  return case_expr;
267 }
268 
269 namespace {
270 
271 // TODO(adb): centralize and share (e..g with insert_one_dict_str)
272 bool check_string_id_overflow(const int32_t string_id, const SQLTypeInfo& ti) {
273  switch (ti.get_size()) {
274  case 1:
275  return string_id > max_valid_int_value<int8_t>();
276  case 2:
277  return string_id > max_valid_int_value<int16_t>();
278  case 4:
279  return string_id > max_valid_int_value<int32_t>();
280  default:
281  UNREACHABLE();
282  }
283  UNREACHABLE();
284  return false;
285 }
286 
287 } // namespace
288 
289 /* Rewrites an update query of the form `SELECT new_value, OFFSET_IN_FRAGMENT() FROM t
290  * WHERE <update_filter_condition>` to `SELECT CASE WHEN <update_filter_condition> THEN
291  * new_value ELSE existing value END FROM t`
292  */
// The returned unit has empty simple_quals/quals (the filters live in the CASE) and a
// single target expression. Throws std::runtime_error when the unit has join
// qualifiers, when a dictionary-encoded column is updated from a column with a
// different dictionary, or when the target dictionary runs out of ids.
// NOTE(review): this listing is missing listing lines 293 (the declarator naming the
// function), 329 (the initializer of `catalog`) and 402 (one argument of the BinOper
// built in the accumulate lambda). The symbol index at the end of the file lists
// `RelAlgExecutionUnit rewriteColumnarUpdate(const RelAlgExecutionUnit
// &ra_exe_unit_in, std::shared_ptr< Analyzer::ColumnVar > column_to_update) const`.
294  const RelAlgExecutionUnit& ra_exe_unit_in,
295  std::shared_ptr<Analyzer::ColumnVar> column_to_update) const {
// Expected input shape: exactly two targets and a group-by list holding one null entry.
296  CHECK_EQ(ra_exe_unit_in.target_exprs.size(), size_t(2));
297  CHECK(ra_exe_unit_in.groupby_exprs.size() == 1 &&
298  !ra_exe_unit_in.groupby_exprs.front());
299 
300  if (ra_exe_unit_in.join_quals.size() > 0) {
301  throw std::runtime_error("Update via join not yet supported for temporary tables.");
302  }
303 
// The first target is the new value; work on a private copy of it.
304  auto new_column_value = ra_exe_unit_in.target_exprs.front()->deep_copy();
305  const auto& new_column_ti = new_column_value->get_type_info();
306  if (column_to_update->get_type_info().is_dict_encoded_string()) {
307  CHECK(new_column_ti.is_dict_encoded_string());
// A new value with a positive dict_id must use the same dictionary as the target
// column.
308  if (new_column_ti.getStringDictKey().dict_id > 0 &&
309  new_column_ti.getStringDictKey() !=
310  column_to_update->get_type_info().getStringDictKey()) {
311  throw std::runtime_error(
312  "Updating a dictionary encoded string using another dictionary encoded string "
313  "column is not yet supported, unless both columns share dictionaries.");
314  }
// Special case: the new value is CAST(<string constant>).
315  if (auto uoper = dynamic_cast<Analyzer::UOper*>(new_column_value.get())) {
316  if (uoper->get_optype() == kCAST &&
317  dynamic_cast<const Analyzer::Constant*>(uoper->get_operand())) {
318  const auto original_constant_expr =
319  dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
320  CHECK(original_constant_expr);
321  CHECK(original_constant_expr->get_type_info().is_string());
322  // extract the string, insert it into the dict for the table we are updating,
323  // and place the dictionary ID in the oper
324 
325  CHECK(column_to_update->get_type_info().is_dict_encoded_string());
326  const auto& dict_key = column_to_update->get_type_info().getStringDictKey();
// NOTE(review): `string_dicts` is never used in the visible code.
327  std::map<int, StringDictionary*> string_dicts;
// NOTE(review): the initializer of `catalog` is on a line missing from this listing.
328  const auto catalog =
330  CHECK(catalog);
331  const auto dd = catalog->getMetadataForDict(dict_key.dict_id, /*load_dict=*/true);
332  CHECK(dd);
333  auto string_dict = dd->stringDict;
334  CHECK(string_dict);
335 
// Make sure the string exists in the target dictionary and that its id still fits the
// column's storage width.
336  auto string_id =
337  string_dict->getOrAdd(*original_constant_expr->get_constval().stringval);
338  if (check_string_id_overflow(string_id, column_to_update->get_type_info())) {
339  throw std::runtime_error(
340  "Ran out of space in dictionary, cannot update column with dictionary "
341  "encoded string value. Dictionary ID: " +
342  std::to_string(dict_key.dict_id));
343  }
// NOTE(review): `string_id` is remapped here but not read again in the visible code.
344  if (string_id == inline_int_null_value<int32_t>()) {
345  string_id = inline_fixed_encoding_null_val(column_to_update->get_type_info());
346  }
347 
348  // Codegen expects a string value. The string will be
349  // resolved to its ID during Constant codegen. Copy the string from the
350  // original expr
// NOTE(review): raw `new`; presumably the Constant built below takes ownership of the
// string - confirm.
351  Datum datum;
352  datum.stringval =
353  new std::string(*original_constant_expr->get_constval().stringval);
354  Datum new_string_datum{datum};
355 
// Replace the CAST with a constant typed like the target column.
356  new_column_value =
357  makeExpr<Analyzer::Constant>(column_to_update->get_type_info(),
358  original_constant_expr->get_is_null(),
359  new_string_datum);
360 
361  // Roll the string dict generation forward, as we have added a string
362  auto row_set_mem_owner = executor_->getRowSetMemoryOwner();
363  CHECK(row_set_mem_owner);
364  auto& str_dict_generations = row_set_mem_owner->getStringDictionaryGenerations();
365  if (str_dict_generations.getGeneration(dict_key) > -1) {
366  str_dict_generations.updateGeneration(dict_key,
367  string_dict->storageEntryCount());
368  } else {
369  // Simple update with no filters does not use a CASE, and therefore does not add
370  // a valid generation
371  str_dict_generations.setGeneration(dict_key, string_dict->storageEntryCount());
372  }
373  }
374  }
375  }
376 
377  auto input_col_descs = ra_exe_unit_in.input_col_descs;
378 
// Collect every filter: simple qualifiers first, then the regular qualifiers.
379  std::shared_ptr<Analyzer::Expr> filter;
380  std::vector<std::shared_ptr<Analyzer::Expr>> filter_exprs;
381  filter_exprs.insert(filter_exprs.end(),
382  ra_exe_unit_in.simple_quals.begin(),
383  ra_exe_unit_in.simple_quals.end());
384  filter_exprs.insert(
385  filter_exprs.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
386 
387  if (filter_exprs.size() > 0) {
388  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
389  case_expr_list;
// A single filter is used directly; several filters are folded left to right into a
// chain of AND operators over deep copies of the operands.
390  if (filter_exprs.size() == 1) {
391  filter = filter_exprs.front();
392  } else {
393  filter = std::accumulate(
394  std::next(filter_exprs.begin()),
395  filter_exprs.end(),
396  filter_exprs.front(),
397  [](const std::shared_ptr<Analyzer::Expr> a,
398  const std::shared_ptr<Analyzer::Expr> b) {
399  CHECK_EQ(a->get_type_info().get_type(), b->get_type_info().get_type());
400  return makeExpr<Analyzer::BinOper>(a->get_type_info().get_type(),
401  SQLOps::kAND,
403  a->deep_copy(),
404  b->deep_copy());
405  });
406  }
// CASE WHEN <filter> THEN <new value>, with the column being updated passed to
// normalize() as the remaining branch.
407  auto when_expr = filter; // only one filter, will be a BinOper if multiple filters
408  case_expr_list.emplace_back(std::make_pair(when_expr, new_column_value));
409  auto case_expr = Parser::CaseExpr::normalize(case_expr_list, column_to_update);
410 
// The CASE reads the existing column value, so the column must be among the inputs;
// add a descriptor for it unless an equal one is already present.
411  auto col_to_update_var =
412  std::dynamic_pointer_cast<Analyzer::ColumnVar>(column_to_update);
413  CHECK(col_to_update_var);
414  const auto& column_key = col_to_update_var->getColumnKey();
415  auto col_to_update_desc =
416  std::make_shared<const InputColDescriptor>(column_key.column_id,
417  column_key.table_id,
418  column_key.db_id,
419  col_to_update_var->get_rte_idx());
420  auto existing_col_desc_it = std::find_if(
421  input_col_descs.begin(),
422  input_col_descs.end(),
423  [&col_to_update_desc](const std::shared_ptr<const InputColDescriptor>& in) {
424  return *in == *col_to_update_desc;
425  });
426  if (existing_col_desc_it == input_col_descs.end()) {
427  input_col_descs.push_back(col_to_update_desc);
428  }
429  target_exprs_owned_.emplace_back(case_expr);
430  } else {
431  // no filters, simply project the update value
432  target_exprs_owned_.emplace_back(new_column_value);
433  }
434 
// Exactly one owned target is expected at this point, which presumably requires
// target_exprs_owned_ to be empty on entry - confirm against callers.
435  std::vector<Analyzer::Expr*> target_exprs;
436  CHECK_EQ(target_exprs_owned_.size(), size_t(1));
437  target_exprs.emplace_back(target_exprs_owned_.front().get());
438 
// Both qualifier lists are passed empty because the filters were folded into the CASE.
439  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
440  input_col_descs,
441  {},
442  {},
443  ra_exe_unit_in.join_quals,
444  ra_exe_unit_in.groupby_exprs,
445  target_exprs,
446  ra_exe_unit_in.target_exprs_original_type_infos,
447  ra_exe_unit_in.estimator,
448  ra_exe_unit_in.sort_info,
449  ra_exe_unit_in.scan_limit,
450  ra_exe_unit_in.query_hint,
451  ra_exe_unit_in.query_plan_dag_hash,
452  ra_exe_unit_in.hash_table_build_plan_dag,
453  ra_exe_unit_in.table_id_to_node_map,
454  ra_exe_unit_in.use_bump_allocator,
455  ra_exe_unit_in.union_all,
456  ra_exe_unit_in.query_state};
457  return rewritten_exe_unit;
458 }
459 
460 /* Rewrites a delete query of the form `SELECT OFFSET_IN_FRAGMENT() FROM t
461  * WHERE <delete_filter_condition>` to `SELECT CASE WHEN <delete_filter_condition> THEN
462  * true ELSE existing value END FROM t`
463  */
// The returned unit has empty simple_quals/quals, the delete column added to its input
// columns and a single target expression. Throws std::runtime_error when the unit has
// join qualifiers.
// NOTE(review): this listing is missing listing lines 464 (the declarator naming the
// function) and 506 (one argument of the BinOper built in the accumulate lambda). The
// symbol index at the end of the file lists `RelAlgExecutionUnit
// rewriteColumnarDelete(const RelAlgExecutionUnit &ra_exe_unit_in,
// std::shared_ptr< Analyzer::ColumnVar > delete_column) const`.
465  const RelAlgExecutionUnit& ra_exe_unit_in,
466  std::shared_ptr<Analyzer::ColumnVar> delete_column) const {
// Expected input shape: exactly one target and a group-by list holding one null entry.
467  CHECK_EQ(ra_exe_unit_in.target_exprs.size(), size_t(1));
468  CHECK(ra_exe_unit_in.groupby_exprs.size() == 1 &&
469  !ra_exe_unit_in.groupby_exprs.front());
470 
471  // TODO(adb): is this possible?
472  if (ra_exe_unit_in.join_quals.size() > 0) {
473  throw std::runtime_error("Delete via join not yet supported for temporary tables.");
474  }
475 
// Constant `true`, typed like the delete column; this is the value written for rows
// that match the filter.
476  Datum true_datum;
477  true_datum.boolval = true;
478  const auto deleted_constant =
479  makeExpr<Analyzer::Constant>(delete_column->get_type_info(), false, true_datum);
480 
481  auto input_col_descs = ra_exe_unit_in.input_col_descs;
482 
// Collect every filter: simple qualifiers first, then the regular qualifiers.
483  std::shared_ptr<Analyzer::Expr> filter;
484  std::vector<std::shared_ptr<Analyzer::Expr>> filter_exprs;
485  filter_exprs.insert(filter_exprs.end(),
486  ra_exe_unit_in.simple_quals.begin(),
487  ra_exe_unit_in.simple_quals.end());
488  filter_exprs.insert(
489  filter_exprs.end(), ra_exe_unit_in.quals.begin(), ra_exe_unit_in.quals.end());
490 
491  if (filter_exprs.size() > 0) {
492  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
493  case_expr_list;
// A single filter is used directly; several filters are folded left to right into a
// chain of AND operators over deep copies of the operands.
494  if (filter_exprs.size() == 1) {
495  filter = filter_exprs.front();
496  } else {
497  filter = std::accumulate(
498  std::next(filter_exprs.begin()),
499  filter_exprs.end(),
500  filter_exprs.front(),
501  [](const std::shared_ptr<Analyzer::Expr> a,
502  const std::shared_ptr<Analyzer::Expr> b) {
503  CHECK_EQ(a->get_type_info().get_type(), b->get_type_info().get_type());
504  return makeExpr<Analyzer::BinOper>(a->get_type_info().get_type(),
505  SQLOps::kAND,
507  a->deep_copy(),
508  b->deep_copy());
509  });
510  }
// NOTE(review): `column_to_update` is never used in the visible code.
511  std::shared_ptr<Analyzer::Expr> column_to_update{nullptr};
// CASE WHEN <filter> THEN true, with the delete column passed to normalize() as the
// remaining branch.
512  auto when_expr = filter; // only one filter, will be a BinOper if multiple filters
513  case_expr_list.emplace_back(std::make_pair(when_expr, deleted_constant));
514  auto case_expr = Parser::CaseExpr::normalize(case_expr_list, delete_column);
515 
516  // the delete column should not be projected, but check anyway
// NOTE(review): the lookup compares column ids only, not table or database ids.
517  auto delete_col_desc_it = std::find_if(
518  input_col_descs.begin(),
519  input_col_descs.end(),
520  [&delete_column](const std::shared_ptr<const InputColDescriptor>& in) {
521  return in->getColId() == delete_column->getColumnKey().column_id;
522  });
523  CHECK(delete_col_desc_it == input_col_descs.end());
// The CASE reads the existing delete-column value, so add the column to the inputs.
524  const auto& column_key = delete_column->getColumnKey();
525  auto delete_col_desc =
526  std::make_shared<const InputColDescriptor>(column_key.column_id,
527  column_key.table_id,
528  column_key.db_id,
529  delete_column->get_rte_idx());
530  input_col_descs.push_back(delete_col_desc);
531  target_exprs_owned_.emplace_back(case_expr);
532  } else {
533  // no filters, simply project the deleted=true column value for all rows
534  const auto& column_key = delete_column->getColumnKey();
535  auto delete_col_desc =
536  std::make_shared<const InputColDescriptor>(column_key.column_id,
537  column_key.table_id,
538  column_key.db_id,
539  delete_column->get_rte_idx());
540  input_col_descs.push_back(delete_col_desc);
541  target_exprs_owned_.emplace_back(deleted_constant);
542  }
543 
// Exactly one owned target is expected at this point, which presumably requires
// target_exprs_owned_ to be empty on entry - confirm against callers.
544  std::vector<Analyzer::Expr*> target_exprs;
545  CHECK_EQ(target_exprs_owned_.size(), size_t(1));
546  target_exprs.emplace_back(target_exprs_owned_.front().get());
547 
// Both qualifier lists are passed empty because the filters were folded into the CASE.
548  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
549  input_col_descs,
550  {},
551  {},
552  ra_exe_unit_in.join_quals,
553  ra_exe_unit_in.groupby_exprs,
554  target_exprs,
555  ra_exe_unit_in.target_exprs_original_type_infos,
556  ra_exe_unit_in.estimator,
557  ra_exe_unit_in.sort_info,
558  ra_exe_unit_in.scan_limit,
559  ra_exe_unit_in.query_hint,
560  ra_exe_unit_in.query_plan_dag_hash,
561  ra_exe_unit_in.hash_table_build_plan_dag,
562  ra_exe_unit_in.table_id_to_node_map,
563  ra_exe_unit_in.use_bump_allocator,
564  ra_exe_unit_in.union_all,
565  ra_exe_unit_in.query_state};
566  return rewritten_exe_unit;
567 }
568 
569 std::pair<bool, std::set<size_t>> QueryRewriter::is_all_groupby_exprs_are_col_var(
570  const std::list<std::shared_ptr<Analyzer::Expr>>& groupby_exprs) const {
571  std::set<size_t> gby_col_exprs_hash;
572  for (auto gby_expr : groupby_exprs) {
573  if (auto gby_col_var = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gby_expr)) {
574  gby_col_exprs_hash.insert(boost::hash_value(gby_col_var->toString()));
575  } else {
576  return {false, {}};
577  }
578  }
579  return {true, gby_col_exprs_hash};
580 }
581 
// Simplifies aggregates whose argument is one of the GROUP BY columns. The rewrite is
// attempted only when every group-by expression is a ColumnVar; all other targets are
// passed through unchanged, and the remaining fields of the unit are copied as is.
// NOTE(review): this listing is missing listing lines 582 (the declarator naming the
// function), 617 and 629 (both inside the switch, presumably additional `case`
// labels). The symbol index at the end of the file lists `RelAlgExecutionUnit
// rewriteAggregateOnGroupByColumn(const RelAlgExecutionUnit &ra_exe_unit_in) const`.
583  const RelAlgExecutionUnit& ra_exe_unit_in) const {
// first: whether all group-by expressions are ColumnVars; second: hashes of their
// toString() representations.
584  auto check_precond = is_all_groupby_exprs_are_col_var(ra_exe_unit_in.groupby_exprs);
// True when the aggregate has an argument whose toString() hash matches one of the
// group-by column hashes.
585  auto is_expr_on_gby_col = [&check_precond](const Analyzer::AggExpr* agg_expr) {
586  CHECK(agg_expr);
587  if (agg_expr->get_arg()) {
588  // some expr does not have its own arg, i.e., count(*)
589  auto agg_expr_hash = boost::hash_value(agg_expr->get_arg()->toString());
590  // a valid expr should have hashed value > 0
591  CHECK_GT(agg_expr_hash, 0u);
592  if (check_precond.second.count(agg_expr_hash)) {
593  return true;
594  }
595  }
596  return false;
597  };
598  if (!check_precond.first) {
599  // return the input ra_exe_unit if we have gby expr which is not col_var
600  // i.e., group by x+1, y instead of group by x, y
601  // todo (yoonmin) : can we relax this with a simple analysis of groupby / agg exprs?
602  return ra_exe_unit_in;
603  }
604 
// Build the new target list one entry at a time; `rewritten` records whether the
// current target was replaced.
605  std::vector<Analyzer::Expr*> new_target_exprs;
606  for (auto expr : ra_exe_unit_in.target_exprs) {
607  bool rewritten = false;
608  if (auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
609  if (is_expr_on_gby_col(agg_expr)) {
610  auto target_expr = agg_expr->get_arg();
611  // we have some issues when this rewriting is applied to float_type groupby column
612  // in subquery, i.e., SELECT MIN(v1) FROM (SELECT v1, AGG(v1) FROM T GROUP BY v1);
613  if (target_expr && target_expr->get_type_info().get_type() != SQLTypes::kFLOAT) {
614  switch (agg_expr->get_aggtype()) {
// Count-style aggregates: a non-distinct COUNT is left alone; the others are replaced
// by the CASE built in generateCaseExprForCountDistinctOnGroupByCol.
// NOTE(review): a line is missing from this listing right after kCOUNT_IF.
615  case SQLAgg::kCOUNT:
616  case SQLAgg::kCOUNT_IF:
618  if (agg_expr->get_aggtype() == SQLAgg::kCOUNT &&
619  !agg_expr->get_is_distinct()) {
620  break;
621  }
622  auto case_expr =
623  generateCaseExprForCountDistinctOnGroupByCol(agg_expr->get_own_arg());
624  new_target_exprs.push_back(case_expr.get());
625  target_exprs_owned_.emplace_back(case_expr);
626  rewritten = true;
627  break;
628  }
// NOTE(review): a line is missing from this listing right before kAVG.
630  case SQLAgg::kAVG:
631  case SQLAgg::kSAMPLE:
632  case SQLAgg::kMAX:
633  case SQLAgg::kMIN: {
634  // we just replace the agg_expr into a plain expr
635  // i.e, avg(x1) --> x1
// A cast is added when the aggregate's type differs from its argument's type. This
// `target_expr` shadows the one declared before the switch.
636  auto agg_expr_ti = agg_expr->get_type_info();
637  auto target_expr = agg_expr->get_own_arg();
638  if (agg_expr_ti != target_expr->get_type_info()) {
639  target_expr = target_expr->add_cast(agg_expr_ti);
640  }
641  new_target_exprs.push_back(target_expr.get());
642  target_exprs_owned_.emplace_back(target_expr);
643  rewritten = true;
644  break;
645  }
646  default:
647  break;
648  }
649  }
650  }
651  }
// Anything not replaced above is passed through unchanged.
652  if (!rewritten) {
653  new_target_exprs.push_back(expr);
654  }
655  }
656 
// Same unit as the input, except for the target expressions.
657  RelAlgExecutionUnit rewritten_exe_unit{ra_exe_unit_in.input_descs,
658  ra_exe_unit_in.input_col_descs,
659  ra_exe_unit_in.simple_quals,
660  ra_exe_unit_in.quals,
661  ra_exe_unit_in.join_quals,
662  ra_exe_unit_in.groupby_exprs,
663  new_target_exprs,
664  ra_exe_unit_in.target_exprs_original_type_infos,
665  ra_exe_unit_in.estimator,
666  ra_exe_unit_in.sort_info,
667  ra_exe_unit_in.scan_limit,
668  ra_exe_unit_in.query_hint,
669  ra_exe_unit_in.query_plan_dag_hash,
670  ra_exe_unit_in.hash_table_build_plan_dag,
671  ra_exe_unit_in.table_id_to_node_map,
672  ra_exe_unit_in.use_bump_allocator,
673  ra_exe_unit_in.union_all,
674  ra_exe_unit_in.query_state};
675  return rewritten_exe_unit;
676 }
std::vector< Analyzer::Expr * > target_exprs
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >, const Executor *executor=nullptr)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
HOST DEVICE int get_size() const
Definition: sqltypes.h:393
size_t g_constrained_by_in_threshold
Definition: Execute.cpp:108
boost::optional< OverlapsJoinConjunction > rewrite_overlaps_conjunction(const std::shared_ptr< Analyzer::Expr > expr, const std::vector< InputDescriptor > &input_table_info, const OverlapsJoinRewriteType rewrite_type, const Executor *executor)
QueryPlanHash query_plan_dag_hash
static bool is_overlaps_supported_func(std::string_view target_func_name)
const std::optional< bool > union_all
RelAlgExecutionUnit rewriteColumnarUpdate(const RelAlgExecutionUnit &ra_exe_unit_in, std::shared_ptr< Analyzer::ColumnVar > column_to_update) const
Definition: sqldefs.h:34
int8_t boolval
Definition: Datum.h:68
std::vector< InputDescriptor > input_descs
#define UNREACHABLE()
Definition: Logger.h:337
Definition: sqldefs.h:48
Definition: sqldefs.h:29
Constants for Builtin SQL Types supported by HEAVY.AI.
std::vector< JoinCondition > JoinQualsPerNestingLevel
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
#define CHECK_GT(x, y)
Definition: Logger.h:305
std::string to_string(char const *&&v)
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
constexpr double a
Definition: Utm.h:32
Definition: sqldefs.h:75
static SysCatalog & instance()
Definition: SysCatalog.h:343
CONSTEXPR DEVICE bool is_null(const T &value)
Classes representing a parse tree.
RelAlgExecutionUnit rewrite(const RelAlgExecutionUnit &ra_exe_unit_in) const
const JoinQualsPerNestingLevel join_quals
std::pair< bool, std::set< size_t > > is_all_groupby_exprs_are_col_var(const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs) const
Executor * executor_
Definition: QueryRewrite.h:70
int64_t bigintval
Definition: Datum.h:72
TableIdToNodeMap table_id_to_node_map
RelAlgExecutionUnit rewriteOverlapsJoin(const RelAlgExecutionUnit &ra_exe_unit_in) const
bool g_enable_distance_rangejoin
Definition: Execute.cpp:103
Definition: sqldefs.h:36
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
const std::shared_ptr< Analyzer::Estimator > estimator
std::string * stringval
Definition: Datum.h:77
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
Definition: sqldefs.h:71
bool check_string_id_overflow(const int32_t string_id, const SQLTypeInfo &ti)
std::unordered_map< size_t, SQLTypeInfo > target_exprs_original_type_infos
Definition: sqldefs.h:78
const std::list< std::shared_ptr< Analyzer::Expr > > & get_value_list() const
Definition: Analyzer.h:646
virtual std::shared_ptr< Analyzer::Expr > deep_copy() const =0
const std::vector< InputTableInfo > & query_infos_
Definition: QueryRewrite.h:69
std::list< std::shared_ptr< Analyzer::Expr > > quals
int64_t getIntMax() const
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:291
OverlapsJoinRewriteType
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqldefs.h:32
std::shared_ptr< Analyzer::CaseExpr > generateCaseExprForCountDistinctOnGroupByCol(std::shared_ptr< Analyzer::Expr > expr) const
std::shared_ptr< const query_state::QueryState > query_state
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::string getName() const
Definition: Analyzer.h:2406
RelAlgExecutionUnit rewriteColumnarDelete(const RelAlgExecutionUnit &ra_exe_unit_in, std::shared_ptr< Analyzer::ColumnVar > delete_column) const
Definition: sqldefs.h:76
Definition: Datum.h:67
static std::shared_ptr< Analyzer::CaseExpr > generateCaseForDomainValues(const Analyzer::InValues *)
Definition: sqldefs.h:74
RelAlgExecutionUnit rewriteConstrainedByIn(const RelAlgExecutionUnit &ra_exe_unit_in) const
Definition: sqldefs.h:38
RelAlgExecutionUnit rewriteAggregateOnGroupByColumn(const RelAlgExecutionUnit &ra_exe_unit_in) const
const Expr * get_arg() const
Definition: Analyzer.h:644
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
RelAlgExecutionUnit rewriteConstrainedByInImpl(const RelAlgExecutionUnit &ra_exe_unit_in, const std::shared_ptr< Analyzer::CaseExpr >, const Analyzer::InValues *) const
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
Definition: QueryRewrite.h:71
HashTableBuildDagMap hash_table_build_plan_dag