OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LogicalIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Execute.h"
19 #include "NullableValue.h"
20 
21 #include <llvm/IR/MDBuilder.h>
22 
23 namespace {
24 
// Body of contains_unsafe_division(expr); the signature line is absent from this
// listing. Returns true when `expr` contains at least one kDIVIDE binary operator
// whose divisor is not a constant, is a NULL constant, or is a constant equal to
// zero.
26  auto is_div = [](const Analyzer::Expr* e) -> bool {
27  auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(e);
28  if (bin_oper && bin_oper->get_optype() == kDIVIDE) {
29  auto rhs = bin_oper->get_right_operand();
30  auto rhs_constant = dynamic_cast<const Analyzer::Constant*>(rhs);
// A divisor that is not a constant, or is a NULL constant, counts as unsafe.
31  if (!rhs_constant || rhs_constant->get_is_null()) {
32  return true;
33  }
34  const auto& datum = rhs_constant->get_constval();
35  const auto& ti = rhs_constant->get_type_info();
// Decimal constants are classified through decimal_to_int_type(); every other
// constant uses its own SQL type to pick the matching Datum member below.
36  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
// A constant divisor is unsafe only when its value is exactly zero.
37  if ((type == kBOOLEAN && datum.boolval == 0) ||
38  (type == kTINYINT && datum.tinyintval == 0) ||
39  (type == kSMALLINT && datum.smallintval == 0) ||
40  (type == kINT && datum.intval == 0) ||
41  (type == kBIGINT && datum.bigintval == 0LL) ||
42  (type == kFLOAT && datum.floatval == 0.0) ||
43  (type == kDOUBLE && datum.doubleval == 0.0)) {
44  return true;
45  }
46  }
47  return false;
48  };
// Collect every sub-expression accepted by is_div; a non-empty result means the
// expression contains an unsafe division.
49  std::list<const Analyzer::Expr*> binoper_list;
50  expr->find_expr(is_div, binoper_list);
51  return !binoper_list.empty();
52 }
53 
54 bool should_defer_eval(const std::shared_ptr<Analyzer::Expr> expr) {
55  if (std::dynamic_pointer_cast<Analyzer::LikeExpr>(expr)) {
56  return true;
57  }
58  if (std::dynamic_pointer_cast<Analyzer::RegexpExpr>(expr)) {
59  return true;
60  }
61  if (std::dynamic_pointer_cast<Analyzer::FunctionOper>(expr)) {
62  return true;
63  }
64  if (!std::dynamic_pointer_cast<Analyzer::BinOper>(expr)) {
65  return false;
66  }
67  const auto bin_expr = std::static_pointer_cast<Analyzer::BinOper>(expr);
68  if (contains_unsafe_division(bin_expr.get())) {
69  return true;
70  }
71  if (bin_expr->is_bbox_intersect_oper()) {
72  return false;
73  }
74  const auto rhs = bin_expr->get_right_operand();
75  return rhs->get_type_info().is_array();
76 }
77 
// Body of get_likelihood(expr); the signature line is absent from this listing.
// Derives a Likelihood for `expr` from the LikelihoodExpr nodes it contains:
//   - LikelihoodExpr: its stored likelihood
//   - NOT x:          1 - likelihood(x); other unary operators pass x through
//   - a OR b:         1 - (1 - likelihood(a)) * (1 - likelihood(b))
//   - a AND b:        likelihood(a) * likelihood(b)
//   - other binary:   average of both operand likelihoods
// Anything else yields a default-constructed Likelihood (presumably the
// "invalid"/unknown value — confirm in NullableValue.h).
79  Likelihood truth{1.0};
80  auto likelihood_expr = dynamic_cast<const Analyzer::LikelihoodExpr*>(expr);
81  if (likelihood_expr) {
82  return Likelihood(likelihood_expr->get_likelihood());
83  }
84  auto u_oper = dynamic_cast<const Analyzer::UOper*>(expr);
85  if (u_oper) {
86  Likelihood oper_likelihood = get_likelihood(u_oper->get_operand());
// Without a likelihood for the operand there is nothing to propagate.
87  if (oper_likelihood.isInvalid()) {
88  return Likelihood();
89  }
90  if (u_oper->get_optype() == kNOT) {
91  return truth - oper_likelihood;
92  }
93  return oper_likelihood;
94  }
95  auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(expr);
96  if (bin_oper) {
97  auto lhs = bin_oper->get_left_operand();
98  auto rhs = bin_oper->get_right_operand();
99  Likelihood lhs_likelihood = get_likelihood(lhs);
100  Likelihood rhs_likelihood = get_likelihood(rhs);
// NOTE(review): this bails out only when BOTH operands are invalid; when exactly
// one is invalid the arithmetic below depends on how NullableValue's operators
// treat an invalid operand — confirm that is intended.
101  if (lhs_likelihood.isInvalid() && rhs_likelihood.isInvalid()) {
102  return Likelihood();
103  }
104  const auto optype = bin_oper->get_optype();
105  if (optype == kOR) {
106  auto both_false = (truth - lhs_likelihood) * (truth - rhs_likelihood);
107  return truth - both_false;
108  }
109  if (optype == kAND) {
110  return lhs_likelihood * rhs_likelihood;
111  }
112  return (lhs_likelihood + rhs_likelihood) / 2.0;
113  }
114 
115  return Likelihood();
116 }
117 
118 Weight get_weight(const Analyzer::Expr* expr, int depth = 0) {
119  auto like_expr = dynamic_cast<const Analyzer::LikeExpr*>(expr);
120  if (like_expr) {
121  // heavy weight expr, start valid weight propagation
122  return Weight((like_expr->get_is_simple()) ? 200 : 1000);
123  }
124  auto regexp_expr = dynamic_cast<const Analyzer::RegexpExpr*>(expr);
125  if (regexp_expr) {
126  // heavy weight expr, start valid weight propagation
127  return Weight(2000);
128  }
129  auto u_oper = dynamic_cast<const Analyzer::UOper*>(expr);
130  if (u_oper) {
131  auto weight = get_weight(u_oper->get_operand(), depth + 1);
132  return weight + 1;
133  }
134  auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(expr);
135  if (bin_oper) {
136  auto lhs = bin_oper->get_left_operand();
137  auto rhs = bin_oper->get_right_operand();
138  auto lhs_weight = get_weight(lhs, depth + 1);
139  auto rhs_weight = get_weight(rhs, depth + 1);
140  if (rhs->get_type_info().is_array()) {
141  // heavy weight expr, start valid weight propagation
142  rhs_weight = rhs_weight + Weight(100);
143  }
144  auto weight = lhs_weight + rhs_weight;
145  return weight + 1;
146  }
147 
148  if (depth > 4) {
149  return Weight(1);
150  }
151 
152  return Weight();
153 }
154 
155 } // namespace
156 
// Remainder of prioritizeQuals(ra_exe_unit, primary_quals, deferred_quals,
// hoisted_quals); the first signature line is absent from this listing.
// Distributes the qualifiers of `ra_exe_unit` between `primary_quals` and
// `deferred_quals`, skipping anything found in `hoisted_quals`. Returns true when
// one qualifier from `quals` was selected as a short-circuit filter.
158  std::vector<Analyzer::Expr*>& primary_quals,
159  std::vector<Analyzer::Expr*>& deferred_quals,
160  const PlanState::HoistedFiltersSet& hoisted_quals) {
// Simple quals: deferred when should_defer_eval() says so, primary otherwise.
161  for (auto expr : ra_exe_unit.simple_quals) {
162  if (hoisted_quals.find(expr) != hoisted_quals.end()) {
163  continue;
164  }
165  if (should_defer_eval(expr)) {
166  deferred_quals.push_back(expr.get());
167  continue;
168  }
169  primary_quals.push_back(expr.get());
170  }
171 
172  bool short_circuit = false;
173 
174  for (auto expr : ra_exe_unit.quals) {
175  if (hoisted_quals.find(expr) != hoisted_quals.end()) {
176  continue;
177  }
178 
// The first qual with likelihood below 0.10 and no unsafe division becomes the
// short-circuit filter and is kept primary.
179  if (get_likelihood(expr.get()) < 0.10 && !contains_unsafe_division(expr.get())) {
180  if (!short_circuit) {
181  primary_quals.push_back(expr.get());
182  short_circuit = true;
183  continue;
184  }
185  }
// Once a short-circuit filter has been chosen, every later qual is deferred;
// before that, only quals flagged by should_defer_eval() are.
186  if (short_circuit || should_defer_eval(expr)) {
187  deferred_quals.push_back(expr.get());
188  continue;
189  }
190  primary_quals.push_back(expr.get());
191  }
192 
193  return short_circuit;
194 }
195 
// Remainder of codegenLogicalShortCircuit(bin_oper, co); the first signature line
// and a few interior lines are absent from this listing (see notes below).
// Generates branching IR for a logical AND/OR so the second operand is only
// evaluated when the first does not already decide the result. Returns nullptr
// when there is no reason to short-circuit, otherwise the PHI node holding the
// result.
197  const CompilationOptions& co) {
199  const auto optype = bin_oper->get_optype();
200  auto lhs = bin_oper->get_left_operand();
201  auto rhs = bin_oper->get_right_operand();
202 
// Decide which operand is evaluated first (kept in `lhs`) and which is guarded by
// the branch (kept in `rhs`).
203  if (contains_unsafe_division(rhs)) {
204  // rhs contains a possible div-by-0: short-circuit
205  } else if (contains_unsafe_division(lhs)) {
206  // lhs contains a possible div-by-0: swap and short-circuit
207  std::swap(rhs, lhs);
208  } else if (((optype == kOR && get_likelihood(lhs) > 0.90) ||
209  (optype == kAND && get_likelihood(lhs) < 0.10)) &&
210  get_weight(rhs) > 10) {
211  // short circuit if we're likely to see either (trueA || heavyB) or (falseA && heavyB)
212  } else if (((optype == kOR && get_likelihood(rhs) > 0.90) ||
213  (optype == kAND && get_likelihood(rhs) < 0.10)) &&
214  get_weight(lhs) > 10) {
215  // swap and short circuit if we're likely to see either (heavyA || trueB) or (heavyA
216  // && falseB)
217  std::swap(rhs, lhs);
218  } else {
219  // no motivation to short circuit
220  return nullptr;
221  }
222 
223  const auto& ti = bin_oper->get_type_info();
224  auto lhs_lv = codegen(lhs, true, co).front();
225 
226  // Here the linear control flow will diverge and expressions cached during the
227  // code branch code generation (currently just column decoding) are not going
228  // to be available once we're done generating the short-circuited logic.
229  // Take a snapshot of the cache with FetchCacheAnchor and restore it once
230  // the control flow converges.
// NOTE(review): the listing skips a source line here, presumably the
// FetchCacheAnchor declaration the comment above refers to — confirm against the
// real file.
232 
// NOTE(review): the argument lines of both BasicBlock::Create calls below are
// missing from this listing.
233  auto rhs_bb = llvm::BasicBlock::Create(
235  auto ret_bb = llvm::BasicBlock::Create(
237  llvm::BasicBlock* nullcheck_ok_bb{nullptr};
238  llvm::BasicBlock* nullcheck_fail_bb{nullptr};
239 
240  if (!ti.get_notnull()) {
241  // need lhs nullcheck before short circuiting
242  nullcheck_ok_bb = llvm::BasicBlock::Create(
243  cgen_state_->context_, "nullcheck_ok_bb", cgen_state_->current_func_);
244  nullcheck_fail_bb = llvm::BasicBlock::Create(
245  cgen_state_->context_, "nullcheck_fail_bb", cgen_state_->current_func_);
// Widen an i1 value to 8 bits so it can be compared with the inline null value.
246  if (lhs_lv->getType()->isIntegerTy(1)) {
247  lhs_lv = cgen_state_->castToTypeIn(lhs_lv, 8);
248  }
249  auto lhs_nullcheck =
250  cgen_state_->ir_builder_.CreateICmpEQ(lhs_lv, cgen_state_->inlineIntNull(ti));
251  cgen_state_->ir_builder_.CreateCondBr(
252  lhs_nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
253  cgen_state_->ir_builder_.SetInsertPoint(nullcheck_ok_bb);
254  }
255 
// Remember the block that performs the short-circuit test: it is one of the PHI's
// predecessors. cnst_lv is the deciding constant (1 for OR, 0 for AND).
256  auto sc_check_bb = cgen_state_->ir_builder_.GetInsertBlock();
257  auto cnst_lv = llvm::ConstantInt::get(lhs_lv->getType(), (optype == kOR));
258  // Branch to codegen rhs if NOT getting (true || rhs) or (false && rhs), likelihood of
259  // the branch is < 0.10
260  cgen_state_->ir_builder_.CreateCondBr(
261  cgen_state_->ir_builder_.CreateICmpNE(lhs_lv, cnst_lv),
262  rhs_bb,
263  ret_bb,
264  llvm::MDBuilder(cgen_state_->context_).createBranchWeights(10, 90));
265 
266  // Codegen rhs when unable to short circuit.
267  cgen_state_->ir_builder_.SetInsertPoint(rhs_bb);
268  auto rhs_lv = codegen(rhs, true, co).front();
269  if (!ti.get_notnull()) {
270  // need rhs nullcheck as well
271  if (rhs_lv->getType()->isIntegerTy(1)) {
272  rhs_lv = cgen_state_->castToTypeIn(rhs_lv, 8);
273  }
274  auto rhs_nullcheck =
275  cgen_state_->ir_builder_.CreateICmpEQ(rhs_lv, cgen_state_->inlineIntNull(ti));
276  cgen_state_->ir_builder_.CreateCondBr(rhs_nullcheck, nullcheck_fail_bb, ret_bb);
277  } else {
278  cgen_state_->ir_builder_.CreateBr(ret_bb);
279  }
// Query the current block again rather than reusing rhs_bb when wiring the PHI.
280  auto rhs_codegen_bb = cgen_state_->ir_builder_.GetInsertBlock();
281 
282  if (!ti.get_notnull()) {
283  cgen_state_->ir_builder_.SetInsertPoint(nullcheck_fail_bb);
284  cgen_state_->ir_builder_.CreateBr(ret_bb);
285  }
286 
// Merge point: null (nullable case only), the deciding constant from the
// short-circuit path, or the value computed for rhs.
287  cgen_state_->ir_builder_.SetInsertPoint(ret_bb);
288  auto result_phi =
289  cgen_state_->ir_builder_.CreatePHI(lhs_lv->getType(), (!ti.get_notnull()) ? 3 : 2);
290  if (!ti.get_notnull()) {
291  result_phi->addIncoming(cgen_state_->inlineIntNull(ti), nullcheck_fail_bb);
292  }
293  result_phi->addIncoming(cnst_lv, sc_check_bb);
294  result_phi->addIncoming(rhs_lv, rhs_codegen_bb);
295  return result_phi;
296 }
297 
// Remainder of codegenLogical(bin_oper, co) for binary logical operators; the
// first signature line (and one line after it) is absent from this listing.
// Emits IR for AND/OR: tries the short-circuit form first, then falls back to a
// plain bitwise and/or for non-nullable results or to the "logical_and" /
// "logical_or" calls, which also receive the inline null value, for nullable ones.
299  const CompilationOptions& co) {
301  const auto optype = bin_oper->get_optype();
302  CHECK(IS_LOGIC(optype));
303 
// A non-null result means the short-circuit generator handled the expression.
304  if (llvm::Value* short_circuit = codegenLogicalShortCircuit(bin_oper, co)) {
305  return short_circuit;
306  }
307 
308  const auto lhs = bin_oper->get_left_operand();
309  const auto rhs = bin_oper->get_right_operand();
310  auto lhs_lv = codegen(lhs, true, co).front();
311  auto rhs_lv = codegen(rhs, true, co).front();
312  const auto& ti = bin_oper->get_type_info();
// Non-nullable result: combine both operands after converting them with toBool().
313  if (ti.get_notnull()) {
314  switch (optype) {
315  case kAND:
316  return cgen_state_->ir_builder_.CreateAnd(toBool(lhs_lv), toBool(rhs_lv));
317  case kOR:
318  return cgen_state_->ir_builder_.CreateOr(toBool(lhs_lv), toBool(rhs_lv));
319  default:
320  CHECK(false);
321  }
322  }
// Nullable result: both operands are widened to 8 bits before being passed to the
// called function together with the inline null value.
323  CHECK(lhs_lv->getType()->isIntegerTy(1) || lhs_lv->getType()->isIntegerTy(8));
324  CHECK(rhs_lv->getType()->isIntegerTy(1) || rhs_lv->getType()->isIntegerTy(8));
325  if (lhs_lv->getType()->isIntegerTy(1)) {
326  lhs_lv = cgen_state_->castToTypeIn(lhs_lv, 8);
327  }
328  if (rhs_lv->getType()->isIntegerTy(1)) {
329  rhs_lv = cgen_state_->castToTypeIn(rhs_lv, 8);
330  }
331  switch (optype) {
332  case kAND:
333  return cgen_state_->emitCall("logical_and",
334  {lhs_lv, rhs_lv, cgen_state_->inlineIntNull(ti)});
335  case kOR:
336  return cgen_state_->emitCall("logical_or",
337  {lhs_lv, rhs_lv, cgen_state_->inlineIntNull(ti)});
338  default:
339  abort();
340  }
341 }
342 
// Converts an integer LLVM value to a 1-bit boolean. Values wider than one bit
// are turned into the result of a signed "greater than zero" comparison; a 1-bit
// value is returned unchanged. `lv` must have an integer type.
// NOTE(review): the listing skips one source line right after the signature.
343 llvm::Value* CodeGenerator::toBool(llvm::Value* lv) {
345  CHECK(lv->getType()->isIntegerTy());
346  if (static_cast<llvm::IntegerType*>(lv->getType())->getBitWidth() > 1) {
347  return cgen_state_->ir_builder_.CreateICmp(
348  llvm::ICmpInst::ICMP_SGT, lv, llvm::ConstantInt::get(lv->getType(), 0));
349  }
350  return lv;
351 }
352 
353 namespace {
354 
// Body of is_qualified_bin_oper(expr); the signature line is absent from this
// listing. True when `expr` is a binary operator whose qualifier is anything other
// than kONE.
356  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(expr);
357  return bin_oper && bin_oper->get_qualifier() != kONE;
358 }
359 
360 } // namespace
361 
// Remainder of the unary-operator overload of codegenLogical (uoper, co); the
// first signature line and two interior lines are absent from this listing.
// Emits IR for logical NOT over a boolean operand.
363  const CompilationOptions& co) {
365  const auto optype = uoper->get_optype();
366  CHECK_EQ(kNOT, optype);
367  const auto operand = uoper->get_operand();
368  const auto& operand_ti = operand->get_type_info();
369  CHECK(operand_ti.is_boolean());
370  const auto operand_lv = codegen(operand, true, co).front();
371  CHECK(operand_lv->getType()->isIntegerTy());
// The operand is handled as non-null when its type says so or when it is a
// qualified binary operator (see is_qualified_bin_oper).
372  const bool not_null = (operand_ti.get_notnull() || is_qualified_bin_oper(operand));
373  CHECK(not_null || operand_lv->getType()->isIntegerTy(8));
// Non-null operands get a plain bitwise NOT of their boolean form.
// NOTE(review): the line opening the ':' branch is missing from this listing; the
// surviving arguments show a call named "logical_not" that also receives the
// inline null value.
374  return not_null
375  ? cgen_state_->ir_builder_.CreateNot(toBool(operand_lv))
377  "logical_not", {operand_lv, cgen_state_->inlineIntNull(operand_ti)});
378 }
379 
// Remainder of codegenIsNull(uoper, co); the first signature line and two interior
// lines are absent from this listing. Emits IR that evaluates to true when the
// operand of `uoper` is NULL.
381  const CompilationOptions& co) {
383  const auto operand = uoper->get_operand();
384  if (dynamic_cast<const Analyzer::Constant*>(operand) &&
385  dynamic_cast<const Analyzer::Constant*>(operand)->get_is_null()) {
386  // for null constants, short-circuit to true
387  return llvm::ConstantInt::get(get_int_type(1, cgen_state_->context_), 1);
388  }
389  const auto& ti = operand->get_type_info();
390  CHECK(ti.is_integer() || ti.is_boolean() || ti.is_decimal() || ti.is_time() ||
391  ti.is_string() || ti.is_fp() || ti.is_array() || ti.is_geometry());
392  // if the type is inferred as non null, short-circuit to false
393  if (ti.get_notnull()) {
394  return llvm::ConstantInt::get(get_int_type(1, cgen_state_->context_), 0);
395  }
396  llvm::Value* operand_lv = codegen(operand, true, co).front();
397  // NULL-check array or geo's coords array
398  if (ti.is_array() || ti.is_geometry()) {
399  // POINT [un]compressed coord check requires custom checker and chunk iterator
400  // Non-POINT NULL geographies will have a normally encoded null coord array
401  auto fname =
402  (ti.get_type() == kPOINT) ? "point_coord_array_is_null" : "array_is_null";
// NOTE(review): the line that starts the call using `fname` is missing from this
// listing; only its argument list survives below.
404  fname, get_int_type(1, cgen_state_->context_), {operand_lv, posArg(operand)});
405  } else if (ti.is_none_encoded_string()) {
// For none-encoded strings, take element 0 of the generated value and cast it to
// 64 bits before the integer null comparison.
406  operand_lv = cgen_state_->ir_builder_.CreateExtractValue(operand_lv, 0);
407  operand_lv = cgen_state_->castToTypeIn(operand_lv, sizeof(int64_t) * 8);
408  }
409  return codegenIsNullNumber(operand_lv, ti);
410 }
411 
// Emits the comparison that tests `operand_lv` against the NULL representation of
// type `ti`: an ordered floating-point equality for fp types, an integer equality
// against inlineIntNull(ti) for everything else.
412 llvm::Value* CodeGenerator::codegenIsNullNumber(llvm::Value* operand_lv,
413  const SQLTypeInfo& ti) {
415  if (ti.is_fp()) {
// NOTE(review): the lines holding the right-hand operand of this comparison are
// missing from the listing; presumably the float or double null sentinel is
// selected by the kFLOAT test — confirm against the real file.
416  return cgen_state_->ir_builder_.CreateFCmp(llvm::FCmpInst::FCMP_OEQ,
417  operand_lv,
418  ti.get_type() == kFLOAT
421  }
422  return cgen_state_->ir_builder_.CreateICmp(
423  llvm::ICmpInst::ICMP_EQ, operand_lv, cgen_state_->inlineIntNull(ti));
424 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
llvm::Value * castToTypeIn(llvm::Value *val, const size_t bit_width)
Definition: CgenState.cpp:150
#define IS_LOGIC(X)
Definition: sqldefs.h:61
#define NULL_DOUBLE
bool should_defer_eval(const std::shared_ptr< Analyzer::Expr > expr)
Definition: LogicalIR.cpp:54
CgenState * cgen_state_
#define NULL_FLOAT
bool is_fp() const
Definition: sqltypes.h:571
const Expr * get_right_operand() const
Definition: Analyzer.h:456
llvm::IRBuilder ir_builder_
Definition: CgenState.h:384
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:590
Definition: sqldefs.h:37
std::unordered_set< std::shared_ptr< Analyzer::Expr >> HoistedFiltersSet
Definition: PlanState.h:45
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Value * codegenIsNull(const Analyzer::UOper *, const CompilationOptions &)
Definition: LogicalIR.cpp:380
SQLOps get_optype() const
Definition: Analyzer.h:452
Likelihood get_likelihood(const Analyzer::Expr *expr)
Definition: LogicalIR.cpp:78
llvm::LLVMContext & context_
Definition: CgenState.h:382
llvm::Function * current_func_
Definition: CgenState.h:376
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
Definition: CgenState.cpp:395
bool is_qualified_bin_oper(const Analyzer::Expr *expr)
Definition: LogicalIR.cpp:355
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:65
Weight get_weight(const Analyzer::Expr *expr, int depth=0)
Definition: LogicalIR.cpp:118
bool isInvalid() const
Definition: NullableValue.h:33
Definition: sqldefs.h:36
NullableValue< float > Likelihood
Definition: NullableValue.h:99
llvm::ConstantFP * llFp(const float v) const
Definition: CgenState.h:253
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:217
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:561
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
Definition: sqldefs.h:71
const Expr * get_operand() const
Definition: Analyzer.h:384
Datum get_constval() const
Definition: Analyzer.h:348
llvm::Value * toBool(llvm::Value *)
Definition: LogicalIR.cpp:343
static bool prioritizeQuals(const RelAlgExecutionUnit &ra_exe_unit, std::vector< Analyzer::Expr * > &primary_quals, std::vector< Analyzer::Expr * > &deferred_quals, const PlanState::HoistedFiltersSet &hoisted_quals)
Definition: LogicalIR.cpp:157
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:291
llvm::Value * codegenIsNullNumber(llvm::Value *, const SQLTypeInfo &)
Definition: LogicalIR.cpp:412
llvm::Value * codegenLogical(const Analyzer::BinOper *, const CompilationOptions &)
Definition: LogicalIR.cpp:298
bool contains_unsafe_division(const Analyzer::Expr *expr)
Definition: LogicalIR.cpp:25
const Expr * get_left_operand() const
Definition: Analyzer.h:455
Definition: sqltypes.h:72
llvm::Value * codegenLogicalShortCircuit(const Analyzer::BinOper *, const CompilationOptions &)
Definition: LogicalIR.cpp:196
virtual void find_expr(std::function< bool(const Expr *)> f, std::list< const Expr * > &expr_list) const
Definition: Analyzer.h:163
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
Definition: sqldefs.h:38
SQLOps get_optype() const
Definition: Analyzer.h:383
bool is_array() const
Definition: sqltypes.h:583
NullableValue< uint64_t > Weight
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
SQLQualifier get_qualifier() const
Definition: Analyzer.h:454