OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
OuterJoinOptViaNullRejectionRule.java
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.apache.calcite.rel.rules;
18 
19 import org.apache.calcite.plan.RelOptRuleCall;
20 import org.apache.calcite.plan.hep.HepRelVertex;
21 import org.apache.calcite.rel.RelNode;
22 import org.apache.calcite.rel.core.Join;
23 import org.apache.calcite.rel.core.JoinRelType;
24 import org.apache.calcite.rel.logical.LogicalFilter;
25 import org.apache.calcite.rel.logical.LogicalJoin;
26 import org.apache.calcite.rel.logical.LogicalProject;
27 import org.apache.calcite.rel.logical.LogicalTableScan;
28 import org.apache.calcite.rex.RexCall;
29 import org.apache.calcite.rex.RexInputRef;
30 import org.apache.calcite.rex.RexLiteral;
31 import org.apache.calcite.rex.RexNode;
32 import org.apache.calcite.sql.SqlKind;
33 import org.apache.calcite.tools.RelBuilder;
34 import org.apache.calcite.tools.RelBuilderFactory;
35 import org.apache.calcite.util.mapping.Mappings;
36 import org.slf4j.Logger;
37 import org.slf4j.LoggerFactory;
38 
39 import java.util.ArrayList;
40 import java.util.HashMap;
41 import java.util.HashSet;
42 import java.util.List;
43 import java.util.Map;
44 import java.util.Set;
45 
47  // goal: relax full outer join to either left or inner joins
48  // consider two tables 'foo(a int, b int)' and 'bar(c int, d int)'
49  // foo = {(1,3), (2,4), (NULL, 5)} // bar = {(1,2), (4, 3), (NULL, 5)}
50 
51  // 1. full outer join -> left
52  // : select * from foo full outer join bar on a = c where a is not null;
53  // = select * from foo left outer join bar on a = c where a is not null;
54 
55  // 2. full outer join -> inner
56  // : select * from foo full outer join bar on a = c where a is not null and c is
57  // not null; = select * from foo join bar on a = c; (or select * from foo, bar
58  // where a = c;)
59 
60  // 3. left outer join --> inner
61  // : select * from foo left outer join bar on a = c where c is not null;
62  // = select * from foo join bar on a = c; (or select * from foo, bar where a = c;)
63 
64  // null rejection: "col IS NOT NULL" or "col > NULL_INDICATOR" in WHERE clause
65  // i.e., col > 1 must reject any tuples having null value in a col column
66 
67  // todo(yoonmin): runtime query optimization via statistic
68  // in fact, we can optimize more broad range of the query having outer joins
69  // by using filter predicates on join tables (but not on join cols)
70  // because such filter conditions could affect join tables and
71  // they can make join cols to be null rejected
72 
73  public static Set<String> visitedJoinMemo = new HashSet<>();
74  final static Logger MAPDLOGGER =
75  LoggerFactory.getLogger(OuterJoinOptViaNullRejectionRule.class);
76 
77  public OuterJoinOptViaNullRejectionRule(RelBuilderFactory relBuilderFactory) {
78  super(operand(RelNode.class, operand(Join.class, null, any())),
79  relBuilderFactory,
80  "OuterJoinOptViaNullRejectionRule");
81  clearMemo();
82  }
83 
84  void clearMemo() {
85  visitedJoinMemo.clear();
86  }
87 
88  @Override
89  public void onMatch(RelOptRuleCall call) {
90  RelNode parentNode = call.rel(0);
91  LogicalJoin join = (LogicalJoin) call.rel(1);
92  String condString = join.getCondition().toString();
93  if (visitedJoinMemo.contains(condString)) {
94  return;
95  } else {
96  visitedJoinMemo.add(condString);
97  }
98  if (!(join.getCondition() instanceof RexCall)) {
99  return; // an inner join
100  }
101  if (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.SEMI
102  || join.getJoinType() == JoinRelType.ANTI) {
103  return; // non target
104  }
105  RelNode joinLeftChild = ((HepRelVertex) join.getLeft()).getCurrentRel();
106  RelNode joinRightChild = ((HepRelVertex) join.getRight()).getCurrentRel();
107  if (joinLeftChild instanceof LogicalProject) {
108  return; // disable this opt when LHS has subquery (i.e., filter push-down)
109  }
110  if (!(joinRightChild instanceof LogicalTableScan)) {
111  return; // disable this opt when RHS has subquery (i.e., filter push-down)
112  }
113  // an outer join contains its join cond in itself,
114  // not in a filter as typical inner join op. does
115  RexCall joinCond = (RexCall) join.getCondition();
116  Set<Integer> leftJoinCols = new HashSet<>();
117  Set<Integer> rightJoinCols = new HashSet<>();
118  Map<Integer, String> leftJoinColToColNameMap = new HashMap<>();
119  Map<Integer, String> rightJoinColToColNameMap = new HashMap<>();
120  Set<Integer> originalLeftJoinCols = new HashSet<>();
121  Set<Integer> originalRightJoinCols = new HashSet<>();
122  Map<Integer, String> originalLeftJoinColToColNameMap = new HashMap<>();
123  Map<Integer, String> originalRightJoinColToColNameMap = new HashMap<>();
124  List<RexCall> capturedFilterPredFromJoin = new ArrayList<>();
125  if (joinCond.getKind() == SqlKind.EQUALS) {
126  addJoinCols(joinCond,
127  join,
128  leftJoinCols,
129  rightJoinCols,
130  leftJoinColToColNameMap,
131  rightJoinColToColNameMap,
132  originalLeftJoinCols,
133  originalRightJoinCols,
134  originalLeftJoinColToColNameMap,
135  originalRightJoinColToColNameMap);
136  } else if (joinCond.getKind() == SqlKind.AND || joinCond.getKind() == SqlKind.OR) {
137  for (RexNode n : joinCond.getOperands()) {
138  if (n instanceof RexCall) {
139  RexCall op = (RexCall) n;
140  if (op.getOperands().size() > 2
141  && op.getOperands().get(1) instanceof RexLiteral) {
142  // try to capture literal comparison of join column located in the cur join
143  // node
144  capturedFilterPredFromJoin.add(op);
145  continue;
146  }
147  addJoinCols(op,
148  join,
149  leftJoinCols,
150  rightJoinCols,
151  leftJoinColToColNameMap,
152  rightJoinColToColNameMap,
153  originalLeftJoinCols,
154  originalRightJoinCols,
155  originalLeftJoinColToColNameMap,
156  originalRightJoinColToColNameMap);
157  }
158  }
159  }
160 
161  if (leftJoinCols.isEmpty() || rightJoinCols.isEmpty()) {
162  return;
163  }
164 
165  // find filter node(s)
166  RelNode root = call.getPlanner().getRoot();
167  List<LogicalFilter> collectedFilterNodes = new ArrayList<>();
168  RelNode curNode = root;
169  final RelBuilder relBuilder = call.builder();
170  // collect filter nodes
171  collectFilterCondition(curNode, collectedFilterNodes);
172  if (collectedFilterNodes.isEmpty()) {
173  // we have a last chance to take a look at this join condition itself
174  // i.e., the filter preds lay with the join conditions in the same join node
175  // but for now we disable the optimization to avoid unexpected plan issue
176  return;
177  }
178 
179  // check whether join column has filter predicate(s)
180  // and collect join column info used in target join nodes to be translated
181  Set<Integer> nullRejectedLeftJoinCols = new HashSet<>();
182  Set<Integer> nullRejectedRightJoinCols = new HashSet<>();
183  for (LogicalFilter filter : collectedFilterNodes) {
184  RexNode node = filter.getCondition();
185  if (node instanceof RexCall) {
186  RexCall curExpr = (RexCall) node;
187  if (curExpr.getKind() == SqlKind.AND || curExpr.getKind() == SqlKind.OR) {
188  for (RexNode n : curExpr.getOperands()) {
189  if (n instanceof RexCall) {
190  RexCall c = (RexCall) n;
191  if (isCandidateFilterPred(c)) {
192  RexInputRef col = (RexInputRef) c.getOperands().get(0);
193  int colId = col.getIndex();
194  boolean leftFilter = leftJoinCols.contains(colId);
195  boolean rightFilter = rightJoinCols.contains(colId);
196  if (leftFilter && rightFilter) {
197  // here we currently do not have a concrete column tracing logic
198  // so it may become a source of plan issue, so we disable this opt
199  return;
200  }
202  filter,
203  nullRejectedLeftJoinCols,
204  nullRejectedRightJoinCols,
205  leftJoinColToColNameMap,
206  rightJoinColToColNameMap);
207  }
208  }
209  }
210  } else {
211  if (curExpr instanceof RexCall) {
212  if (isCandidateFilterPred(curExpr)) {
213  RexInputRef col = (RexInputRef) curExpr.getOperands().get(0);
214  int colId = col.getIndex();
215  boolean leftFilter = leftJoinCols.contains(colId);
216  boolean rightFilter = rightJoinCols.contains(colId);
217  if (leftFilter && rightFilter) {
218  // here we currently do not have a concrete column tracing logic
219  // so it may become a source of plan issue, so we disable this opt
220  return;
221  }
222  addNullRejectedJoinCols(curExpr,
223  filter,
224  nullRejectedLeftJoinCols,
225  nullRejectedRightJoinCols,
226  leftJoinColToColNameMap,
227  rightJoinColToColNameMap);
228  }
229  }
230  }
231  }
232  }
233 
234  if (!capturedFilterPredFromJoin.isEmpty()) {
235  for (RexCall c : capturedFilterPredFromJoin) {
236  RexInputRef col = (RexInputRef) c.getOperands().get(0);
237  int colId = col.getIndex();
238  String colName = join.getRowType().getFieldNames().get(colId);
239  Boolean l = false;
240  Boolean r = false;
241  if (originalLeftJoinColToColNameMap.containsKey(colId)
242  && originalLeftJoinColToColNameMap.get(colId).equals(colName)) {
243  l = true;
244  }
245  if (originalRightJoinColToColNameMap.containsKey(colId)
246  && originalRightJoinColToColNameMap.get(colId).equals(colName)) {
247  r = true;
248  }
249  if (l && !r) {
250  nullRejectedLeftJoinCols.add(colId);
251  } else if (r && !l) {
252  nullRejectedRightJoinCols.add(colId);
253  } else if (r && l) {
254  return;
255  }
256  }
257  }
258 
259  Boolean leftNullRejected = false;
260  Boolean rightNullRejected = false;
261  if (!nullRejectedLeftJoinCols.isEmpty()
262  && leftJoinCols.equals(nullRejectedLeftJoinCols)) {
263  leftNullRejected = true;
264  }
265  if (!nullRejectedRightJoinCols.isEmpty()
266  && rightJoinCols.equals(nullRejectedRightJoinCols)) {
267  rightNullRejected = true;
268  }
269 
270  // relax outer join condition depending on null rejected cols
271  RelNode newJoinNode = null;
272  Boolean needTransform = false;
273  if (join.getJoinType() == JoinRelType.FULL) {
274  // 1) full -> left
275  if (leftNullRejected && !rightNullRejected) {
276  newJoinNode = join.copy(join.getTraitSet(),
277  join.getCondition(),
278  join.getLeft(),
279  join.getRight(),
280  JoinRelType.LEFT,
281  join.isSemiJoinDone());
282  needTransform = true;
283  }
284 
285  // 2) full -> inner
286  if (leftNullRejected && rightNullRejected) {
287  newJoinNode = join.copy(join.getTraitSet(),
288  join.getCondition(),
289  join.getLeft(),
290  join.getRight(),
291  JoinRelType.INNER,
292  join.isSemiJoinDone());
293  needTransform = true;
294  }
295  } else if (join.getJoinType() == JoinRelType.LEFT) {
296  // 3) left -> inner
297  if (rightNullRejected) {
298  newJoinNode = join.copy(join.getTraitSet(),
299  join.getCondition(),
300  join.getLeft(),
301  join.getRight(),
302  JoinRelType.INNER,
303  join.isSemiJoinDone());
304  needTransform = true;
305  }
306  }
307  if (needTransform) {
308  relBuilder.push(newJoinNode);
309  parentNode.replaceInput(0, newJoinNode);
310  call.transformTo(parentNode);
311  }
312  return;
313  }
314 
315  void addJoinCols(RexCall joinCond,
316  LogicalJoin joinOp,
317  Set<Integer> leftJoinCols,
318  Set<Integer> rightJoinCols,
319  Map<Integer, String> leftJoinColToColNameMap,
320  Map<Integer, String> rightJoinColToColNameMap,
321  Set<Integer> originalLeftJoinCols,
322  Set<Integer> originalRightJoinCols,
323  Map<Integer, String> originalLeftJoinColToColNameMap,
324  Map<Integer, String> originalRightJoinColToColNameMap) {
325  if (joinCond.getOperands().size() != 2
326  || !(joinCond.getOperands().get(0) instanceof RexInputRef)
327  || !(joinCond.getOperands().get(1) instanceof RexInputRef)) {
328  return;
329  }
330  RexInputRef leftJoinCol = (RexInputRef) joinCond.getOperands().get(0);
331  RexInputRef rightJoinCol = (RexInputRef) joinCond.getOperands().get(1);
332  originalLeftJoinCols.add(leftJoinCol.getIndex());
333  originalRightJoinCols.add(rightJoinCol.getIndex());
334  originalLeftJoinColToColNameMap.put(leftJoinCol.getIndex(),
335  joinOp.getRowType().getFieldNames().get(leftJoinCol.getIndex()));
336  originalRightJoinColToColNameMap.put(rightJoinCol.getIndex(),
337  joinOp.getRowType().getFieldNames().get(rightJoinCol.getIndex()));
338  if (leftJoinCol.getIndex() > rightJoinCol.getIndex()) {
339  leftJoinCol = (RexInputRef) joinCond.getOperands().get(1);
340  rightJoinCol = (RexInputRef) joinCond.getOperands().get(0);
341  }
342  int originalLeftColOffset = traceColOffset(joinOp.getLeft(), leftJoinCol, 0);
343  int originalRightColOffset = traceColOffset(joinOp.getRight(),
344  rightJoinCol,
345  joinOp.getLeft().getRowType().getFieldCount());
346  if (originalLeftColOffset != -1) {
347  return;
348  }
349  int leftColOffset =
350  originalLeftColOffset == -1 ? leftJoinCol.getIndex() : originalLeftColOffset;
351  int rightColOffset = originalRightColOffset == -1 ? rightJoinCol.getIndex()
352  : originalRightColOffset;
353  String leftJoinColName = joinOp.getRowType().getFieldNames().get(leftColOffset);
354  String rightJoinColName =
355  joinOp.getRowType().getFieldNames().get(rightJoinCol.getIndex());
356  leftJoinCols.add(leftColOffset);
357  rightJoinCols.add(rightColOffset);
358  leftJoinColToColNameMap.put(leftColOffset, leftJoinColName);
359  rightJoinColToColNameMap.put(rightColOffset, rightJoinColName);
360  return;
361  }
362 
363  void addNullRejectedJoinCols(RexCall call,
364  LogicalFilter targetFilter,
365  Set<Integer> nullRejectedLeftJoinCols,
366  Set<Integer> nullRejectedRightJoinCols,
367  Map<Integer, String> leftJoinColToColNameMap,
368  Map<Integer, String> rightJoinColToColNameMap) {
369  if (isCandidateFilterPred(call) && call.getOperands().get(0) instanceof RexInputRef) {
370  RexInputRef col = (RexInputRef) call.getOperands().get(0);
371  int colId = col.getIndex();
372  String colName = targetFilter.getRowType().getFieldNames().get(colId);
373  Boolean l = false;
374  Boolean r = false;
375  if (leftJoinColToColNameMap.containsKey(colId)
376  && leftJoinColToColNameMap.get(colId).equals(colName)) {
377  l = true;
378  }
379  if (rightJoinColToColNameMap.containsKey(colId)
380  && rightJoinColToColNameMap.get(colId).equals(colName)) {
381  r = true;
382  }
383  if (l && !r) {
384  nullRejectedLeftJoinCols.add(colId);
385  return;
386  }
387  if (r && !l) {
388  nullRejectedRightJoinCols.add(colId);
389  return;
390  }
391  }
392  }
393 
394  void collectFilterCondition(RelNode curNode, List<LogicalFilter> collectedFilterNodes) {
395  if (curNode instanceof HepRelVertex) {
396  curNode = ((HepRelVertex) curNode).getCurrentRel();
397  }
398  if (curNode instanceof LogicalFilter) {
399  collectedFilterNodes.add((LogicalFilter) curNode);
400  }
401  if (curNode.getInputs().size() == 0) {
402  // end of the query plan, move out
403  return;
404  }
405  for (int i = 0; i < curNode.getInputs().size(); i++) {
406  collectFilterCondition(curNode.getInput(i), collectedFilterNodes);
407  }
408  }
409 
410  void collectProjectNode(RelNode curNode, List<LogicalProject> collectedProject) {
411  if (curNode instanceof HepRelVertex) {
412  curNode = ((HepRelVertex) curNode).getCurrentRel();
413  }
414  if (curNode instanceof LogicalProject) {
415  collectedProject.add((LogicalProject) curNode);
416  }
417  if (curNode.getInputs().size() == 0) {
418  // end of the query plan, move out
419  return;
420  }
421  for (int i = 0; i < curNode.getInputs().size(); i++) {
422  collectProjectNode(curNode.getInput(i), collectedProject);
423  }
424  }
425 
426  int traceColOffset(RelNode curNode, RexInputRef colRef, int startOffset) {
427  int colOffset = -1;
428  ArrayList<LogicalProject> collectedProjectNodes = new ArrayList<>();
429  collectProjectNode(curNode, collectedProjectNodes);
430  // the nearest project node that may permute the column offset
431  if (!collectedProjectNodes.isEmpty()) {
432  // get the closest project node from the cur join node's target child
433  LogicalProject projectNode = collectedProjectNodes.get(0);
434  Mappings.TargetMapping targetMapping = projectNode.getMapping();
435  if (null != colRef && null != targetMapping) {
436  // try to track the original col offset
437  int base_offset = colRef.getIndex() - startOffset;
438 
439  if (base_offset >= 0 && base_offset < targetMapping.getSourceCount()) {
440  colOffset = targetMapping.getSourceOpt(base_offset);
441  }
442  }
443  }
444  return colOffset;
445  }
446 
447  boolean isComparisonOp(RexCall c) {
448  SqlKind opKind = c.getKind();
449  return (SqlKind.BINARY_COMPARISON.contains(opKind)
450  || SqlKind.BINARY_EQUALITY.contains(opKind));
451  }
452 
453  boolean isNotNullFilter(RexCall c) {
454  return (c.op.kind == SqlKind.IS_NOT_NULL && c.operands.size() == 1);
455  }
456 
457  boolean isCandidateFilterPred(RexCall c) {
458  return (isNotNullFilter(c)
459  || (c.operands.size() == 2 && isComparisonOp(c)
460  && c.operands.get(0) instanceof RexInputRef
461  && c.operands.get(1) instanceof RexLiteral));
462  }
463 }
void collectFilterCondition(RelNode curNode, List< LogicalFilter > collectedFilterNodes)
std::string toString(const ExtArgumentType &sig_type)
void addNullRejectedJoinCols(RexCall call, LogicalFilter targetFilter, Set< Integer > nullRejectedLeftJoinCols, Set< Integer > nullRejectedRightJoinCols, Map< Integer, String > leftJoinColToColNameMap, Map< Integer, String > rightJoinColToColNameMap)
tuple r
Definition: test_fsi.py:16
std::string join(T const &container, std::string const &delim)
tuple root
Definition: setup.in.py:13
void addJoinCols(RexCall joinCond, LogicalJoin joinOp, Set< Integer > leftJoinCols, Set< Integer > rightJoinCols, Map< Integer, String > leftJoinColToColNameMap, Map< Integer, String > rightJoinColToColNameMap, Set< Integer > originalLeftJoinCols, Set< Integer > originalRightJoinCols, Map< Integer, String > originalLeftJoinColToColNameMap, Map< Integer, String > originalRightJoinColToColNameMap)
int traceColOffset(RelNode curNode, RexInputRef colRef, int startOffset)
void collectProjectNode(RelNode curNode, List< LogicalProject > collectedProject)