OmniSciDB  2e3a973ef4
OuterJoinOptViaNullRejectionRule.java
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.apache.calcite.rel.rules;
18 
19 import org.apache.calcite.plan.RelOptRuleCall;
20 import org.apache.calcite.plan.hep.HepRelVertex;
21 import org.apache.calcite.rel.RelNode;
22 import org.apache.calcite.rel.core.Join;
23 import org.apache.calcite.rel.core.JoinRelType;
24 import org.apache.calcite.rel.logical.LogicalFilter;
25 import org.apache.calcite.rel.logical.LogicalJoin;
26 import org.apache.calcite.rel.logical.LogicalProject;
27 import org.apache.calcite.rel.logical.LogicalTableScan;
28 import org.apache.calcite.rex.RexCall;
29 import org.apache.calcite.rex.RexInputRef;
30 import org.apache.calcite.rex.RexLiteral;
31 import org.apache.calcite.rex.RexNode;
32 import org.apache.calcite.sql.SqlKind;
33 import org.apache.calcite.tools.RelBuilder;
34 import org.apache.calcite.tools.RelBuilderFactory;
35 import org.apache.calcite.util.mapping.Mappings;
36 
37 import java.util.ArrayList;
38 import java.util.HashMap;
39 import java.util.HashSet;
40 import java.util.List;
41 import java.util.Map;
42 import java.util.Set;
43 
45  // goal: relax full outer join to either left or inner joins
46  // consider two tables 'foo(a int, b int)' and 'bar(c int, d int)'
47  // foo = {(1,3), (2,4), (NULL, 5)} // bar = {(1,2), (4, 3), (NULL, 5)}
48 
49  // 1. full outer join -> left
50  // : select * from foo full outer join bar on a = c where a is not null;
51  // = select * from foo left outer join bar on a = c where a is not null;
52 
53  // 2. full outer join -> inner
54  // : select * from foo full outer join bar on a = c where a is not null and c is
55  // not null; = select * from foo join bar on a = c; (or select * from foo, bar
56  // where a = c;)
57 
58  // 3. left outer join --> inner
59  // : select * from foo left outer join bar on a = c where c is not null;
60  // = select * from foo join bar on a = c; (or select * from foo, bar where a = c;)
61 
62  // null rejection: "col IS NOT NULL" or "col > NULL_INDICATOR" in WHERE clause
63  // i.e., col > 1 must reject any tuples having null value in a col column
64 
65  // todo(yoonmin): runtime query optimization via statistic
66  // in fact, we can optimize more broad range of the query having outer joins
67  // by using filter predicates on join tables (but not on join cols)
68  // because such filter conditions could affect join tables and
69  // they can make join cols to be null rejected
70 
71  public static Set<String> visitedJoinMemo = new HashSet<>();
72 
73  public OuterJoinOptViaNullRejectionRule(RelBuilderFactory relBuilderFactory) {
74  super(operand(RelNode.class, operand(Join.class, null, any())),
75  relBuilderFactory,
76  "OuterJoinOptViaNullRejectionRule");
77  clearMemo();
78  }
79 
80  void clearMemo() {
81  visitedJoinMemo.clear();
82  }
83 
84  @Override
85  public void onMatch(RelOptRuleCall call) {
86  RelNode parentNode = call.rel(0);
87  LogicalJoin join = (LogicalJoin) call.rel(1);
88  String condString = join.getCondition().toString();
89  if (visitedJoinMemo.contains(condString)) {
90  return;
91  } else {
92  visitedJoinMemo.add(condString);
93  }
94  if (!(join.getCondition() instanceof RexCall)) {
95  return; // an inner join
96  }
97  if (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.SEMI
98  || join.getJoinType() == JoinRelType.ANTI) {
99  return; // non target
100  }
101  RelNode joinLeftChild = ((HepRelVertex) join.getLeft()).getCurrentRel();
102  RelNode joinRightChild = ((HepRelVertex) join.getRight()).getCurrentRel();
103  if (joinLeftChild instanceof LogicalProject) {
104  return; // disable this opt when LHS has subquery (i.e., filter push-down)
105  }
106  if (!(joinRightChild instanceof LogicalTableScan)) {
107  return; // disable this opt when RHS has subquery (i.e., filter push-down)
108  }
109  // an outer join contains its join cond in itself,
110  // not in a filter as typical inner join op. does
111  RexCall joinCond = (RexCall) join.getCondition();
112  Set<Integer> leftJoinCols = new HashSet<>();
113  Set<Integer> rightJoinCols = new HashSet<>();
114  Map<Integer, String> leftJoinColToColNameMap = new HashMap<>();
115  Map<Integer, String> rightJoinColToColNameMap = new HashMap<>();
116  Set<Integer> originalLeftJoinCols = new HashSet<>();
117  Set<Integer> originalRightJoinCols = new HashSet<>();
118  Map<Integer, String> originalLeftJoinColToColNameMap = new HashMap<>();
119  Map<Integer, String> originalRightJoinColToColNameMap = new HashMap<>();
120  List<RexCall> capturedFilterPredFromJoin = new ArrayList<>();
121  if (joinCond.getKind() == SqlKind.EQUALS) {
122  addJoinCols(joinCond,
123  join,
124  leftJoinCols,
125  rightJoinCols,
126  leftJoinColToColNameMap,
127  rightJoinColToColNameMap,
128  originalLeftJoinCols,
129  originalRightJoinCols,
130  originalLeftJoinColToColNameMap,
131  originalRightJoinColToColNameMap);
132  } else if (joinCond.getKind() == SqlKind.AND || joinCond.getKind() == SqlKind.OR) {
133  for (RexNode n : joinCond.getOperands()) {
134  if (n instanceof RexCall) {
135  RexCall op = (RexCall) n;
136  if (op.getOperands().size() > 2
137  && op.getOperands().get(1) instanceof RexLiteral) {
138  // try to capture literal comparison of join column located in the cur join
139  // node
140  capturedFilterPredFromJoin.add(op);
141  continue;
142  }
143  addJoinCols(op,
144  join,
145  leftJoinCols,
146  rightJoinCols,
147  leftJoinColToColNameMap,
148  rightJoinColToColNameMap,
149  originalLeftJoinCols,
150  originalRightJoinCols,
151  originalLeftJoinColToColNameMap,
152  originalRightJoinColToColNameMap);
153  }
154  }
155  }
156 
157  if (leftJoinCols.isEmpty() || rightJoinCols.isEmpty()) {
158  return;
159  }
160 
161  // find filter node(s)
162  RelNode root = call.getPlanner().getRoot();
163  List<LogicalFilter> collectedFilterNodes = new ArrayList<>();
164  RelNode curNode = root;
165  final RelBuilder relBuilder = call.builder();
166  // collect filter nodes
167  collectFilterCondition(curNode, collectedFilterNodes);
168  if (collectedFilterNodes.isEmpty()) {
169  // we have a last chance to take a look at this join condition itself
170  // i.e., the filter preds lay with the join conditions in the same join node
171  // but for now we disable the optimization to avoid unexpected plan issue
172  return;
173  }
174 
175  // check whether join column has filter predicate(s)
176  // and collect join column info used in target join nodes to be translated
177  Set<Integer> nullRejectedLeftJoinCols = new HashSet<>();
178  Set<Integer> nullRejectedRightJoinCols = new HashSet<>();
179  for (LogicalFilter filter : collectedFilterNodes) {
180  List<RexNode> filterExprs = filter.getChildExps();
181  for (RexNode node : filterExprs) {
182  if (node instanceof RexCall) {
183  RexCall curExpr = (RexCall) node;
184  if (curExpr.getKind() == SqlKind.AND || curExpr.getKind() == SqlKind.OR) {
185  for (RexNode n : curExpr.getOperands()) {
186  if (n instanceof RexCall) {
187  RexCall c = (RexCall) n;
188  if (isCandidateFilterPred(c)) {
189  RexInputRef col = (RexInputRef) c.getOperands().get(0);
190  int colId = col.getIndex();
191  boolean leftFilter = leftJoinCols.contains(colId);
192  boolean rightFilter = rightJoinCols.contains(colId);
193  if (leftFilter && rightFilter) {
194  // here we currently do not have a concrete column tracing logic
195  // so it may become a source of plan issue, so we disable this opt
196  return;
197  }
199  filter,
200  nullRejectedLeftJoinCols,
201  nullRejectedRightJoinCols,
202  leftJoinColToColNameMap,
203  rightJoinColToColNameMap);
204  }
205  }
206  }
207  } else {
208  if (curExpr instanceof RexCall) {
209  if (isCandidateFilterPred(curExpr)) {
210  RexInputRef col = (RexInputRef) curExpr.getOperands().get(0);
211  int colId = col.getIndex();
212  boolean leftFilter = leftJoinCols.contains(colId);
213  boolean rightFilter = rightJoinCols.contains(colId);
214  if (leftFilter && rightFilter) {
215  // here we currently do not have a concrete column tracing logic
216  // so it may become a source of plan issue, so we disable this opt
217  return;
218  }
219  addNullRejectedJoinCols(curExpr,
220  filter,
221  nullRejectedLeftJoinCols,
222  nullRejectedRightJoinCols,
223  leftJoinColToColNameMap,
224  rightJoinColToColNameMap);
225  }
226  }
227  }
228  }
229  }
230  }
231 
232  if (!capturedFilterPredFromJoin.isEmpty()) {
233  for (RexCall c : capturedFilterPredFromJoin) {
234  RexInputRef col = (RexInputRef) c.getOperands().get(0);
235  int colId = col.getIndex();
236  String colName = join.getRowType().getFieldNames().get(colId);
237  Boolean l = false;
238  Boolean r = false;
239  if (originalLeftJoinColToColNameMap.containsKey(colId)
240  && originalLeftJoinColToColNameMap.get(colId).equals(colName)) {
241  l = true;
242  }
243  if (originalRightJoinColToColNameMap.containsKey(colId)
244  && originalRightJoinColToColNameMap.get(colId).equals(colName)) {
245  r = true;
246  }
247  if (l && !r) {
248  nullRejectedLeftJoinCols.add(colId);
249  } else if (r && !l) {
250  nullRejectedRightJoinCols.add(colId);
251  } else if (r && l) {
252  return;
253  }
254  }
255  }
256 
257  Boolean leftNullRejected = false;
258  Boolean rightNullRejected = false;
259  if (!nullRejectedLeftJoinCols.isEmpty()
260  && leftJoinCols.equals(nullRejectedLeftJoinCols)) {
261  leftNullRejected = true;
262  }
263  if (!nullRejectedRightJoinCols.isEmpty()
264  && rightJoinCols.equals(nullRejectedRightJoinCols)) {
265  rightNullRejected = true;
266  }
267 
268  // relax outer join condition depending on null rejected cols
269  RelNode newJoinNode = null;
270  Boolean needTransform = false;
271  if (join.getJoinType() == JoinRelType.FULL) {
272  // 1) full -> left
273  if (leftNullRejected && !rightNullRejected) {
274  newJoinNode = join.copy(join.getTraitSet(),
275  join.getCondition(),
276  join.getLeft(),
277  join.getRight(),
278  JoinRelType.LEFT,
279  join.isSemiJoinDone());
280  needTransform = true;
281  }
282 
283  // 2) full -> inner
284  if (leftNullRejected && rightNullRejected) {
285  newJoinNode = join.copy(join.getTraitSet(),
286  join.getCondition(),
287  join.getLeft(),
288  join.getRight(),
289  JoinRelType.INNER,
290  join.isSemiJoinDone());
291  needTransform = true;
292  }
293  } else if (join.getJoinType() == JoinRelType.LEFT) {
294  // 3) left -> inner
295  if (leftNullRejected) {
296  newJoinNode = join.copy(join.getTraitSet(),
297  join.getCondition(),
298  join.getLeft(),
299  join.getRight(),
300  JoinRelType.INNER,
301  join.isSemiJoinDone());
302  needTransform = true;
303  }
304  }
305  if (needTransform) {
306  relBuilder.push(newJoinNode);
307  parentNode.replaceInput(0, newJoinNode);
308  call.transformTo(parentNode);
309  }
310  return;
311  }
312 
313  void addJoinCols(RexCall joinCond,
314  LogicalJoin joinOp,
315  Set<Integer> leftJoinCols,
316  Set<Integer> rightJoinCols,
317  Map<Integer, String> leftJoinColToColNameMap,
318  Map<Integer, String> rightJoinColToColNameMap,
319  Set<Integer> originalLeftJoinCols,
320  Set<Integer> originalRightJoinCols,
321  Map<Integer, String> originalLeftJoinColToColNameMap,
322  Map<Integer, String> originalRightJoinColToColNameMap) {
323  if (joinCond.getOperands().size() != 2
324  || !(joinCond.getOperands().get(0) instanceof RexInputRef)
325  || !(joinCond.getOperands().get(1) instanceof RexInputRef)) {
326  return;
327  }
328  RexInputRef leftJoinCol = (RexInputRef) joinCond.getOperands().get(0);
329  RexInputRef rightJoinCol = (RexInputRef) joinCond.getOperands().get(1);
330  originalLeftJoinCols.add(leftJoinCol.getIndex());
331  originalRightJoinCols.add(rightJoinCol.getIndex());
332  originalLeftJoinColToColNameMap.put(leftJoinCol.getIndex(),
333  joinOp.getRowType().getFieldNames().get(leftJoinCol.getIndex()));
334  originalRightJoinColToColNameMap.put(rightJoinCol.getIndex(),
335  joinOp.getRowType().getFieldNames().get(rightJoinCol.getIndex()));
336  if (leftJoinCol.getIndex() > rightJoinCol.getIndex()) {
337  leftJoinCol = (RexInputRef) joinCond.getOperands().get(1);
338  rightJoinCol = (RexInputRef) joinCond.getOperands().get(0);
339  }
340  int originalLeftColOffset = traceColOffset(joinOp.getLeft(), leftJoinCol, 0);
341  int originalRightColOffset = traceColOffset(joinOp.getRight(),
342  rightJoinCol,
343  joinOp.getLeft().getRowType().getFieldCount());
344  if (originalLeftColOffset != -1) {
345  return;
346  }
347  int leftColOffset =
348  originalLeftColOffset == -1 ? leftJoinCol.getIndex() : originalLeftColOffset;
349  int rightColOffset = originalRightColOffset == -1 ? rightJoinCol.getIndex()
350  : originalRightColOffset;
351  String leftJoinColName = joinOp.getRowType().getFieldNames().get(leftColOffset);
352  String rightJoinColName =
353  joinOp.getRowType().getFieldNames().get(rightJoinCol.getIndex());
354  leftJoinCols.add(leftColOffset);
355  rightJoinCols.add(rightColOffset);
356  leftJoinColToColNameMap.put(leftColOffset, leftJoinColName);
357  rightJoinColToColNameMap.put(rightColOffset, rightJoinColName);
358  return;
359  }
360 
361  void addNullRejectedJoinCols(RexCall call,
362  LogicalFilter targetFilter,
363  Set<Integer> nullRejectedLeftJoinCols,
364  Set<Integer> nullRejectedRightJoinCols,
365  Map<Integer, String> leftJoinColToColNameMap,
366  Map<Integer, String> rightJoinColToColNameMap) {
367  if (isCandidateFilterPred(call) && call.getOperands().get(0) instanceof RexInputRef) {
368  RexInputRef col = (RexInputRef) call.getOperands().get(0);
369  int colId = col.getIndex();
370  String colName = targetFilter.getRowType().getFieldNames().get(colId);
371  Boolean l = false;
372  Boolean r = false;
373  if (leftJoinColToColNameMap.containsKey(colId)
374  && leftJoinColToColNameMap.get(colId).equals(colName)) {
375  l = true;
376  }
377  if (rightJoinColToColNameMap.containsKey(colId)
378  && rightJoinColToColNameMap.get(colId).equals(colName)) {
379  r = true;
380  }
381  if (l && !r) {
382  nullRejectedLeftJoinCols.add(colId);
383  return;
384  }
385  if (r && !l) {
386  nullRejectedRightJoinCols.add(colId);
387  return;
388  }
389  }
390  }
391 
392  void collectFilterCondition(RelNode curNode, List<LogicalFilter> collectedFilterNodes) {
393  if (curNode instanceof HepRelVertex) {
394  curNode = ((HepRelVertex) curNode).getCurrentRel();
395  }
396  if (curNode instanceof LogicalFilter) {
397  collectedFilterNodes.add((LogicalFilter) curNode);
398  }
399  if (curNode.getInputs().size() == 0) {
400  // end of the query plan, move out
401  return;
402  }
403  for (int i = 0; i < curNode.getInputs().size(); i++) {
404  collectFilterCondition(curNode.getInput(i), collectedFilterNodes);
405  }
406  }
407 
408  void collectProjectNode(RelNode curNode, List<LogicalProject> collectedProject) {
409  if (curNode instanceof HepRelVertex) {
410  curNode = ((HepRelVertex) curNode).getCurrentRel();
411  }
412  if (curNode instanceof LogicalProject) {
413  collectedProject.add((LogicalProject) curNode);
414  }
415  if (curNode.getInputs().size() == 0) {
416  // end of the query plan, move out
417  return;
418  }
419  for (int i = 0; i < curNode.getInputs().size(); i++) {
420  collectProjectNode(curNode.getInput(i), collectedProject);
421  }
422  }
423 
424  int traceColOffset(RelNode curNode, RexInputRef colRef, int startOffset) {
425  int colOffset = -1;
426  ArrayList<LogicalProject> collectedProjectNodes = new ArrayList<>();
427  collectProjectNode(curNode, collectedProjectNodes);
428  // the nearest project node that may permute the column offset
429  if (!collectedProjectNodes.isEmpty()) {
430  // get the closest project node from the cur join node's target child
431  LogicalProject projectNode = collectedProjectNodes.get(0);
432  Mappings.TargetMapping targetMapping = projectNode.getMapping();
433  if (null != colRef && null != targetMapping) {
434  // try to track the original col offset
435  int base_offset = colRef.getIndex() - startOffset;
436 
437  if (base_offset >= 0 && base_offset < targetMapping.getSourceCount()) {
438  colOffset = targetMapping.getSourceOpt(base_offset);
439  }
440  }
441  }
442  return colOffset;
443  }
444 
445  boolean isComparisonOp(RexCall c) {
446  SqlKind opKind = c.getKind();
447  return (SqlKind.BINARY_COMPARISON.contains(opKind)
448  || SqlKind.BINARY_EQUALITY.contains(opKind));
449  }
450 
451  boolean isCandidateFilterPred(RexCall c) {
452  return ((c.op.kind == SqlKind.IS_NOT_NULL && c.operands.size() == 1)
453  || (c.operands.size() == 2 && isComparisonOp(c)
454  && c.operands.get(0) instanceof RexInputRef
455  && c.operands.get(1) instanceof RexLiteral));
456  }
457 }
void collectFilterCondition(RelNode curNode, List< LogicalFilter > collectedFilterNodes)
void addNullRejectedJoinCols(RexCall call, LogicalFilter targetFilter, Set< Integer > nullRejectedLeftJoinCols, Set< Integer > nullRejectedRightJoinCols, Map< Integer, String > leftJoinColToColNameMap, Map< Integer, String > rightJoinColToColNameMap)
std::string join(T const &container, std::string const &delim)
void addJoinCols(RexCall joinCond, LogicalJoin joinOp, Set< Integer > leftJoinCols, Set< Integer > rightJoinCols, Map< Integer, String > leftJoinColToColNameMap, Map< Integer, String > rightJoinColToColNameMap, Set< Integer > originalLeftJoinCols, Set< Integer > originalRightJoinCols, Map< Integer, String > originalLeftJoinColToColNameMap, Map< Integer, String > originalRightJoinColToColNameMap)
int traceColOffset(RelNode curNode, RexInputRef colRef, int startOffset)
void collectProjectNode(RelNode curNode, List< LogicalProject > collectedProject)