OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ColumnIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Codec.h"
19 #include "Execute.h"
20 #include "WindowContext.h"
21 
22 // Code generation routines and helpers for working with column expressions.
23 
24 namespace {
25 
26 // Return the right decoder for a given column expression. Doesn't handle
27 // variable length data. The decoder encapsulates the code generation logic.
//
// Dispatch is two-level: first on the column's compression (encoding) kind, then,
// for unencoded columns, on the SQL type (decimals are first mapped to an integer
// type through decimal_to_int_type()).
28 std::shared_ptr<Decoder> get_col_decoder(const Analyzer::ColumnVar* col_var) {
29  const auto enc_type = col_var->get_compression();
30  const auto& ti = col_var->get_type_info();
31  switch (enc_type) {
32  case kENCODING_NONE: {
33  const auto int_type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
34  switch (int_type) {
// The FixedWidthInt argument is a byte count (compare `bit_width / 8` in the
// kENCODING_FIXED case below).
35  case kBOOLEAN:
36  return std::make_shared<FixedWidthInt>(1);
37  case kTINYINT:
38  return std::make_shared<FixedWidthInt>(1);
39  case kSMALLINT:
40  return std::make_shared<FixedWidthInt>(2);
41  case kINT:
42  return std::make_shared<FixedWidthInt>(4);
43  case kBIGINT:
44  return std::make_shared<FixedWidthInt>(8);
45  case kFLOAT:
46  return std::make_shared<FixedWidthReal>(false);
47  case kDOUBLE:
48  return std::make_shared<FixedWidthReal>(true);
// Unencoded time, timestamp and date values all use the 8-byte integer decoder.
49  case kTIME:
50  case kTIMESTAMP:
51  case kDATE:
52  return std::make_shared<FixedWidthInt>(8);
53  default:
// NOTE(review): no return follows this CHECK; if CHECK(false) ever returned,
// control would continue into the kENCODING_DICT case. Presumably CHECK aborts
// the process — confirm against Logger.h.
54  CHECK(false);
55  }
56  }
57  case kENCODING_DICT:
58  CHECK(ti.is_string());
59  // For dictionary-encoded columns encoded on less than 4 bytes, we can use
60  // unsigned representation for double the maximum cardinality. The inline
61  // null value is going to be the maximum value of the underlying type.
62  if (ti.get_size() < ti.get_logical_size()) {
63  return std::make_shared<FixedWidthUnsigned>(ti.get_size());
64  }
65  return std::make_shared<FixedWidthInt>(ti.get_size());
66  case kENCODING_FIXED: {
// The compression parameter is used as a bit width here and must be a whole
// number of bytes.
67  const auto bit_width = col_var->get_type_info().get_comp_param();
68  CHECK_EQ(0, bit_width % 8);
69  return std::make_shared<FixedWidthInt>(bit_width / 8);
70  }
// NOTE(review): listing line 71 is absent from this extract. It presumably holds
// the case label that opens the block closed on line 76 (a days-encoded date
// case, judging by the CHECK below) — restore it from the upstream file.
72  CHECK(ti.is_date_in_days());
// A compression parameter of 16 selects the 2-byte decoder; anything else the
// 4-byte one.
73  return col_var->get_type_info().get_comp_param() == 16
74  ? std::make_shared<FixedWidthSmallDate>(2)
75  : std::make_shared<FixedWidthSmallDate>(4);
76  }
77  default:
// Any other encoding kind is unsupported and terminates the process.
78  abort();
79  }
80 }
81 
82 size_t get_col_bit_width(const Analyzer::ColumnVar* col_var) {
83  const auto& type_info = col_var->get_type_info();
84  return get_bit_width(type_info);
85 }
86 
// NOTE(review): listing line 87 (the function signature) is absent from this
// extract. This is presumably the body of adjusted_range_table_index(col_var),
// the helper called by codegenColVar and codegenRowId — confirm upstream.
// Maps a range-table index of -1 to 0 and passes every other value through.
88  return col_var->get_rte_idx() == -1 ? 0 : col_var->get_rte_idx();
89 }
90 
91 } // namespace
92 
// Entry point for generating the IR value(s) of a column reference.
// A column is generated directly through codegenColVar() when the condition
// below holds; otherwise it goes through codegenOuterJoinNullPlaceholder(),
// which wraps the fetch in an outer-join match test.
93 std::vector<llvm::Value*> CodeGenerator::codegenColumn(const Analyzer::ColumnVar* col_var,
94  const bool fetch_column,
95  const CompilationOptions& co) {
// NOTE(review): listing lines 96 and 98 are absent from this extract. Line 98
// sits inside the condition below, so the full predicate cannot be read here.
// Visible parts: range-table index <= 0, or no outer-join match value recorded
// for this index.
97  if (col_var->get_rte_idx() <= 0 ||
99  !foundOuterJoinMatch(col_var->get_rte_idx())) {
100  return codegenColVar(col_var, fetch_column, true, co);
101  }
102  return codegenOuterJoinNullPlaceholder(col_var, fetch_column, co);
103 }
104 
// Generates the IR value(s) for a single column reference.
//
// col_var            column expression to generate code for
// fetch_column       forwarded to the column-id / byte-stream lookups and to the
//                    lazy-fetch bookkeeping below
// update_query_plan  when true, a lazily fetched column is recorded in
//                    plan_state_->columns_to_not_fetch_
// co                 compilation options (only hoist_literals is read directly)
//
// Returns one llvm::Value per physical value: several for a type backed by
// physical coordinate columns, three for a none-encoded string, otherwise one.
105 std::vector<llvm::Value*> CodeGenerator::codegenColVar(const Analyzer::ColumnVar* col_var,
106  const bool fetch_column,
107  const bool update_query_plan,
108  const CompilationOptions& co) {
// NOTE(review): listing line 109 is absent from this extract.
110  const bool hoist_literals = co.hoist_literals;
111  const int rte_idx = adjusted_range_table_index(col_var);
112  CHECK_LT(static_cast<size_t>(rte_idx), cgen_state_->frag_offsets_.size());
113  const auto& column_key = col_var->getColumnKey();
// Positive table id: a column descriptor can be looked up for this column.
114  if (column_key.table_id > 0) {
115  const auto cd = get_column_descriptor(column_key);
// The only virtual column handled is "rowid".
116  if (cd->isVirtualCol) {
117  CHECK(cd->columnName == "rowid");
118  return {codegenRowId(col_var, co)};
119  }
120  const auto col_ti = cd->columnType;
// Type backed by physical coordinate columns: generate each physical column
// (column ids col_id + 1, col_id + 2, ...) recursively and concatenate the
// resulting values.
121  if (col_ti.get_physical_coord_cols() > 0) {
122  std::vector<llvm::Value*> cols;
123  const auto col_id = column_key.column_id;
124  auto temp_column_key = column_key;
125  for (auto i = 0; i < col_ti.get_physical_coord_cols(); i++) {
126  temp_column_key.column_id = col_id + i + 1;
127  const auto cd0 = get_column_descriptor(temp_column_key);
128  CHECK(cd0);
129  const auto col0_ti = cd0->columnType;
130  CHECK(!cd0->isVirtualCol);
131  const auto col0_var =
132  makeExpr<Analyzer::ColumnVar>(col0_ti, temp_column_key, rte_idx);
// update_query_plan is false for the physical columns; the bookkeeping for them
// is done explicitly right below.
133  const auto col = codegenColVar(col0_var.get(), fetch_column, false, co);
134  cols.insert(cols.end(), col.begin(), col.end());
135  if (!fetch_column && plan_state_->isLazyFetchColumn(col_var)) {
136  plan_state_->columns_to_not_fetch_.insert(temp_column_key);
137  }
138  }
// Record the logical column itself as either not-to-fetch (lazy) or to-fetch.
139  if (!fetch_column && plan_state_->isLazyFetchColumn(col_var)) {
140  plan_state_->columns_to_not_fetch_.insert(column_key);
141  } else {
142  plan_state_->columns_to_fetch_.insert(column_key);
143  }
144  return cols;
145  }
146  } else {
// Non-positive table id: treated as a temporary table (per the error text).
147  if (col_var->get_type_info().is_geometry()) {
148  throw std::runtime_error(
149  "Geospatial columns not supported in temporary tables yet");
150  }
151  }
// A reference to a grouped column resolves to an already generated value.
152  const auto grouped_col_lv = resolveGroupedColumnReference(col_var);
153  if (grouped_col_lv) {
154  return {grouped_col_lv};
155  }
// Cache key for fetch_cache_: hash of the expression's string form (see the
// comment below for why the column id alone is not used).
156  const auto col_var_hash = boost::hash_value(col_var->toString());
// NOTE(review): listing line 158, the initializer of window_func_context, is
// absent from this extract.
157  const auto window_func_context =
159  // only generate the decoding code once; if a column has been previously
160  // fetched in the generated IR, we'll reuse it
161  // here, we do not just use (local) column id since our analyzer may cast the same
162  // col_var with different types depending on the (aggregate) function that the col_var
163  // is used i.e., SELECT COUNT(DISTINCT x), MIN(x) FROM ...
// The cache is bypassed entirely while a window function context is active.
164  if (!window_func_context) {
165  auto it = cgen_state_->fetch_cache_.find(col_var_hash);
166  if (it != cgen_state_->fetch_cache_.end()) {
167  return {it->second};
168  }
169  }
170  const auto hash_join_lhs = hashJoinLhs(col_var);
171  // Note(jclay): This has been prone to cause failures in some overlaps joins.
172  // I believe most of the issues are worked out now, but a good place to check if
173  // failures are happening.
174 
175  // Use the already fetched left-hand side of an equi-join if the types are identical.
176  // Currently, types can only be different because of different underlying dictionaries.
177  if (hash_join_lhs && hash_join_lhs->get_type_info() == col_var->get_type_info()) {
178  if (plan_state_->isLazyFetchColumn(col_var)) {
179  plan_state_->columns_to_fetch_.insert(col_var->getColumnKey());
180  }
181  return codegen(hash_join_lhs.get(), fetch_column, co);
182  }
// Row position to read from; rewritten through codegenWindowPosition() when a
// window function context is active.
183  auto pos_arg = posArg(col_var);
184  if (window_func_context) {
185  pos_arg = codegenWindowPosition(window_func_context, pos_arg);
186  }
187  auto col_byte_stream = colByteStream(col_var, fetch_column, hoist_literals);
// Lazily fetched column: return the row position instead of a decoded value,
// adding the fragment offset for range-table indexes above 0 when one is set.
188  if (plan_state_->isLazyFetchColumn(col_var)) {
189  if (update_query_plan) {
190  plan_state_->columns_to_not_fetch_.insert(col_var->getColumnKey());
191  }
192  if (rte_idx > 0) {
193  const auto offset = cgen_state_->frag_offsets_[rte_idx];
194  if (offset) {
195  return {cgen_state_->ir_builder_.CreateAdd(pos_arg, offset)};
196  } else {
197  return {pos_arg};
198  }
199  }
200  return {pos_arg};
201  }
202  const auto& col_ti = col_var->get_type_info();
// None-encoded (real) strings: decode into view/pointer/length values and cache
// them unless a window function context is active.
203  if (col_ti.is_string() && col_ti.get_compression() == kENCODING_NONE) {
204  const auto varlen_str_column_lvs =
205  codegenVariableLengthStringColVar(col_byte_stream, pos_arg);
206  if (!window_func_context) {
207  auto it_ok = cgen_state_->fetch_cache_.insert(
208  std::make_pair(col_var_hash, varlen_str_column_lvs));
209  CHECK(it_ok.second);
210  }
211  return varlen_str_column_lvs;
212  }
// Arrays are represented by the byte stream itself; nothing is decoded here.
213  if (col_ti.is_array()) {
214  return {col_byte_stream};
215  }
216  if (window_func_context) {
// NOTE(review): listing line 217 is absent from this extract. It presumably
// begins a `return {...}` calling the window variant of the fixed-length
// decoder with the arguments on line 218 — confirm upstream.
218  col_var, col_byte_stream, pos_arg, window_func_context)};
219  }
// Fixed-length column outside a window context: decode and cache. Unlike the
// string path, the insertion result is not CHECKed; the value stored under the
// key is what gets returned.
220  const auto fixed_length_column_lv =
221  codegenFixedLengthColVar(col_var, col_byte_stream, pos_arg);
222  auto it_ok = cgen_state_->fetch_cache_.insert(
223  std::make_pair(col_var_hash, std::vector<llvm::Value*>{fixed_length_column_lv}));
224  return {it_ok.first->second};
225 }
226 
// NOTE(review): listing line 227 (first line of the signature) and line 230 are
// absent from this extract. Per the Doxygen index on this page the function is
//   llvm::Value* codegenWindowPosition(const WindowFunctionContext*, llvm::Value*).
// Emits a call to "row_number_window_func", passing the address of the window
// context's output buffer (as an integer constant) and the incoming position,
// and returns the call's result.
228  const WindowFunctionContext* window_func_context,
229  llvm::Value* pos_arg) {
231  const auto window_position = cgen_state_->emitCall(
232  "row_number_window_func",
// The host pointer returned by output() is embedded in the IR as an integer.
233  {cgen_state_->llInt(reinterpret_cast<const int64_t>(window_func_context->output())),
234  pos_arg});
235  return window_position;
236 }
237 
238 // Generate code for fixed length column types (number, timestamp or date,
239 // dictionary-encoded string)
// NOTE(review): listing line 240 (first line of the signature) and line 245 are
// absent from this extract; this is presumably
// CodeGenerator::codegenFixedLengthColVar, judging by its call sites.
// Decodes the value at pos_arg from col_byte_stream with the decoder chosen by
// get_col_decoder(), then brings integer results to the column's bit width and,
// where needed, remaps the narrow encoded null to the column type's null.
241  const Analyzer::ColumnVar* col_var,
242  llvm::Value* col_byte_stream,
243  llvm::Value* pos_arg,
244  const WindowFunctionContext* window_function_context) {
246  const auto decoder = get_col_decoder(col_var);
247  auto dec_val = decoder->codegenDecode(col_byte_stream, pos_arg, cgen_state_->module_);
// The decoded instruction is placed at the builder's current insert point here.
248  cgen_state_->ir_builder_.Insert(dec_val);
249  auto dec_type = dec_val->getType();
250  llvm::Value* dec_val_cast{nullptr};
251  const auto& col_ti = col_var->get_type_info();
252  if (dec_type->isIntegerTy()) {
253  auto dec_width = static_cast<llvm::IntegerType*>(dec_type)->getBitWidth();
254  auto col_width = get_col_bit_width(col_var);
// Sign-extend when the column is wider than the decoded value; otherwise use a
// truncating cast.
255  dec_val_cast = cgen_state_->ir_builder_.CreateCast(
256  static_cast<size_t>(col_width) > dec_width ? llvm::Instruction::CastOps::SExt
257  : llvm::Instruction::CastOps::Trunc,
258  dec_val,
259  get_int_type(col_width, cgen_state_->context_));
260  bool adjust_fixed_enc_null = true;
261  if (window_function_context &&
262  window_function_context->getWindowFunction()->hasRangeModeFraming()) {
263  // we only need to cast it to 8 byte iff it is encoded type
264  // (i.e., the size of non-encoded timestamp type is 8 byte)
265  const auto order_key_ti =
266  window_function_context->getOrderKeyColumnBufferTypes().front();
// Range-mode framing over a 4-byte timestamp order key skips the null
// adjustment below.
267  if (order_key_ti.is_timestamp() && order_key_ti.get_size() == 4) {
268  adjust_fixed_enc_null = false;
269  }
270  }
// Null adjustment applies to nullable fixed-encoded columns and to nullable
// dictionary-encoded columns stored in fewer than 4 bytes.
271  if (adjust_fixed_enc_null &&
272  (col_ti.get_compression() == kENCODING_FIXED ||
273  (col_ti.get_compression() == kENCODING_DICT && col_ti.get_size() < 4)) &&
274  !col_ti.get_notnull()) {
275  dec_val_cast = codgenAdjustFixedEncNull(dec_val_cast, col_ti);
276  }
277  } else {
// Floating-point result: must be an unencoded FLOAT or DOUBLE column whose
// type matches the decoded LLVM type; used as-is.
278  CHECK_EQ(kENCODING_NONE, col_ti.get_compression());
279  CHECK(dec_type->isFloatTy() || dec_type->isDoubleTy());
280  if (dec_type->isDoubleTy()) {
281  CHECK(col_ti.get_type() == kDOUBLE);
282  } else if (dec_type->isFloatTy()) {
283  CHECK(col_ti.get_type() == kFLOAT);
284  }
285  dec_val_cast = dec_val;
286  }
287  CHECK(dec_val_cast);
288  return dec_val_cast;
289 }
290 
// NOTE(review): listing lines 291 (first line of the signature), 296, 305 and
// 328 are absent from this extract. Per the Doxygen index on this page this is
// CodeGenerator::codegenFixedLengthColVarInWindow.
// Reads a fixed-length column value inside a window function, guarded by a
// position check: the value is decoded only in the "window.pos_valid" block and
// a PHI in "window.pos_notvalid" selects between it and the type's inline null.
// NOTE(review): the index shows window_function_context declared with a default
// of nullptr, yet it is dereferenced unconditionally below — confirm no caller
// relies on the default.
292  const Analyzer::ColumnVar* col_var,
293  llvm::Value* col_byte_stream,
294  llvm::Value* pos_arg,
295  const WindowFunctionContext* window_function_context) {
297  const auto orig_bb = cgen_state_->ir_builder_.GetInsertBlock();
298  const auto pos_valid_bb = llvm::BasicBlock::Create(
299  cgen_state_->context_, "window.pos_valid", cgen_state_->current_func_);
300  const auto pos_notvalid_bb = llvm::BasicBlock::Create(
301  cgen_state_->context_, "window.pos_notvalid", cgen_state_->current_func_);
// A position is valid when it is >= 0 (signed compare).
302  const auto pos_is_valid =
303  cgen_state_->ir_builder_.CreateICmpSGE(pos_arg, cgen_state_->llInt(int64_t(0)));
// NOTE(review): the right-hand side of this comparison (listing line 305) is
// missing; per the comment that follows it is presumably the NTH_VALUE kind.
304  if (window_function_context->getWindowFunction()->getKind() ==
306  // NTH_VALUE needs to return null if N > partition size
307  // To do this, we store null value to the output buffer of the current row
308  // if following requirements for processing NTH_VALUE are not satisfied
309  // 1. current row is valid
310  // 2. N < partition size that the current row is included
311  const auto window_func_args = window_function_context->getWindowFunction()->getArgs();
// NOTE(review): the dynamic_cast result is dereferenced on the next line
// without a null check; CHECK(n_value_lv) only tests the LLVM constant.
// Presumably the second argument is always a Constant here — confirm, or
// CHECK the pointer.
312  auto n_value_ptr = dynamic_cast<Analyzer::Constant*>(window_func_args[1].get());
313  auto n_value_lv = cgen_state_->llInt((int64_t)n_value_ptr->get_constval().intval);
314  CHECK(n_value_lv);
315 
316  auto partition_index_lv =
317  executor_->codegenCurrentPartitionIndex(window_function_context, pos_arg);
318  // # elems per partition
// The host address of the per-partition counts buffer is embedded as an integer
// constant and converted to an i32 pointer.
319  const auto pi32_type =
320  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
321  const auto partition_count_buf =
322  cgen_state_->llInt(reinterpret_cast<int64_t>(window_function_context->counts()));
323  auto partition_count_buf_ptr_lv =
324  cgen_state_->ir_builder_.CreateIntToPtr(partition_count_buf, pi32_type);
325 
326  // # elems of the given partition
// NOTE(review): listing line 328, the start of this initializer, is missing;
// the two visible arguments suggest an address computation into the counts
// buffer at the partition index.
327  const auto num_elem_current_partition_ptr =
329  partition_count_buf_ptr_lv,
330  partition_index_lv);
// Load the count and bring it to 64 bits for the comparison with N.
331  const auto num_elem_current_partition_lv = cgen_state_->castToTypeIn(
332  cgen_state_->ir_builder_.CreateLoad(
333  num_elem_current_partition_ptr->getType()->getPointerElementType(),
334  num_elem_current_partition_ptr),
335  64);
336  auto is_valid_n_value_lv = cgen_state_->ir_builder_.CreateICmpSLT(
337  n_value_lv, num_elem_current_partition_lv, "is_valid_nth_value");
338  auto cond_lv = cgen_state_->ir_builder_.CreateAnd(
339  is_valid_n_value_lv, pos_is_valid, "is_valid_row_for_nth_value");
340  // return the current row value iff 1) it is a valid row and 2) N < partition_size
341  cgen_state_->ir_builder_.CreateCondBr(cond_lv, pos_valid_bb, pos_notvalid_bb);
342  } else {
343  // return the current row value if it is valid
344  cgen_state_->ir_builder_.CreateCondBr(pos_is_valid, pos_valid_bb, pos_notvalid_bb);
345  }
// Valid path: decode the value, then jump to the join block.
346  cgen_state_->ir_builder_.SetInsertPoint(pos_valid_bb);
347  const auto fixed_length_column_lv = codegenFixedLengthColVar(
348  col_var, col_byte_stream, pos_arg, window_function_context);
349  cgen_state_->ir_builder_.CreateBr(pos_notvalid_bb);
// Join block: decoded value when arriving from pos_valid_bb, inline null when
// arriving directly from the original block.
350  cgen_state_->ir_builder_.SetInsertPoint(pos_notvalid_bb);
351  const auto window_func_call_phi =
352  cgen_state_->ir_builder_.CreatePHI(fixed_length_column_lv->getType(), 2);
353  window_func_call_phi->addIncoming(fixed_length_column_lv, pos_valid_bb);
354  const auto& col_ti = col_var->get_type_info();
355  const auto null_lv =
356  col_ti.is_fp() ? static_cast<llvm::Value*>(cgen_state_->inlineFpNull(col_ti))
357  : static_cast<llvm::Value*>(cgen_state_->inlineIntNull(col_ti));
358  window_func_call_phi->addIncoming(null_lv, orig_bb);
359  return window_func_call_phi;
360 }
361 
// NOTE(review): listing line 362 (first line of the signature) and line 365 are
// absent from this extract; this is presumably
// CodeGenerator::codegenVariableLengthStringColVar, judging by its call site in
// codegenColVar.
// Decodes a none-encoded string at pos_arg and returns three values: the string
// view struct, its field 0, and its field 1 truncated to 32 bits.
363  llvm::Value* col_byte_stream,
364  llvm::Value* pos_arg) {
366  // real (not dictionary-encoded) strings; store the pointer to the payload
367  auto* const string_view = cgen_state_->emitExternalCall(
368  "string_decode", createStringViewStructType(), {col_byte_stream, pos_arg});
// Field 0 is used as the string pointer and field 1 as the length.
369  auto* str_lv = cgen_state_->ir_builder_.CreateExtractValue(string_view, 0);
370  auto* len_lv = cgen_state_->ir_builder_.CreateExtractValue(string_view, 1);
371  len_lv = cgen_state_->ir_builder_.CreateTrunc(
372  len_lv, llvm::Type::getInt32Ty(cgen_state_->context_));
373  return {string_view, str_lv, len_lv};
374 }
375 
// NOTE(review): listing line 376 (first line of the signature) and line 378 are
// absent from this extract. Per the Doxygen index on this page the function is
//   llvm::Value* codegenRowId(const Analyzer::ColumnVar* col_var,
//                             const CompilationOptions& co).
// Builds the row id for col_var's table: the row position, plus a fragment
// offset (cached value, or loaded from the "frag_row_off" argument for
// range-table indexes above 0), plus the table's start rowid when it is > 0.
377  const CompilationOptions& co) {
379  const auto offset_lv = cgen_state_->frag_offsets_[adjusted_range_table_index(col_var)];
380  llvm::Value* start_rowid_lv{nullptr};
381  const auto& table_generation = executor()->getTableGeneration(col_var->getTableKey());
382  if (table_generation.start_rowid > 0) {
383  // Handle the multi-node case: each leaf receives a start rowid used
384  // to offset the local rowid and generate a cluster-wide unique rowid.
385  Datum d;
386  d.bigintval = table_generation.start_rowid;
387  const auto start_rowid = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
388  const auto start_rowid_lvs = codegen(start_rowid.get(), kENCODING_NONE, {}, co);
389  CHECK_EQ(size_t(1), start_rowid_lvs.size());
390  start_rowid_lv = start_rowid_lvs.front();
391  }
392  auto rowid_lv = posArg(col_var);
393  if (offset_lv) {
394  rowid_lv = cgen_state_->ir_builder_.CreateAdd(rowid_lv, offset_lv);
395  } else if (col_var->get_rte_idx() > 0) {
// No cached fragment offset: load it from the row function's "frag_row_off"
// argument, indexed by the range-table index.
396  auto frag_off_ptr = get_arg_by_name(cgen_state_->row_func_, "frag_row_off");
397  auto input_off_ptr = cgen_state_->ir_builder_.CreateGEP(
398  frag_off_ptr->getType()->getScalarType()->getPointerElementType(),
399  frag_off_ptr,
400  cgen_state_->llInt(int32_t(col_var->get_rte_idx())));
401  auto rowid_offset_lv = cgen_state_->ir_builder_.CreateLoad(
402  input_off_ptr->getType()->getPointerElementType(), input_off_ptr);
403  rowid_lv = cgen_state_->ir_builder_.CreateAdd(rowid_lv, rowid_offset_lv);
404  }
// Same condition as above, so start_rowid_lv has been set when this runs.
405  if (table_generation.start_rowid > 0) {
406  CHECK(start_rowid_lv);
407  rowid_lv = cgen_state_->ir_builder_.CreateAdd(rowid_lv, start_rowid_lv);
408  }
409  return rowid_lv;
410 }
411 
412 namespace {
413 
414 SQLTypes get_phys_int_type(const size_t byte_sz) {
415  switch (byte_sz) {
416  case 1:
417  return kBOOLEAN;
418  // TODO: kTINYINT
419  case 2:
420  return kSMALLINT;
421  case 4:
422  return kINT;
423  case 8:
424  return kBIGINT;
425  default:
426  CHECK(false);
427  }
428  return kNULLT;
429 }
430 
431 } // namespace
432 
// Remaps the null sentinel of a value stored in fewer bytes than its logical
// type: the value is truncated back to its physical width and passed to a
// "cast_<from>_to_<to>_nullable" runtime function together with the physical
// null and the logical type's inline null.
// (The "codgen" spelling is the declared name; callers use it as-is.)
433 llvm::Value* CodeGenerator::codgenAdjustFixedEncNull(llvm::Value* val,
434  const SQLTypeInfo& col_ti) {
// NOTE(review): listing line 435 is absent from this extract.
// Only meaningful when the physical size is smaller than the logical size.
436  CHECK_LT(col_ti.get_size(), col_ti.get_logical_size());
437  const auto col_phys_width = col_ti.get_size() * 8;
// Source type name used to build the runtime function name, e.g. "int16_t".
438  auto from_typename = "int" + std::to_string(col_phys_width) + "_t";
439  auto adjusted = cgen_state_->ir_builder_.CreateCast(
440  llvm::Instruction::CastOps::Trunc,
441  val,
442  get_int_type(col_phys_width, cgen_state_->context_));
// Dictionary-encoded: the source type is unsigned and its null is the maximum
// value of the 1- or 2-byte unsigned type.
443  if (col_ti.get_compression() == kENCODING_DICT) {
444  from_typename = "u" + from_typename;
445  llvm::Value* from_null{nullptr};
446  switch (col_ti.get_size()) {
447  case 1:
448  from_null = cgen_state_->llInt(std::numeric_limits<uint8_t>::max());
449  break;
450  case 2:
451  from_null = cgen_state_->llInt(std::numeric_limits<uint16_t>::max());
452  break;
453  default:
454  CHECK(false);
455  }
456  return cgen_state_->emitCall(
457  "cast_" + from_typename + "_to_" + numeric_type_name(col_ti) + "_nullable",
458  {adjusted, from_null, cgen_state_->inlineIntNull(col_ti)});
459  }
// Otherwise: describe the physical storage as a signed integer type of the
// same byte size and use that type's inline null as the source null.
460  SQLTypeInfo col_phys_ti(get_phys_int_type(col_ti.get_size()),
461  col_ti.get_dimension(),
462  col_ti.get_scale(),
463  false,
// NOTE(review): listing line 464, one constructor argument, is absent from this
// extract.
465  0,
466  col_ti.get_subtype());
467  return cgen_state_->emitCall(
468  "cast_" + from_typename + "_to_" + numeric_type_name(col_ti) + "_nullable",
469  {adjusted,
470  cgen_state_->inlineIntNull(col_phys_ti),
471  cgen_state_->inlineIntNull(col_ti)});
472 }
473 
474 llvm::Value* CodeGenerator::foundOuterJoinMatch(const size_t nesting_level) const {
475  CHECK_GE(nesting_level, size_t(1));
476  CHECK_LE(nesting_level,
477  static_cast<size_t>(cgen_state_->outer_join_match_found_per_level_.size()));
478  return cgen_state_->outer_join_match_found_per_level_[nesting_level - 1];
479 }
480 
// NOTE(review): listing lines 481 (first line of the signature), 485, 503, 524
// and 525 are absent from this extract. Per the Doxygen index on this page this
// is CodeGenerator::codegenOuterJoinNullPlaceholder.
// Generates a column fetch guarded by the outer-join match value of the
// column's nesting level: one block fetches the real column, another produces a
// null constant of the same type, and PHI nodes in a third block pick between
// them. Throws std::runtime_error for none-encoded strings, arrays and geometry.
482  const Analyzer::ColumnVar* col_var,
483  const bool fetch_column,
484  const CompilationOptions& co) {
486  const auto grouped_col_lv = resolveGroupedColumnReference(col_var);
487  if (grouped_col_lv) {
488  return {grouped_col_lv};
489  }
490  const auto outer_join_args_bb = llvm::BasicBlock::Create(
491  cgen_state_->context_, "outer_join_args", cgen_state_->current_func_);
492  const auto outer_join_nulls_bb = llvm::BasicBlock::Create(
493  cgen_state_->context_, "outer_join_nulls", cgen_state_->current_func_);
494  const auto phi_bb = llvm::BasicBlock::Create(
495  cgen_state_->context_, "outer_join_phi", cgen_state_->current_func_);
// Branch on the match value: real fetch when set, null placeholder otherwise.
496  const auto outer_join_match_lv = foundOuterJoinMatch(col_var->get_rte_idx());
497  CHECK(outer_join_match_lv);
498  cgen_state_->ir_builder_.CreateCondBr(
499  outer_join_match_lv, outer_join_args_bb, outer_join_nulls_bb);
500  const auto back_from_outer_join_bb = llvm::BasicBlock::Create(
501  cgen_state_->context_, "back_from_outer_join", cgen_state_->current_func_);
502  cgen_state_->ir_builder_.SetInsertPoint(outer_join_args_bb);
504  const auto orig_lvs = codegenColVar(col_var, fetch_column, true, co);
505  // sometimes col_var used in the join qual needs to cast its column to sync with
506  // the target join column's type which generates a code with a new bb like cast_bb
507  // if so, we need to keep that bb to correctly construct phi_bb
508  // i.e., use cast_bb instead of outer_join_args_bb for the "casted" column
509  // which is the right end point
510  const auto needs_casting_col_var = needCastForHashJoinLhs(col_var);
// cast_bb is whatever block the builder is in after generating the column.
511  auto* cast_bb = cgen_state_->ir_builder_.GetInsertBlock();
512  cgen_state_->ir_builder_.CreateBr(phi_bb);
513  cgen_state_->ir_builder_.SetInsertPoint(outer_join_nulls_bb);
514  const auto& null_ti = col_var->get_type_info();
// NOTE(review): this throw happens after blocks and branches were already
// emitted into the current function.
515  if ((null_ti.is_string() && null_ti.get_compression() == kENCODING_NONE) ||
516  null_ti.is_array() || null_ti.is_geometry()) {
517  throw std::runtime_error("Projection type " + null_ti.get_type_name() +
518  " not supported for outer joins yet");
519  }
520  const auto null_constant = makeExpr<Analyzer::Constant>(null_ti, true, Datum{0});
// NOTE(review): the remaining arguments of this codegen() call (listing lines
// 524-525) are absent from this extract.
521  const auto null_target_lvs =
522  codegen(null_constant.get(),
523  false,
526  cgen_state_->ir_builder_.CreateBr(phi_bb);
527  CHECK_EQ(orig_lvs.size(), null_target_lvs.size());
528  cgen_state_->ir_builder_.SetInsertPoint(phi_bb);
// One PHI per generated value; both incoming values must share an LLVM type.
529  std::vector<llvm::Value*> target_lvs;
530  for (size_t i = 0; i < orig_lvs.size(); ++i) {
531  const auto target_type = orig_lvs[i]->getType();
532  CHECK_EQ(target_type, null_target_lvs[i]->getType());
533  auto target_phi = cgen_state_->ir_builder_.CreatePHI(target_type, 2);
534  const auto orig_lvs_bb = needs_casting_col_var ? cast_bb : outer_join_args_bb;
535  target_phi->addIncoming(orig_lvs[i], orig_lvs_bb);
536  target_phi->addIncoming(null_target_lvs[i], outer_join_nulls_bb);
537  target_lvs.push_back(target_phi);
538  }
// Continue code generation in a fresh block after the PHIs.
539  cgen_state_->ir_builder_.CreateBr(back_from_outer_join_bb);
540  cgen_state_->ir_builder_.SetInsertPoint(back_from_outer_join_bb);
541  return target_lvs;
542 }
543 
// NOTE(review): listing line 544 (first line of the signature) is absent from
// this extract; this is presumably CodeGenerator::resolveGroupedColumnReference,
// judging by its call sites.
// Returns the cached group-by expression value for a column reference that is
// really an Analyzer::Var pointing at a GROUP BY key; returns nullptr for
// ordinary columns (range-table index >= 0) and for other kinds of Var.
545  const Analyzer::ColumnVar* col_var) {
546  if (col_var->get_rte_idx() >= 0) {
547  return nullptr;
548  }
549  const auto& column_key = col_var->getColumnKey();
// NOTE(review): the range-table index is negative at this point, so the second
// disjunct can never hold; this CHECK effectively requires column_id == 0.
550  CHECK((column_key.column_id == 0) ||
551  (col_var->get_rte_idx() >= 0 && column_key.table_id > 0));
552  const auto var = dynamic_cast<const Analyzer::Var*>(col_var);
553  CHECK(var);
// Var numbers are 1-based indexes into group_by_expr_cache_.
554  const auto var_no = var->get_varno();
555  CHECK_GE(var_no, 1);
556  if (var->get_which_row() == Analyzer::Var::kGROUPBY) {
557  CHECK_LE(static_cast<size_t>(var_no), cgen_state_->group_by_expr_cache_.size());
558  return cgen_state_->group_by_expr_cache_[var_no - 1];
559  }
560  return nullptr;
561 }
562 
563 // returns the byte stream argument and the position for the given column
// NOTE(review): listing line 564 (first line of the signature) is absent from
// this extract; this is presumably CodeGenerator::colByteStream, judging by its
// call site in codegenColVar.
// Looks up the row function argument named "col_buf<N>", where N is the local
// column id from the plan state, checks that it is an i8 pointer and returns
// it. Only the byte stream argument is returned here, not a position.
// hoist_literals is not used in the visible body.
565  const bool fetch_column,
566  const bool hoist_literals) {
567  CHECK_GE(cgen_state_->row_func_->arg_size(), size_t(3));
568  const auto stream_arg_name =
569  "col_buf" + std::to_string(plan_state_->getLocalColumnId(col_var, fetch_column));
570  for (auto& arg : cgen_state_->row_func_->args()) {
571  if (arg.getName() == stream_arg_name) {
572  CHECK(arg.getType() == llvm::Type::getInt8PtrTy(cgen_state_->context_));
573  return &arg;
574  }
575  }
// No argument with that name: this is treated as a fatal internal error.
576  CHECK(false);
577  return nullptr;
578 }
579 
// Returns the row position value to use when reading `expr`.
// For a column with range-table index > 0 this is the position registered for
// that index in scan_idx_to_hash_pos_; for anything else it is the row
// function's 64-bit "pos" argument. Aborts if no "pos" argument exists.
580 llvm::Value* CodeGenerator::posArg(const Analyzer::Expr* expr) const {
// NOTE(review): listing line 581 is absent from this extract.
582  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(expr);
583  if (col_var && col_var->get_rte_idx() > 0) {
584  const auto hash_pos_it =
585  cgen_state_->scan_idx_to_hash_pos_.find(col_var->get_rte_idx());
586  CHECK(hash_pos_it != cgen_state_->scan_idx_to_hash_pos_.end());
// A registered pointer must point at an i32: load it and sign-extend to i64.
587  if (hash_pos_it->second->getType()->isPointerTy()) {
588  CHECK(hash_pos_it->second->getType()->getPointerElementType()->isIntegerTy(32));
589  llvm::Value* result = cgen_state_->ir_builder_.CreateLoad(
590  hash_pos_it->second->getType()->getPointerElementType(), hash_pos_it->second);
591  result = cgen_state_->ir_builder_.CreateSExt(
592  result, get_int_type(64, cgen_state_->context_));
593  return result;
594  }
// A non-pointer entry is returned as-is.
595  return hash_pos_it->second;
596  }
597  for (auto& arg : cgen_state_->row_func_->args()) {
598  if (arg.getName() == "pos") {
599  CHECK(arg.getType()->isIntegerTy(64));
600  return &arg;
601  }
602  }
603  abort();
604 }
605 
// NOTE(review): listing line 606 (the signature) is absent from this extract.
// Per the Doxygen index on this page the function is
//   const Analyzer::Expr* remove_cast_to_int(const Analyzer::Expr* expr).
// Returns the operand of `expr` when `expr` is a CAST whose target type is an
// integer type; returns nullptr in every other case (callers fall back to the
// original expression on nullptr).
607  const auto uoper = dynamic_cast<const Analyzer::UOper*>(expr);
608  if (!uoper || uoper->get_optype() != kCAST) {
609  return nullptr;
610  }
611  const auto& target_ti = uoper->get_type_info();
612  if (!target_ti.is_integer()) {
613  return nullptr;
614  }
615  return uoper->get_operand();
616 }
617 
618 std::shared_ptr<const Analyzer::Expr> CodeGenerator::hashJoinLhs(
619  const Analyzer::ColumnVar* rhs) const {
620  for (const auto& tautological_eq : plan_state_->join_info_.equi_join_tautologies_) {
621  CHECK(IS_EQUIVALENCE(tautological_eq->get_optype()));
622  if (dynamic_cast<const Analyzer::ExpressionTuple*>(
623  tautological_eq->get_left_operand())) {
624  auto lhs_col = hashJoinLhsTuple(rhs, tautological_eq.get());
625  if (lhs_col) {
626  return lhs_col;
627  }
628  } else {
629  auto eq_right_op = tautological_eq->get_right_operand();
630  if (!rhs->get_type_info().is_string()) {
631  eq_right_op = remove_cast_to_int(eq_right_op);
632  }
633  if (!eq_right_op) {
634  eq_right_op = tautological_eq->get_right_operand();
635  }
636  if (*eq_right_op == *rhs) {
637  auto eq_left_op = tautological_eq->get_left_operand();
638  if (!eq_left_op->get_type_info().is_string()) {
639  eq_left_op = remove_cast_to_int(eq_left_op);
640  }
641  if (!eq_left_op) {
642  eq_left_op = tautological_eq->get_left_operand();
643  }
644  if (eq_left_op->get_type_info().is_geometry()) {
645  // skip cast for a geospatial lhs, since the rhs is likely to be a geospatial
646  // physical col without geospatial type info
647  return nullptr;
648  }
649  if (is_constructed_point(eq_left_op)) {
650  // skip cast for a constructed point lhs
651  return nullptr;
652  }
653  auto eq_left_op_col = dynamic_cast<const Analyzer::ColumnVar*>(eq_left_op);
654  if (!eq_left_op_col) {
655  if (dynamic_cast<const Analyzer::StringOper*>(eq_left_op)) {
656  return nullptr;
657  }
658  if (dynamic_cast<const Analyzer::FunctionOper*>(eq_left_op)) {
659  return nullptr;
660  }
661  }
662  CHECK(eq_left_op_col);
663  if (eq_left_op_col->get_rte_idx() != 0) {
664  return nullptr;
665  }
666  if (rhs->get_type_info().is_string()) {
667  return eq_left_op->deep_copy();
668  }
669  if (rhs->get_type_info().is_array()) {
670  // Note(jclay): Can this be restored from copy as above?
671  // If we fall through to the below return statement,
672  // a superfulous cast from DOUBLE[] to DOUBLE[] is made and
673  // this fails at a later stage in codegen.
674  return nullptr;
675  }
676  return makeExpr<Analyzer::UOper>(
677  rhs->get_type_info(), false, kCAST, eq_left_op->deep_copy());
678  }
679  }
680  }
681  return nullptr;
682 }
683 
// NOTE(review): listing line 684 (the signature) is absent from this extract;
// this is presumably CodeGenerator::needCastForHashJoinLhs(rhs), judging by its
// call site in codegenOuterJoinNullPlaceholder.
// Walks the equi-join tautologies with the same tests as hashJoinLhs() and
// answers true exactly where hashJoinLhs() would return a CAST expression over
// the left-hand side column; every other outcome is false.
685  for (const auto& tautological_eq : plan_state_->join_info_.equi_join_tautologies_) {
686  CHECK(IS_EQUIVALENCE(tautological_eq->get_optype()));
687  if (dynamic_cast<const Analyzer::ExpressionTuple*>(
688  tautological_eq->get_left_operand())) {
689  auto lhs_col = hashJoinLhsTuple(rhs, tautological_eq.get());
690  if (lhs_col) {
691  // our join column normalizer falls back to the loop join
692  // when columns of two join tables do not have the same types
693  // todo (yoonmin): relax this
694  return false;
695  }
696  } else {
// Look through an integer cast on the right operand (non-string columns only);
// a nullptr result means there was no such cast, so the operand is used as-is.
697  auto eq_right_op = tautological_eq->get_right_operand();
698  if (!rhs->get_type_info().is_string()) {
699  eq_right_op = remove_cast_to_int(eq_right_op);
700  }
701  if (!eq_right_op) {
702  eq_right_op = tautological_eq->get_right_operand();
703  }
704  if (*eq_right_op == *rhs) {
// Same treatment for the left operand.
705  auto eq_left_op = tautological_eq->get_left_operand();
706  if (!eq_left_op->get_type_info().is_string()) {
707  eq_left_op = remove_cast_to_int(eq_left_op);
708  }
709  if (!eq_left_op) {
710  eq_left_op = tautological_eq->get_left_operand();
711  }
712  if (eq_left_op->get_type_info().is_geometry()) {
713  // skip cast for a geospatial lhs, since the rhs is likely to be a geospatial
714  // physical col without geospatial type info
715  return false;
716  }
717  if (is_constructed_point(eq_left_op)) {
718  // skip cast for a constructed point lhs
719  return false;
720  }
721  auto eq_left_op_col = dynamic_cast<const Analyzer::ColumnVar*>(eq_left_op);
// A left operand that is not a column must be a string or function operator;
// anything else trips the CHECK below.
722  if (!eq_left_op_col) {
723  if (dynamic_cast<const Analyzer::StringOper*>(eq_left_op)) {
724  return false;
725  }
726  if (dynamic_cast<const Analyzer::FunctionOper*>(eq_left_op)) {
727  return false;
728  }
729  }
730  CHECK(eq_left_op_col);
731  if (eq_left_op_col->get_rte_idx() != 0) {
732  return false;
733  }
// Strings and arrays are never cast (hashJoinLhs() returns a plain copy or
// nullptr for them).
734  if (rhs->get_type_info().is_string()) {
735  return false;
736  }
737  if (rhs->get_type_info().is_array()) {
738  return false;
739  }
740  return true;
741  }
742  }
743  }
744  return false;
745 }
746 
747 std::shared_ptr<const Analyzer::ColumnVar> CodeGenerator::hashJoinLhsTuple(
748  const Analyzer::ColumnVar* rhs,
749  const Analyzer::BinOper* tautological_eq) const {
750  const auto lhs_tuple_expr =
751  dynamic_cast<const Analyzer::ExpressionTuple*>(tautological_eq->get_left_operand());
752  const auto rhs_tuple_expr = dynamic_cast<const Analyzer::ExpressionTuple*>(
753  tautological_eq->get_right_operand());
754  CHECK(lhs_tuple_expr && rhs_tuple_expr);
755  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
756  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
757  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
758  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
759  if (*rhs_tuple[i] == *rhs) {
760  const auto lhs_col =
761  std::static_pointer_cast<const Analyzer::ColumnVar>(lhs_tuple[i]);
762  return lhs_col->get_rte_idx() == 0 ? lhs_col : nullptr;
763  }
764  }
765  return nullptr;
766 }
bool hasRangeModeFraming() const
Definition: Analyzer.h:2610
JoinInfo join_info_
Definition: PlanState.h:65
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:382
#define CHECK_EQ(x, y)
Definition: Logger.h:301
llvm::Value * castToTypeIn(llvm::Value *val, const size_t bit_width)
Definition: CgenState.cpp:149
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2576
std::vector< llvm::Value * > outer_join_match_found_per_level_
Definition: CgenState.h:388
HOST DEVICE int get_size() const
Definition: sqltypes.h:393
const Analyzer::Expr * remove_cast_to_int(const Analyzer::Expr *expr)
Definition: ColumnIR.cpp:606
std::unordered_map< size_t, std::vector< llvm::Value * > > fetch_cache_
Definition: CgenState.h:378
Definition: sqltypes.h:66
SQLTypes
Definition: sqltypes.h:55
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:69
CgenState * cgen_state_
llvm::Value * codegenRowId(const Analyzer::ColumnVar *col_var, const CompilationOptions &co)
Definition: ColumnIR.cpp:376
llvm::Value * codegenFixedLengthColVarInWindow(const Analyzer::ColumnVar *col_var, llvm::Value *col_byte_stream, llvm::Value *pos_arg, const WindowFunctionContext *window_function_context=nullptr)
Definition: ColumnIR.cpp:291
bool is_fp() const
Definition: sqltypes.h:584
HOST DEVICE int get_scale() const
Definition: sqltypes.h:386
const Expr * get_right_operand() const
Definition: Analyzer.h:456
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1508
llvm::IRBuilder ir_builder_
Definition: CgenState.h:377
std::vector< llvm::Value * > codegenOuterJoinNullPlaceholder(const Analyzer::ColumnVar *col_var, const bool fetch_column, const CompilationOptions &co)
Definition: ColumnIR.cpp:481
const std::vector< SQLTypeInfo > & getOrderKeyColumnBufferTypes() const
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:580
#define CHECK_GE(x, y)
Definition: Logger.h:306
llvm::Value * codgenAdjustFixedEncNull(llvm::Value *, const SQLTypeInfo &)
Definition: ColumnIR.cpp:433
Definition: sqldefs.h:48
const int8_t * output() const
llvm::Value * foundOuterJoinMatch(const size_t nesting_level) const
Definition: ColumnIR.cpp:474
const int32_t * counts() const
virtual std::vector< llvm::Value * > codegenColumn(const Analyzer::ColumnVar *, const bool fetch_column, const CompilationOptions &)
Definition: ColumnIR.cpp:93
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::shared_ptr< Decoder > get_col_decoder(const Analyzer::ColumnVar *col_var)
Definition: ColumnIR.cpp:28
EncodingType get_compression() const
Definition: Analyzer.h:204
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string to_string(char const *&&v)
llvm::Function * row_func_
Definition: CgenState.h:367
std::vector< llvm::Value * > group_by_expr_cache_
Definition: CgenState.h:384
std::shared_ptr< const Analyzer::Expr > hashJoinLhs(const Analyzer::ColumnVar *rhs) const
Definition: ColumnIR.cpp:618
llvm::Value * codegenWindowPosition(const WindowFunctionContext *window_func_context, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:227
llvm::Module * module_
Definition: CgenState.h:366
int getLocalColumnId(const Analyzer::ColumnVar *col_var, const bool fetch_column)
Definition: PlanState.cpp:59
size_t get_bit_width(const SQLTypeInfo &ti)
llvm::LLVMContext & context_
Definition: CgenState.h:375
llvm::Function * current_func_
Definition: CgenState.h:369
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
Definition: CgenState.cpp:396
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
const std::vector< std::shared_ptr< Analyzer::Expr > > & getArgs() const
Definition: Analyzer.h:2578
bool isLazyFetchColumn(const Analyzer::Expr *target_expr) const
Definition: PlanState.cpp:20
int get_logical_size() const
Definition: sqltypes.h:403
size_t get_col_bit_width(const Analyzer::ColumnVar *col_var)
Definition: ColumnIR.cpp:82
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:64
std::string toString() const override
Definition: Analyzer.cpp:2614
int64_t bigintval
Definition: Datum.h:72
const ColumnDescriptor * get_column_descriptor(const shared::ColumnKey &column_key)
Definition: Execute.h:192
Executor * executor_
std::set< shared::ColumnKey > columns_to_not_fetch_
Definition: PlanState.h:61
std::unordered_map< int, llvm::Value * > scan_idx_to_hash_pos_
Definition: CgenState.h:389
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::vector< llvm::Value * > codegenColVar(const Analyzer::ColumnVar *, const bool fetch_column, const bool update_query_plan, const CompilationOptions &)
Definition: ColumnIR.cpp:105
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:216
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:559
PlanState * plan_state_
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK_LT(x, y)
Definition: Logger.h:303
Definition: sqltypes.h:70
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198
#define CHECK_LE(x, y)
Definition: Logger.h:304
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:389
std::vector< llvm::Value * > frag_offsets_
Definition: CgenState.h:386
llvm::StructType * createStringViewStructType()
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:383
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:392
bool needCastForHashJoinLhs(const Analyzer::ColumnVar *rhs) const
Definition: ColumnIR.cpp:684
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:242
#define CHECK(condition)
Definition: Logger.h:291
bool is_geometry() const
Definition: sqltypes.h:592
std::set< shared::ColumnKey > columns_to_fetch_
Definition: PlanState.h:60
llvm::Value * colByteStream(const Analyzer::ColumnVar *col_var, const bool fetch_column, const bool hoist_literals)
Definition: ColumnIR.cpp:564
const Expr * get_left_operand() const
Definition: Analyzer.h:455
const Analyzer::WindowFunction * getWindowFunction() const
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:209
Definition: sqltypes.h:62
bool is_string() const
Definition: sqltypes.h:580
std::shared_ptr< const Analyzer::ColumnVar > hashJoinLhsTuple(const Analyzer::ColumnVar *rhs, const Analyzer::BinOper *tautological_eq) const
Definition: ColumnIR.cpp:747
llvm::Value * codegenFixedLengthColVar(const Analyzer::ColumnVar *col_var, llvm::Value *col_byte_stream, llvm::Value *pos_arg, const WindowFunctionContext *window_function_context=nullptr)
Definition: ColumnIR.cpp:240
std::vector< llvm::Value * > codegenVariableLengthStringColVar(llvm::Value *col_byte_stream, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:362
int32_t get_rte_idx() const
Definition: Analyzer.h:202
Definition: Datum.h:67
SQLTypes get_phys_int_type(const size_t byte_sz)
Definition: ColumnIR.cpp:414
int adjusted_range_table_index(const Analyzer::ColumnVar *col_var)
Definition: ColumnIR.cpp:87
bool is_array() const
Definition: sqltypes.h:588
shared::TableKey getTableKey() const
Definition: Analyzer.h:199
llvm::Value * resolveGroupedColumnReference(const Analyzer::ColumnVar *)
Definition: ColumnIR.cpp:544
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)
Definition: CgenState.cpp:103
Executor * executor() const