OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ColumnIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Codec.h"
19 #include "Execute.h"
20 #include "WindowContext.h"
21 
22 // Code generation routines and helpers for working with column expressions.
23 
24 namespace {
25 
// NOTE(review): the embedded source numbering jumps from 70 to 72 below, so one line
// is absent from this listing -- presumably the case label that opens the
// date-in-days branch. Confirm against the original file.
26 // Return the right decoder for a given column expression. Doesn't handle
27 // variable length data. The decoder encapsulates the code generation logic.
// Selection is driven by the column's compression kind and, for unencoded columns,
// by its SQL type. An unrecognized compression kind aborts the process.
28 std::shared_ptr<Decoder> get_col_decoder(const Analyzer::ColumnVar* col_var) {
29  const auto enc_type = col_var->get_compression();
30  const auto& ti = col_var->get_type_info();
31  switch (enc_type) {
32  case kENCODING_NONE: {
// Decimal columns are decoded through the integer type given by decimal_to_int_type().
33  const auto int_type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
34  switch (int_type) {
35  case kBOOLEAN:
36  return std::make_shared<FixedWidthInt>(1);
37  case kTINYINT:
38  return std::make_shared<FixedWidthInt>(1);
39  case kSMALLINT:
40  return std::make_shared<FixedWidthInt>(2);
41  case kINT:
42  return std::make_shared<FixedWidthInt>(4);
43  case kBIGINT:
44  return std::make_shared<FixedWidthInt>(8);
45  case kFLOAT:
46  return std::make_shared<FixedWidthReal>(false);
47  case kDOUBLE:
48  return std::make_shared<FixedWidthReal>(true);
// Unencoded time, timestamp and date columns all use the 8-byte integer decoder.
49  case kTIME:
50  case kTIMESTAMP:
51  case kDATE:
52  return std::make_shared<FixedWidthInt>(8);
// No other unencoded type is expected to reach this helper.
53  default:
54  CHECK(false);
55  }
56  }
57  case kENCODING_DICT:
58  CHECK(ti.is_string());
59  // For dictionary-encoded columns encoded on less than 4 bytes, we can use
60  // unsigned representation for double the maximum cardinality. The inline
61  // null value is going to be the maximum value of the underlying type.
62  if (ti.get_size() < ti.get_logical_size()) {
63  return std::make_shared<FixedWidthUnsigned>(ti.get_size());
64  }
65  return std::make_shared<FixedWidthInt>(ti.get_size());
66  case kENCODING_FIXED: {
// The compression parameter is read as a bit width and must be a whole number of bytes.
67  const auto bit_width = col_var->get_comp_param();
68  CHECK_EQ(0, bit_width % 8);
69  return std::make_shared<FixedWidthInt>(bit_width / 8);
70  }
// A compression parameter of 16 selects the 2-byte small-date decoder; any other
// value selects the 4-byte one.
72  CHECK(ti.is_date_in_days());
73  return col_var->get_comp_param() == 16 ? std::make_shared<FixedWidthSmallDate>(2)
74  : std::make_shared<FixedWidthSmallDate>(4);
75  }
76  default:
77  abort();
78  }
79 }
80 
81 size_t get_col_bit_width(const Analyzer::ColumnVar* col_var) {
82  const auto& type_info = col_var->get_type_info();
83  return get_bit_width(type_info);
84 }
85 
// NOTE(review): the signature line (embedded number 86) is absent from this listing.
// Call sites in this file invoke this helper as adjusted_range_table_index(col_var)
// and store the result in an int -- confirm the exact signature against the original.
// Maps a range table index of -1 to 0 and returns every other index unchanged.
87  return col_var->get_rte_idx() == -1 ? 0 : col_var->get_rte_idx();
88 }
89 
90 } // namespace
91 
// Entry point for generating the IR value(s) of a column reference.
// The column is generated directly through codegenColVar() (with update_query_plan
// set to true) when its range table index is <= 0 or when foundOuterJoinMatch()
// yields no value for that index; otherwise it is routed through
// codegenOuterJoinNullPlaceholder().
// NOTE(review): embedded numbers 95 and 97 are absent from this listing, so the
// condition below is missing at least one operand -- confirm against the original.
92 std::vector<llvm::Value*> CodeGenerator::codegenColumn(const Analyzer::ColumnVar* col_var,
93  const bool fetch_column,
94  const CompilationOptions& co) {
96  if (col_var->get_rte_idx() <= 0 ||
98  !foundOuterJoinMatch(col_var->get_rte_idx())) {
99  return codegenColVar(col_var, fetch_column, true, co);
100  }
101  return codegenOuterJoinNullPlaceholder(col_var, fetch_column, co);
102 }
103 
// Generates the IR value(s) for one column variable.
//
// In order, the visible code:
//  1. for a column of a table with a positive id, handles the virtual "rowid" column
//     and columns that own physical coordinate columns;
//  2. returns a grouped-column value if resolveGroupedColumnReference() finds one;
//  3. reuses a previously generated value from fetch_cache_ (skipped when a window
//     function context is active);
//  4. reuses the left-hand side of an equi-join when its type info equals the column's;
//  5. for lazily fetched columns returns a row position instead of a decoded value;
//  6. otherwise decodes the value (variable-length string, array or fixed-length).
//
// NOTE(review): embedded numbers 108, 134, 139, 142, 159, 180, 192 and 220 are absent
// from this listing. The lone std::make_pair(...) lines below are therefore arguments
// of calls whose first line is missing, and the initializer of window_func_context and
// the head of the window-function return statement are missing as well. Confirm
// against the original file before relying on those spots.
104 std::vector<llvm::Value*> CodeGenerator::codegenColVar(const Analyzer::ColumnVar* col_var,
105  const bool fetch_column,
106  const bool update_query_plan,
107  const CompilationOptions& co) {
109  const bool hoist_literals = co.hoist_literals;
110  auto col_id = col_var->get_column_id();
111  const int rte_idx = adjusted_range_table_index(col_var);
112  CHECK_LT(static_cast<size_t>(rte_idx), cgen_state_->frag_offsets_.size());
113  const auto catalog = executor()->getCatalog();
114  CHECK(catalog);
115  if (col_var->get_table_id() > 0) {
116  auto cd = get_column_descriptor(col_id, col_var->get_table_id(), *catalog);
// The only virtual column accepted here is "rowid".
117  if (cd->isVirtualCol) {
118  CHECK(cd->columnName == "rowid");
119  return {codegenRowId(col_var, co)};
120  }
121  auto col_ti = cd->columnType;
// A column with physical coordinate columns is expanded into those columns, which are
// looked up at ids col_id + 1 ... col_id + N and generated recursively with
// update_query_plan set to false.
122  if (col_ti.get_physical_coord_cols() > 0) {
123  std::vector<llvm::Value*> cols;
124  for (auto i = 0; i < col_ti.get_physical_coord_cols(); i++) {
125  auto cd0 =
126  get_column_descriptor(col_id + i + 1, col_var->get_table_id(), *catalog);
127  auto col0_ti = cd0->columnType;
128  CHECK(!cd0->isVirtualCol);
129  auto col0_var = makeExpr<Analyzer::ColumnVar>(
130  col0_ti, col_var->get_table_id(), cd0->columnId, rte_idx);
131  auto col = codegenColVar(col0_var.get(), fetch_column, false, co);
132  cols.insert(cols.end(), col.begin(), col.end());
133  if (!fetch_column && plan_state_->isLazyFetchColumn(col_var)) {
135  std::make_pair(col_var->get_table_id(), col0_var->get_column_id()));
136  }
137  }
138  if (!fetch_column && plan_state_->isLazyFetchColumn(col_var)) {
140  std::make_pair(col_var->get_table_id(), col_var->get_column_id()));
141  } else {
143  std::make_pair(col_var->get_table_id(), col_var->get_column_id()));
144  }
145  return cols;
146  }
147  } else {
// Table ids <= 0 are treated as temporary tables, which reject geospatial columns.
148  if (col_var->get_type_info().is_geometry()) {
149  throw std::runtime_error(
150  "Geospatial columns not supported in temporary tables yet");
151  }
152  }
153  const auto grouped_col_lv = resolveGroupedColumnReference(col_var);
154  if (grouped_col_lv) {
155  return {grouped_col_lv};
156  }
// The cache key is the hash of the column variable's string form.
157  const auto col_var_hash = boost::hash_value(col_var->toString());
158  const auto window_func_context =
160  // only generate the decoding code once; if a column has been previously
161  // fetched in the generated IR, we'll reuse it
162  // here, we do not just use (local) column id since our analyzer may cast the same
163  // col_var with different types depending on the (aggregate) function that the col_var
164  // is used i.e., SELECT COUNT(DISTINCT x), MIN(x) FROM ...
165  if (!window_func_context) {
166  auto it = cgen_state_->fetch_cache_.find(col_var_hash);
167  if (it != cgen_state_->fetch_cache_.end()) {
168  return {it->second};
169  }
170  }
171  const auto hash_join_lhs = hashJoinLhs(col_var);
172  // Note(jclay): This has been prone to cause failures in some overlaps joins.
173  // I believe most of the issues are worked out now, but a good place to check if
174  // failures are happening.
175 
176  // Use the already fetched left-hand side of an equi-join if the types are identical.
177  // Currently, types can only be different because of different underlying dictionaries.
178  if (hash_join_lhs && hash_join_lhs->get_type_info() == col_var->get_type_info()) {
179  if (plan_state_->isLazyFetchColumn(col_var)) {
181  std::make_pair(col_var->get_table_id(), col_var->get_column_id()));
182  }
183  return codegen(hash_join_lhs.get(), fetch_column, co);
184  }
185  auto pos_arg = posArg(col_var);
// With an active window function context the row position is first remapped through
// codegenWindowPosition().
186  if (window_func_context) {
187  pos_arg = codegenWindowPosition(window_func_context, pos_arg);
188  }
189  auto col_byte_stream = colByteStream(col_var, fetch_column, hoist_literals);
// Lazily fetched columns are not decoded here: the returned value is the row
// position, plus the fragment offset for rte_idx > 0 when one is recorded.
190  if (plan_state_->isLazyFetchColumn(col_var)) {
191  if (update_query_plan) {
193  std::make_pair(col_var->get_table_id(), col_var->get_column_id()));
194  }
195  if (rte_idx > 0) {
196  const auto offset = cgen_state_->frag_offsets_[rte_idx];
197  if (offset) {
198  return {cgen_state_->ir_builder_.CreateAdd(pos_arg, offset)};
199  } else {
200  return {pos_arg};
201  }
202  }
203  return {pos_arg};
204  }
205  const auto& col_ti = col_var->get_type_info();
// Unencoded strings go through the variable-length path; the result is cached unless
// a window function context is active, and the insertion is required to be new.
206  if (col_ti.is_string() && col_ti.get_compression() == kENCODING_NONE) {
207  const auto varlen_str_column_lvs =
208  codegenVariableLengthStringColVar(col_byte_stream, pos_arg);
209  if (!window_func_context) {
210  auto it_ok = cgen_state_->fetch_cache_.insert(
211  std::make_pair(col_var_hash, varlen_str_column_lvs));
212  CHECK(it_ok.second);
213  }
214  return varlen_str_column_lvs;
215  }
// Arrays are returned as the raw byte stream value, without decoding.
216  if (col_ti.is_array()) {
217  return {col_byte_stream};
218  }
219  if (window_func_context) {
221  col_var, col_byte_stream, pos_arg, window_func_context)};
222  }
// Fixed-length decode. The value returned is whatever fetch_cache_ holds for the key
// after the insert attempt, i.e. an existing entry wins over the new one.
223  const auto fixed_length_column_lv =
224  codegenFixedLengthColVar(col_var, col_byte_stream, pos_arg);
225  auto it_ok = cgen_state_->fetch_cache_.insert(
226  std::make_pair(col_var_hash, std::vector<llvm::Value*>{fixed_length_column_lv}));
227  return {it_ok.first->second};
228 }
229 
// NOTE(review): the first signature line (embedded number 230) and the line numbered
// 233 are absent from this listing. The Doxygen index at the end of this listing gives
// the signature as
//   llvm::Value* codegenWindowPosition(const WindowFunctionContext* window_func_context,
//                                      llvm::Value* pos_arg)
// Emits a call to the runtime function "row_number_window_func", passing the address
// returned by window_func_context->output() as an integer constant together with
// pos_arg, and returns the value produced by that call.
231  const WindowFunctionContext* window_func_context,
232  llvm::Value* pos_arg) {
234  const auto window_position = cgen_state_->emitCall(
235  "row_number_window_func",
236  {cgen_state_->llInt(reinterpret_cast<const int64_t>(window_func_context->output())),
237  pos_arg});
238  return window_position;
239 }
240 
241 // Generate code for fixed length column types (number, timestamp or date,
242 // dictionary-encoded string)
// NOTE(review): the first signature line (embedded number 243) and the line numbered
// 248 are absent from this listing; call sites in this file name this function
// codegenFixedLengthColVar -- confirm against the original file.
// Decodes the value at pos_arg from col_byte_stream with the decoder chosen by
// get_col_decoder(), then converts it to the column's bit width and, for some
// encodings, remaps the encoded null value. Returns the resulting value.
244  const Analyzer::ColumnVar* col_var,
245  llvm::Value* col_byte_stream,
246  llvm::Value* pos_arg,
247  const WindowFunctionContext* window_function_context) {
249  const auto decoder = get_col_decoder(col_var);
250  auto dec_val = decoder->codegenDecode(col_byte_stream, pos_arg, cgen_state_->module_);
// The decode instruction is inserted at the builder's current position here.
251  cgen_state_->ir_builder_.Insert(dec_val);
252  auto dec_type = dec_val->getType();
253  llvm::Value* dec_val_cast{nullptr};
254  const auto& col_ti = col_var->get_type_info();
255  if (dec_type->isIntegerTy()) {
// Integer results are sign-extended when the column is wider than the decoded value
// and cast with Trunc otherwise (including when both widths are equal).
256  auto dec_width = static_cast<llvm::IntegerType*>(dec_type)->getBitWidth();
257  auto col_width = get_col_bit_width(col_var);
258  dec_val_cast = cgen_state_->ir_builder_.CreateCast(
259  static_cast<size_t>(col_width) > dec_width ? llvm::Instruction::CastOps::SExt
260  : llvm::Instruction::CastOps::Trunc,
261  dec_val,
262  get_int_type(col_width, cgen_state_->context_));
// Null adjustment is skipped for a range-mode framed window function whose first
// order key is a 4-byte timestamp.
263  bool adjust_fixed_enc_null = true;
264  if (window_function_context &&
265  window_function_context->getWindowFunction()->hasRangeModeFraming()) {
266  // we only need to cast it to 8 byte iff it is encoded type
267  // (i.e., the size of non-encoded timestamp type is 8 byte)
268  const auto order_key_ti =
269  window_function_context->getOrderKeyColumnBufferTypes().front();
270  if (order_key_ti.is_timestamp() && order_key_ti.get_size() == 4) {
271  adjust_fixed_enc_null = false;
272  }
273  }
// Only nullable columns that are fixed-encoded, or dictionary-encoded on fewer than
// 4 bytes, have their null value adjusted.
274  if (adjust_fixed_enc_null &&
275  (col_ti.get_compression() == kENCODING_FIXED ||
276  (col_ti.get_compression() == kENCODING_DICT && col_ti.get_size() < 4)) &&
277  !col_ti.get_notnull()) {
278  dec_val_cast = codgenAdjustFixedEncNull(dec_val_cast, col_ti);
279  }
280  } else {
// Non-integer results must be unencoded float or double values whose LLVM type
// matches the column's SQL type; they are returned as decoded.
281  CHECK_EQ(kENCODING_NONE, col_ti.get_compression());
282  CHECK(dec_type->isFloatTy() || dec_type->isDoubleTy());
283  if (dec_type->isDoubleTy()) {
284  CHECK(col_ti.get_type() == kDOUBLE);
285  } else if (dec_type->isFloatTy()) {
286  CHECK(col_ti.get_type() == kFLOAT);
287  }
288  dec_val_cast = dec_val;
289  }
290  CHECK(dec_val_cast);
291  return dec_val_cast;
292 }
293 
// NOTE(review): embedded numbers 294, 299, 308 and 331 are absent from this listing
// (first signature line, one body line, the right-hand side of the getKind()
// comparison, and the head of the element-pointer expression). The Doxygen index at
// the end of this listing names this function codegenFixedLengthColVarInWindow and
// gives window_function_context a default of nullptr.
// Decodes a fixed-length column value for a window function: the value is decoded only
// on the "window.pos_valid" path, and a phi in "window.pos_notvalid" selects between
// the decoded value and the column type's inline null.
295  const Analyzer::ColumnVar* col_var,
296  llvm::Value* col_byte_stream,
297  llvm::Value* pos_arg,
298  const WindowFunctionContext* window_function_context) {
300  const auto orig_bb = cgen_state_->ir_builder_.GetInsertBlock();
301  const auto pos_valid_bb = llvm::BasicBlock::Create(
302  cgen_state_->context_, "window.pos_valid", cgen_state_->current_func_);
303  const auto pos_notvalid_bb = llvm::BasicBlock::Create(
304  cgen_state_->context_, "window.pos_notvalid", cgen_state_->current_func_);
// A position is considered valid when it is >= 0 (signed comparison).
305  const auto pos_is_valid =
306  cgen_state_->ir_builder_.CreateICmpSGE(pos_arg, cgen_state_->llInt(int64_t(0)));
307  if (window_function_context->getWindowFunction()->getKind() ==
309  // NTH_VALUE needs to return null if N > partition size
310  // To do this, we store null value to the output buffer of the current row
311  // if following requirements for processing NTH_VALUE are not satisfied
312  // 1. current row is valid
313  // 2. N < partition size that the current row is included
314  const auto window_func_args = window_function_context->getWindowFunction()->getArgs();
// NOTE(review): the dynamic_cast result is dereferenced on the next line without a
// null check, so the second argument is assumed to be an Analyzer::Constant.
315  auto n_value_ptr = dynamic_cast<Analyzer::Constant*>(window_func_args[1].get());
316  auto n_value_lv = cgen_state_->llInt((int64_t)n_value_ptr->get_constval().intval);
317  CHECK(n_value_lv);
318 
319  auto partition_index_lv =
320  executor_->codegenCurrentPartitionIndex(window_function_context, pos_arg);
321  // # elems per partition
322  const auto pi32_type =
323  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
// The address returned by counts() is embedded as an integer constant and converted
// to a pointer to 32-bit integers.
324  const auto partition_count_buf =
325  cgen_state_->llInt(reinterpret_cast<int64_t>(window_function_context->counts()));
326  auto partition_count_buf_ptr_lv =
327  cgen_state_->ir_builder_.CreateIntToPtr(partition_count_buf, pi32_type);
328 
329  // # elems of the given partition
330  const auto num_elem_current_partition_ptr =
332  partition_count_buf_ptr_lv,
333  partition_index_lv);
// The loaded count is converted via castToTypeIn(..., 64) before the signed comparison.
334  const auto num_elem_current_partition_lv = cgen_state_->castToTypeIn(
335  cgen_state_->ir_builder_.CreateLoad(
336  num_elem_current_partition_ptr->getType()->getPointerElementType(),
337  num_elem_current_partition_ptr),
338  64);
339  auto is_valid_n_value_lv = cgen_state_->ir_builder_.CreateICmpSLT(
340  n_value_lv, num_elem_current_partition_lv, "is_valid_nth_value");
341  auto cond_lv = cgen_state_->ir_builder_.CreateAnd(
342  is_valid_n_value_lv, pos_is_valid, "is_valid_row_for_nth_value");
343  // return the current row value iff 1) it is a valid row and 2) N < partition_size
344  cgen_state_->ir_builder_.CreateCondBr(cond_lv, pos_valid_bb, pos_notvalid_bb);
345  } else {
346  // return the current row value if it is valid
347  cgen_state_->ir_builder_.CreateCondBr(pos_is_valid, pos_valid_bb, pos_notvalid_bb);
348  }
// Valid path: decode the value, then join the not-valid block.
349  cgen_state_->ir_builder_.SetInsertPoint(pos_valid_bb);
350  const auto fixed_length_column_lv = codegenFixedLengthColVar(
351  col_var, col_byte_stream, pos_arg, window_function_context);
352  cgen_state_->ir_builder_.CreateBr(pos_notvalid_bb);
// Join block: the decoded value arrives from pos_valid_bb, the inline null from the
// block that was current on entry (orig_bb).
353  cgen_state_->ir_builder_.SetInsertPoint(pos_notvalid_bb);
354  const auto window_func_call_phi =
355  cgen_state_->ir_builder_.CreatePHI(fixed_length_column_lv->getType(), 2);
356  window_func_call_phi->addIncoming(fixed_length_column_lv, pos_valid_bb);
357  const auto& col_ti = col_var->get_type_info();
358  const auto null_lv =
359  col_ti.is_fp() ? static_cast<llvm::Value*>(cgen_state_->inlineFpNull(col_ti))
360  : static_cast<llvm::Value*>(cgen_state_->inlineIntNull(col_ti));
361  window_func_call_phi->addIncoming(null_lv, orig_bb);
362  return window_func_call_phi;
363 }
364 
// NOTE(review): the first signature line (embedded number 365) and the line numbered
// 368 are absent from this listing; the call site in this file names this function
// codegenVariableLengthStringColVar -- confirm against the original file.
// Emits an external call to "string_decode" for the given byte stream and position and
// returns three values: the returned struct, its element 0, and its element 1
// truncated to a 32-bit integer.
366  llvm::Value* col_byte_stream,
367  llvm::Value* pos_arg) {
369  // real (not dictionary-encoded) strings; store the pointer to the payload
370  auto* const string_view = cgen_state_->emitExternalCall(
371  "string_decode", createStringViewStructType(), {col_byte_stream, pos_arg});
372  auto* str_lv = cgen_state_->ir_builder_.CreateExtractValue(string_view, 0);
373  auto* len_lv = cgen_state_->ir_builder_.CreateExtractValue(string_view, 1);
374  len_lv = cgen_state_->ir_builder_.CreateTrunc(
375  len_lv, llvm::Type::getInt32Ty(cgen_state_->context_));
376  return {string_view, str_lv, len_lv};
377 }
378 
// NOTE(review): the first signature line (embedded number 379) and the line numbered
// 381 are absent from this listing. The Doxygen index at the end of this listing gives
// the signature as
//   llvm::Value* codegenRowId(const Analyzer::ColumnVar* col_var,
//                             const CompilationOptions& co)
// Generates the row id value: the row position, plus a fragment offset when one
// applies, plus the table generation's start rowid when that is positive.
380  const CompilationOptions& co) {
382  const auto offset_lv = cgen_state_->frag_offsets_[adjusted_range_table_index(col_var)];
383  llvm::Value* start_rowid_lv{nullptr};
384  const auto& table_generation = executor()->getTableGeneration(col_var->get_table_id());
385  if (table_generation.start_rowid > 0) {
386  // Handle the multi-node case: each leaf receives a start rowid used
387  // to offset the local rowid and generate a cluster-wide unique rowid.
388  Datum d;
389  d.bigintval = table_generation.start_rowid;
390  const auto start_rowid = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
391  const auto start_rowid_lvs = codegen(start_rowid.get(), kENCODING_NONE, -1, co);
392  CHECK_EQ(size_t(1), start_rowid_lvs.size());
393  start_rowid_lv = start_rowid_lvs.front();
394  }
395  auto rowid_lv = posArg(col_var);
// Prefer the offset recorded in frag_offsets_; without one, columns with
// rte_idx > 0 load their offset from the "frag_row_off" row function argument,
// indexed by the range table index.
396  if (offset_lv) {
397  rowid_lv = cgen_state_->ir_builder_.CreateAdd(rowid_lv, offset_lv);
398  } else if (col_var->get_rte_idx() > 0) {
399  auto frag_off_ptr = get_arg_by_name(cgen_state_->row_func_, "frag_row_off");
400  auto input_off_ptr = cgen_state_->ir_builder_.CreateGEP(
401  frag_off_ptr->getType()->getScalarType()->getPointerElementType(),
402  frag_off_ptr,
403  cgen_state_->llInt(int32_t(col_var->get_rte_idx())));
404  auto rowid_offset_lv = cgen_state_->ir_builder_.CreateLoad(
405  input_off_ptr->getType()->getPointerElementType(), input_off_ptr);
406  rowid_lv = cgen_state_->ir_builder_.CreateAdd(rowid_lv, rowid_offset_lv);
407  }
408  if (table_generation.start_rowid > 0) {
409  CHECK(start_rowid_lv);
410  rowid_lv = cgen_state_->ir_builder_.CreateAdd(rowid_lv, start_rowid_lv);
411  }
412  return rowid_lv;
413 }
414 
415 namespace {
416 
417 SQLTypes get_phys_int_type(const size_t byte_sz) {
418  switch (byte_sz) {
419  case 1:
420  return kBOOLEAN;
421  // TODO: kTINYINT
422  case 2:
423  return kSMALLINT;
424  case 4:
425  return kINT;
426  case 8:
427  return kBIGINT;
428  default:
429  CHECK(false);
430  }
431  return kNULLT;
432 }
433 
434 } // namespace
435 
// Remaps the null value of a column stored on fewer bytes than its logical size.
// `val` is truncated to the physical width and passed to a runtime function named
// "cast_<from>_to_<to>_nullable" together with the physical null value and the
// column type's inline null.
// NOTE(review): embedded numbers 438 and 467 are absent from this listing; the latter
// falls inside the SQLTypeInfo constructor call below, which is therefore missing one
// argument -- confirm against the original file.
436 llvm::Value* CodeGenerator::codgenAdjustFixedEncNull(llvm::Value* val,
437  const SQLTypeInfo& col_ti) {
439  CHECK_LT(col_ti.get_size(), col_ti.get_logical_size());
440  const auto col_phys_width = col_ti.get_size() * 8;
441  auto from_typename = "int" + std::to_string(col_phys_width) + "_t";
442  auto adjusted = cgen_state_->ir_builder_.CreateCast(
443  llvm::Instruction::CastOps::Trunc,
444  val,
445  get_int_type(col_phys_width, cgen_state_->context_));
// Dictionary-encoded columns use the unsigned source type name, and their physical
// null is the maximum unsigned value of the storage width (1 or 2 bytes only).
446  if (col_ti.get_compression() == kENCODING_DICT) {
447  from_typename = "u" + from_typename;
448  llvm::Value* from_null{nullptr};
449  switch (col_ti.get_size()) {
450  case 1:
451  from_null = cgen_state_->llInt(std::numeric_limits<uint8_t>::max());
452  break;
453  case 2:
454  from_null = cgen_state_->llInt(std::numeric_limits<uint16_t>::max());
455  break;
456  default:
457  CHECK(false);
458  }
459  return cgen_state_->emitCall(
460  "cast_" + from_typename + "_to_" + numeric_type_name(col_ti) + "_nullable",
461  {adjusted, from_null, cgen_state_->inlineIntNull(col_ti)});
462  }
// Otherwise the physical null is the inline null of a type info built from the
// physical integer type for the storage size.
463  SQLTypeInfo col_phys_ti(get_phys_int_type(col_ti.get_size()),
464  col_ti.get_dimension(),
465  col_ti.get_scale(),
466  false,
468  0,
469  col_ti.get_subtype());
470  return cgen_state_->emitCall(
471  "cast_" + from_typename + "_to_" + numeric_type_name(col_ti) + "_nullable",
472  {adjusted,
473  cgen_state_->inlineIntNull(col_phys_ti),
474  cgen_state_->inlineIntNull(col_ti)});
475 }
476 
477 llvm::Value* CodeGenerator::foundOuterJoinMatch(const size_t nesting_level) const {
478  CHECK_GE(nesting_level, size_t(1));
479  CHECK_LE(nesting_level,
480  static_cast<size_t>(cgen_state_->outer_join_match_found_per_level_.size()));
481  return cgen_state_->outer_join_match_found_per_level_[nesting_level - 1];
482 }
483 
// NOTE(review): embedded numbers 484, 488, 506, 527 and 528 are absent from this
// listing (first signature line, two body lines, and the trailing arguments of the
// codegen(null_constant...) call). The Doxygen index at the end of this listing gives
// the signature as
//   std::vector<llvm::Value*> codegenOuterJoinNullPlaceholder(
//       const Analyzer::ColumnVar* col_var, const bool fetch_column,
//       const CompilationOptions& co)
// Generates a column of an outer-joined table as a phi between the real column value
// (taken when foundOuterJoinMatch() holds at run time) and a null constant of the
// column's type. Throws std::runtime_error for unencoded strings, arrays and geometry.
485  const Analyzer::ColumnVar* col_var,
486  const bool fetch_column,
487  const CompilationOptions& co) {
489  const auto grouped_col_lv = resolveGroupedColumnReference(col_var);
490  if (grouped_col_lv) {
491  return {grouped_col_lv};
492  }
493  const auto outer_join_args_bb = llvm::BasicBlock::Create(
494  cgen_state_->context_, "outer_join_args", cgen_state_->current_func_);
495  const auto outer_join_nulls_bb = llvm::BasicBlock::Create(
496  cgen_state_->context_, "outer_join_nulls", cgen_state_->current_func_);
497  const auto phi_bb = llvm::BasicBlock::Create(
498  cgen_state_->context_, "outer_join_phi", cgen_state_->current_func_);
499  const auto outer_join_match_lv = foundOuterJoinMatch(col_var->get_rte_idx());
500  CHECK(outer_join_match_lv);
501  cgen_state_->ir_builder_.CreateCondBr(
502  outer_join_match_lv, outer_join_args_bb, outer_join_nulls_bb);
503  const auto back_from_outer_join_bb = llvm::BasicBlock::Create(
504  cgen_state_->context_, "back_from_outer_join", cgen_state_->current_func_);
// Match path: generate the real column value.
505  cgen_state_->ir_builder_.SetInsertPoint(outer_join_args_bb);
507  const auto orig_lvs = codegenColVar(col_var, fetch_column, true, co);
508  // sometimes col_var used in the join qual needs to cast its column to sync with
509  // the target join column's type which generates a code with a new bb like cast_bb
510  // if so, we need to keep that bb to correctly construct phi_bb
511  // i.e., use cast_bb instead of outer_join_args_bb for the "casted" column
512  // which is the right end point
513  const auto needs_casting_col_var = needCastForHashJoinLhs(col_var);
514  auto* cast_bb = cgen_state_->ir_builder_.GetInsertBlock();
515  cgen_state_->ir_builder_.CreateBr(phi_bb);
// No-match path: generate a null constant of the column's type.
516  cgen_state_->ir_builder_.SetInsertPoint(outer_join_nulls_bb);
517  const auto& null_ti = col_var->get_type_info();
518  if ((null_ti.is_string() && null_ti.get_compression() == kENCODING_NONE) ||
519  null_ti.is_array() || null_ti.is_geometry()) {
520  throw std::runtime_error("Projection type " + null_ti.get_type_name() +
521  " not supported for outer joins yet");
522  }
523  const auto null_constant = makeExpr<Analyzer::Constant>(null_ti, true, Datum{0});
524  const auto null_target_lvs =
525  codegen(null_constant.get(),
526  false,
529  cgen_state_->ir_builder_.CreateBr(phi_bb);
530  CHECK_EQ(orig_lvs.size(), null_target_lvs.size());
// Merge: one phi per generated value; both paths must agree on count and LLVM type.
531  cgen_state_->ir_builder_.SetInsertPoint(phi_bb);
532  std::vector<llvm::Value*> target_lvs;
533  for (size_t i = 0; i < orig_lvs.size(); ++i) {
534  const auto target_type = orig_lvs[i]->getType();
535  CHECK_EQ(target_type, null_target_lvs[i]->getType());
536  auto target_phi = cgen_state_->ir_builder_.CreatePHI(target_type, 2);
537  const auto orig_lvs_bb = needs_casting_col_var ? cast_bb : outer_join_args_bb;
538  target_phi->addIncoming(orig_lvs[i], orig_lvs_bb);
539  target_phi->addIncoming(null_target_lvs[i], outer_join_nulls_bb);
540  target_lvs.push_back(target_phi);
541  }
// Generation continues in a fresh block after the phi block.
542  cgen_state_->ir_builder_.CreateBr(back_from_outer_join_bb);
543  cgen_state_->ir_builder_.SetInsertPoint(back_from_outer_join_bb);
544  return target_lvs;
545 }
546 
// NOTE(review): the first signature line (embedded number 547) is absent from this
// listing; call sites in this file name this function resolveGroupedColumnReference
// and test its result as a pointer -- confirm against the original file.
// Returns the cached group-by expression value for a column variable that refers to a
// group-by key, or nullptr when the variable is not such a reference.
548  const Analyzer::ColumnVar* col_var) {
549  auto col_id = col_var->get_column_id();
// Only variables with a negative range table index are candidates.
550  if (col_var->get_rte_idx() >= 0) {
551  return nullptr;
552  }
// The range table index is negative at this point, so this check can only pass
// through its first operand (col_id == 0).
553  CHECK((col_id == 0) || (col_var->get_rte_idx() >= 0 && col_var->get_table_id() > 0));
554  const auto var = dynamic_cast<const Analyzer::Var*>(col_var);
555  CHECK(var);
// From here on col_id holds the variable number, used as a 1-based cache index.
556  col_id = var->get_varno();
557  CHECK_GE(col_id, 1);
558  if (var->get_which_row() == Analyzer::Var::kGROUPBY) {
559  CHECK_LE(static_cast<size_t>(col_id), cgen_state_->group_by_expr_cache_.size());
560  return cgen_state_->group_by_expr_cache_[col_id - 1];
561  }
562  return nullptr;
563 }
564 
565 // returns the byte stream argument and the position for the given column
// NOTE(review): the first signature line (embedded number 566) is absent from this
// listing; the call site in this file names this function colByteStream and passes
// the column variable as the first argument -- confirm against the original file.
// The visible code returns only the row function argument named "col_buf<N>", where N
// comes from plan_state_->getLocalColumnId(); no position is returned here.
// hoist_literals is not used in the visible lines.
567  const bool fetch_column,
568  const bool hoist_literals) {
569  CHECK_GE(cgen_state_->row_func_->arg_size(), size_t(3));
570  const auto stream_arg_name =
571  "col_buf" + std::to_string(plan_state_->getLocalColumnId(col_var, fetch_column));
572  for (auto& arg : cgen_state_->row_func_->args()) {
573  if (arg.getName() == stream_arg_name) {
// The matching argument must be an i8 pointer.
574  CHECK(arg.getType() == llvm::Type::getInt8PtrTy(cgen_state_->context_));
575  return &arg;
576  }
577  }
// Not finding the argument is treated as a fatal error.
578  CHECK(false);
579  return nullptr;
580 }
581 
// Returns the value holding the current row position for the given expression.
// For a column with range table index > 0 this is the entry recorded for that index in
// scan_idx_to_hash_pos_; for everything else it is the row function argument named
// "pos". Aborts if no "pos" argument exists.
// NOTE(review): embedded number 583 is absent from this listing.
582 llvm::Value* CodeGenerator::posArg(const Analyzer::Expr* expr) const {
584  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(expr);
585  if (col_var && col_var->get_rte_idx() > 0) {
586  const auto hash_pos_it =
587  cgen_state_->scan_idx_to_hash_pos_.find(col_var->get_rte_idx());
588  CHECK(hash_pos_it != cgen_state_->scan_idx_to_hash_pos_.end());
// A pointer entry must point to a 32-bit integer; it is loaded and sign-extended to
// 64 bits. Non-pointer entries are returned as they are.
589  if (hash_pos_it->second->getType()->isPointerTy()) {
590  CHECK(hash_pos_it->second->getType()->getPointerElementType()->isIntegerTy(32));
591  llvm::Value* result = cgen_state_->ir_builder_.CreateLoad(
592  hash_pos_it->second->getType()->getPointerElementType(), hash_pos_it->second);
593  result = cgen_state_->ir_builder_.CreateSExt(
594  result, get_int_type(64, cgen_state_->context_));
595  return result;
596  }
597  return hash_pos_it->second;
598  }
599  for (auto& arg : cgen_state_->row_func_->args()) {
600  if (arg.getName() == "pos") {
601  CHECK(arg.getType()->isIntegerTy(64));
602  return &arg;
603  }
604  }
605  abort();
606 }
607 
// NOTE(review): the signature line (embedded number 608) is absent from this listing.
// The Doxygen index at the end of this listing gives it as
//   const Analyzer::Expr* remove_cast_to_int(const Analyzer::Expr* expr)
// Returns the operand of `expr` when `expr` is a kCAST unary operator whose target
// type is an integer; returns nullptr for any other expression.
609  const auto uoper = dynamic_cast<const Analyzer::UOper*>(expr);
610  if (!uoper || uoper->get_optype() != kCAST) {
611  return nullptr;
612  }
613  const auto& target_ti = uoper->get_type_info();
614  if (!target_ti.is_integer()) {
615  return nullptr;
616  }
617  return uoper->get_operand();
618 }
619 
620 std::shared_ptr<const Analyzer::Expr> CodeGenerator::hashJoinLhs(
621  const Analyzer::ColumnVar* rhs) const {
622  for (const auto& tautological_eq : plan_state_->join_info_.equi_join_tautologies_) {
623  CHECK(IS_EQUIVALENCE(tautological_eq->get_optype()));
624  if (dynamic_cast<const Analyzer::ExpressionTuple*>(
625  tautological_eq->get_left_operand())) {
626  auto lhs_col = hashJoinLhsTuple(rhs, tautological_eq.get());
627  if (lhs_col) {
628  return lhs_col;
629  }
630  } else {
631  auto eq_right_op = tautological_eq->get_right_operand();
632  if (!rhs->get_type_info().is_string()) {
633  eq_right_op = remove_cast_to_int(eq_right_op);
634  }
635  if (!eq_right_op) {
636  eq_right_op = tautological_eq->get_right_operand();
637  }
638  if (*eq_right_op == *rhs) {
639  auto eq_left_op = tautological_eq->get_left_operand();
640  if (!eq_left_op->get_type_info().is_string()) {
641  eq_left_op = remove_cast_to_int(eq_left_op);
642  }
643  if (!eq_left_op) {
644  eq_left_op = tautological_eq->get_left_operand();
645  }
646  if (eq_left_op->get_type_info().is_geometry()) {
647  // skip cast for a geospatial lhs, since the rhs is likely to be a geospatial
648  // physical col without geospatial type info
649  return nullptr;
650  }
651  if (is_constructed_point(eq_left_op)) {
652  // skip cast for a constructed point lhs
653  return nullptr;
654  }
655  auto eq_left_op_col = dynamic_cast<const Analyzer::ColumnVar*>(eq_left_op);
656  if (!eq_left_op_col) {
657  if (dynamic_cast<const Analyzer::StringOper*>(eq_left_op)) {
658  return nullptr;
659  }
660  if (dynamic_cast<const Analyzer::FunctionOper*>(eq_left_op)) {
661  return nullptr;
662  }
663  }
664  CHECK(eq_left_op_col);
665  if (eq_left_op_col->get_rte_idx() != 0) {
666  return nullptr;
667  }
668  if (rhs->get_type_info().is_string()) {
669  return eq_left_op->deep_copy();
670  }
671  if (rhs->get_type_info().is_array()) {
672  // Note(jclay): Can this be restored from copy as above?
673  // If we fall through to the below return statement,
674  // a superfulous cast from DOUBLE[] to DOUBLE[] is made and
675  // this fails at a later stage in codegen.
676  return nullptr;
677  }
678  return makeExpr<Analyzer::UOper>(
679  rhs->get_type_info(), false, kCAST, eq_left_op->deep_copy());
680  }
681  }
682  }
683  return nullptr;
684 }
685 
// NOTE(review): the signature line (embedded number 686) is absent from this listing;
// the call site in this file invokes needCastForHashJoinLhs(col_var), and the body
// refers to the parameter as `rhs` -- confirm against the original file.
// Walks the equi-join tautologies with the same conditions as hashJoinLhs() and
// returns true only in the case where hashJoinLhs() would wrap the left-hand side in a
// kCAST; every other outcome, including tuple qualifiers, yields false.
687  for (const auto& tautological_eq : plan_state_->join_info_.equi_join_tautologies_) {
688  CHECK(IS_EQUIVALENCE(tautological_eq->get_optype()));
689  if (dynamic_cast<const Analyzer::ExpressionTuple*>(
690  tautological_eq->get_left_operand())) {
691  auto lhs_col = hashJoinLhsTuple(rhs, tautological_eq.get());
692  if (lhs_col) {
693  // our join column normalizer falls back to the loop join
694  // when columns of two join tables do not have the same types
695  // todo (yoonmin): relax this
696  return false;
697  }
698  } else {
// Integer casts are looked through on non-string operands; the original operand is
// restored when there is no such cast.
699  auto eq_right_op = tautological_eq->get_right_operand();
700  if (!rhs->get_type_info().is_string()) {
701  eq_right_op = remove_cast_to_int(eq_right_op);
702  }
703  if (!eq_right_op) {
704  eq_right_op = tautological_eq->get_right_operand();
705  }
706  if (*eq_right_op == *rhs) {
707  auto eq_left_op = tautological_eq->get_left_operand();
708  if (!eq_left_op->get_type_info().is_string()) {
709  eq_left_op = remove_cast_to_int(eq_left_op);
710  }
711  if (!eq_left_op) {
712  eq_left_op = tautological_eq->get_left_operand();
713  }
714  if (eq_left_op->get_type_info().is_geometry()) {
715  // skip cast for a geospatial lhs, since the rhs is likely to be a geospatial
716  // physical col without geospatial type info
717  return false;
718  }
719  if (is_constructed_point(eq_left_op)) {
720  // skip cast for a constructed point lhs
721  return false;
722  }
723  auto eq_left_op_col = dynamic_cast<const Analyzer::ColumnVar*>(eq_left_op);
724  if (!eq_left_op_col) {
725  if (dynamic_cast<const Analyzer::StringOper*>(eq_left_op)) {
726  return false;
727  }
728  if (dynamic_cast<const Analyzer::FunctionOper*>(eq_left_op)) {
729  return false;
730  }
731  }
// Any other non-column left-hand side is treated as a fatal error.
732  CHECK(eq_left_op_col);
733  if (eq_left_op_col->get_rte_idx() != 0) {
734  return false;
735  }
736  if (rhs->get_type_info().is_string()) {
737  return false;
738  }
739  if (rhs->get_type_info().is_array()) {
740  return false;
741  }
742  return true;
743  }
744  }
745  }
746  return false;
747 }
748 
749 std::shared_ptr<const Analyzer::ColumnVar> CodeGenerator::hashJoinLhsTuple(
750  const Analyzer::ColumnVar* rhs,
751  const Analyzer::BinOper* tautological_eq) const {
752  const auto lhs_tuple_expr =
753  dynamic_cast<const Analyzer::ExpressionTuple*>(tautological_eq->get_left_operand());
754  const auto rhs_tuple_expr = dynamic_cast<const Analyzer::ExpressionTuple*>(
755  tautological_eq->get_right_operand());
756  CHECK(lhs_tuple_expr && rhs_tuple_expr);
757  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
758  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
759  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
760  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
761  if (*rhs_tuple[i] == *rhs) {
762  const auto lhs_col =
763  std::static_pointer_cast<const Analyzer::ColumnVar>(lhs_tuple[i]);
764  return lhs_col->get_rte_idx() == 0 ? lhs_col : nullptr;
765  }
766  }
767  return nullptr;
768 }
int get_table_id() const
Definition: Analyzer.h:202
bool hasRangeModeFraming() const
Definition: Analyzer.h:2604
JoinInfo join_info_
Definition: PlanState.h:66
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:381
#define CHECK_EQ(x, y)
Definition: Logger.h:297
llvm::Value * castToTypeIn(llvm::Value *val, const size_t bit_width)
Definition: CgenState.cpp:149
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2570
std::vector< llvm::Value * > outer_join_match_found_per_level_
Definition: CgenState.h:386
HOST DEVICE int get_size() const
Definition: sqltypes.h:390
const Analyzer::Expr * remove_cast_to_int(const Analyzer::Expr *expr)
Definition: ColumnIR.cpp:608
std::unordered_map< size_t, std::vector< llvm::Value * > > fetch_cache_
Definition: CgenState.h:376
Definition: sqltypes.h:64
SQLTypes
Definition: sqltypes.h:53
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:69
CgenState * cgen_state_
llvm::Value * codegenRowId(const Analyzer::ColumnVar *col_var, const CompilationOptions &co)
Definition: ColumnIR.cpp:379
llvm::Value * codegenFixedLengthColVarInWindow(const Analyzer::ColumnVar *col_var, llvm::Value *col_byte_stream, llvm::Value *pos_arg, const WindowFunctionContext *window_function_context=nullptr)
Definition: ColumnIR.cpp:294
bool is_fp() const
Definition: sqltypes.h:580
HOST DEVICE int get_scale() const
Definition: sqltypes.h:385
const Expr * get_right_operand() const
Definition: Analyzer.h:452
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1516
llvm::IRBuilder ir_builder_
Definition: CgenState.h:375
std::vector< llvm::Value * > codegenOuterJoinNullPlaceholder(const Analyzer::ColumnVar *col_var, const bool fetch_column, const CompilationOptions &co)
Definition: ColumnIR.cpp:484
const std::vector< SQLTypeInfo > & getOrderKeyColumnBufferTypes() const
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:582
std::set< std::pair< TableId, ColumnId > > columns_to_fetch_
Definition: PlanState.h:61
#define CHECK_GE(x, y)
Definition: Logger.h:302
llvm::Value * codgenAdjustFixedEncNull(llvm::Value *, const SQLTypeInfo &)
Definition: ColumnIR.cpp:436
Definition: sqldefs.h:48
const int8_t * output() const
llvm::Value * foundOuterJoinMatch(const size_t nesting_level) const
Definition: ColumnIR.cpp:477
const int32_t * counts() const
virtual std::vector< llvm::Value * > codegenColumn(const Analyzer::ColumnVar *, const bool fetch_column, const CompilationOptions &)
Definition: ColumnIR.cpp:92
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::shared_ptr< Decoder > get_col_decoder(const Analyzer::ColumnVar *col_var)
Definition: ColumnIR.cpp:28
EncodingType get_compression() const
Definition: Analyzer.h:206
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string to_string(char const *&&v)
llvm::Function * row_func_
Definition: CgenState.h:365
std::set< std::pair< TableId, ColumnId > > columns_to_not_fetch_
Definition: PlanState.h:62
std::vector< llvm::Value * > group_by_expr_cache_
Definition: CgenState.h:382
std::shared_ptr< const Analyzer::Expr > hashJoinLhs(const Analyzer::ColumnVar *rhs) const
Definition: ColumnIR.cpp:620
llvm::Value * codegenWindowPosition(const WindowFunctionContext *window_func_context, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:230
llvm::Module * module_
Definition: CgenState.h:364
int getLocalColumnId(const Analyzer::ColumnVar *col_var, const bool fetch_column)
Definition: PlanState.cpp:62
size_t get_bit_width(const SQLTypeInfo &ti)
llvm::LLVMContext & context_
Definition: CgenState.h:373
llvm::Function * current_func_
Definition: CgenState.h:367
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
Definition: CgenState.cpp:396
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
const std::vector< std::shared_ptr< Analyzer::Expr > > & getArgs() const
Definition: Analyzer.h:2572
bool isLazyFetchColumn(const Analyzer::Expr *target_expr) const
Definition: PlanState.cpp:20
int get_logical_size() const
Definition: sqltypes.h:400
size_t get_col_bit_width(const Analyzer::ColumnVar *col_var)
Definition: ColumnIR.cpp:81
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:64
std::string toString() const override
Definition: Analyzer.cpp:2601
int64_t bigintval
Definition: Datum.h:72
Executor * executor_
std::unordered_map< int, llvm::Value * > scan_idx_to_hash_pos_
Definition: CgenState.h:387
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::vector< llvm::Value * > codegenColVar(const Analyzer::ColumnVar *, const bool fetch_column, const bool update_query_plan, const CompilationOptions &)
Definition: ColumnIR.cpp:104
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:83
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:216
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:559
PlanState * plan_state_
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK_LT(x, y)
Definition: Logger.h:299
Definition: sqltypes.h:68
#define CHECK_LE(x, y)
Definition: Logger.h:300
int get_comp_param() const
Definition: Analyzer.h:207
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:388
std::vector< llvm::Value * > frag_offsets_
Definition: CgenState.h:384
int get_rte_idx() const
Definition: Analyzer.h:204
llvm::StructType * createStringViewStructType()
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:382
bool needCastForHashJoinLhs(const Analyzer::ColumnVar *rhs) const
Definition: ColumnIR.cpp:686
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:240
#define CHECK(condition)
Definition: Logger.h:289
bool is_geometry() const
Definition: sqltypes.h:588
llvm::Value * colByteStream(const Analyzer::ColumnVar *col_var, const bool fetch_column, const bool hoist_literals)
Definition: ColumnIR.cpp:566
const Expr * get_left_operand() const
Definition: Analyzer.h:451
const Analyzer::WindowFunction * getWindowFunction() const
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:209
Definition: sqltypes.h:60
SQLTypeInfo columnType
int get_column_id() const
Definition: Analyzer.h:203
bool is_string() const
Definition: sqltypes.h:576
std::shared_ptr< const Analyzer::ColumnVar > hashJoinLhsTuple(const Analyzer::ColumnVar *rhs, const Analyzer::BinOper *tautological_eq) const
Definition: ColumnIR.cpp:749
llvm::Value * codegenFixedLengthColVar(const Analyzer::ColumnVar *col_var, llvm::Value *col_byte_stream, llvm::Value *pos_arg, const WindowFunctionContext *window_function_context=nullptr)
Definition: ColumnIR.cpp:243
std::vector< llvm::Value * > codegenVariableLengthStringColVar(llvm::Value *col_byte_stream, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:365
Definition: Datum.h:67
SQLTypes get_phys_int_type(const size_t byte_sz)
Definition: ColumnIR.cpp:417
int adjusted_range_table_index(const Analyzer::ColumnVar *col_var)
Definition: ColumnIR.cpp:86
bool is_array() const
Definition: sqltypes.h:584
const ColumnDescriptor * get_column_descriptor(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:191
llvm::Value * resolveGroupedColumnReference(const Analyzer::ColumnVar *)
Definition: ColumnIR.cpp:547
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)
Definition: CgenState.cpp:103
Executor * executor() const