OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
InValuesIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Execute.h"
19 
20 #include <future>
21 #include <memory>
22 
// Generates LLVM IR for `<arg> IN (<value list>)`.
//
// Two strategies are visible below:
//   1. When literal hoisting is enabled and createInValuesBitmap() yields a
//      bitmap, membership is tested through that bitmap.
//   2. Otherwise an equality comparison is emitted for every list value and the
//      comparisons are OR-ed together.
//
// @param expr  the IN expression; its type is CHECKed to be boolean.
// @param co    compilation options; only `hoist_literals` is read directly here.
// @return      the IR value holding the predicate result: seeded as an i1 when
//              the result type is NOT NULL, otherwise as an 8-bit integer.
// @throws std::runtime_error if the IN argument is an UNNEST expression.
//
// NOTE(review): this listing skips some of its own line numbers (25 and 46
// inside this function); the statements on those lines are not visible here.
23 llvm::Value* CodeGenerator::codegen(const Analyzer::InValues* expr,
24  const CompilationOptions& co) {
26  const auto in_arg = expr->get_arg();
27  if (is_unnest(in_arg)) {
28  throw std::runtime_error("IN not supported for unnested expressions");
29  }
30  const auto& expr_ti = expr->get_type_info();
31  CHECK(expr_ti.is_boolean());
    // Left-hand side of the IN, generated with fetch_columns == true.
32  const auto lhs_lvs = codegen(in_arg, true, co);
    // Seed the accumulator with "false": an i1 constant for a NOT NULL result,
    // an 8-bit zero otherwise (the nullable path below goes through "logical_or").
33  llvm::Value* result{nullptr};
34  if (expr_ti.get_notnull()) {
35  result = llvm::ConstantInt::get(llvm::IntegerType::getInt1Ty(cgen_state_->context_),
36  false);
37  } else {
38  result = cgen_state_->llInt(int8_t(0));
39  }
40  CHECK(result);
    // Bitmap strategy; currently only attempted when literals are hoisted.
41  if (co.hoist_literals) { // TODO(alex): remove this constraint
42  auto in_vals_bitmap = createInValuesBitmap(expr, co);
43  if (in_vals_bitmap) {
44  if (in_vals_bitmap->isEmpty()) {
    // Empty bitmap: the result depends only on whether the bitmap holds a NULL.
    // NOTE(review): the '?' arm of this conditional (listing line 46) is not
    // visible in this rendering; only the ':' arm (the "false" seed) is.
45  return in_vals_bitmap->hasNull()
47  : result;
48  }
    // The bitmap probe takes exactly one left-hand-side value.
49  CHECK_EQ(size_t(1), lhs_lvs.size());
    // Hand the bitmap over to cgen_state_ and emit the probe for the LHS value.
50  return cgen_state_->addInValuesBitmap(in_vals_bitmap)
51  ->codegen(lhs_lvs.front(), executor());
52  }
53  }
    // Fallback strategy: one equality comparison per list value, OR-ed together.
54  if (expr_ti.get_notnull()) {
    // NOT NULL result: each comparison is converted with toBool() and OR-ed in.
55  for (auto in_val : expr->get_value_list()) {
56  result = cgen_state_->ir_builder_.CreateOr(
57  result,
58  toBool(
59  codegenCmp(kEQ, kONE, lhs_lvs, in_arg->get_type_info(), in_val.get(), co)));
60  }
61  } else {
    // Nullable result: combine through a call to "logical_or", which also
    // receives inlineIntNull(expr_ti) - presumably the NULL sentinel of the
    // result type (TODO confirm against the "logical_or" definition).
62  for (auto in_val : expr->get_value_list()) {
63  const auto crt =
64  codegenCmp(kEQ, kONE, lhs_lvs, in_arg->get_type_info(), in_val.get(), co);
65  result = cgen_state_->emitCall("logical_or",
66  {result, crt, cgen_state_->inlineIntNull(expr_ti)});
67  }
68  }
69  return result;
70 }
71 
// Generates LLVM IR for an IN predicate whose right-hand side is an
// Analyzer::InIntegerSet, i.e. a list of int64_t values (see get_value_list()).
// The test is always performed through an InValuesBitmap built from that list.
//
// @param in_integer_set  the IN expression; its type is CHECKed to be boolean.
// @param co              compilation options; `hoist_literals` and
//                        `device_type` are read here.
// @return                the IR value produced by the bitmap's codegen() for the
//                        single left-hand-side value.
// @throws std::runtime_error if the IN argument is an UNNEST expression, or if
//                        literal hoisting is disabled.
//
// NOTE(review): unlike createInValuesBitmap(), the InValuesBitmap construction
// here is not wrapped in a try/catch, so anything it throws propagates to the
// caller.
// NOTE(review): this listing skips some of its own line numbers (74, 91-92);
// the tokens on those lines are not visible here.
72 llvm::Value* CodeGenerator::codegen(const Analyzer::InIntegerSet* in_integer_set,
73  const CompilationOptions& co) {
75  const auto in_arg = in_integer_set->get_arg();
76  if (is_unnest(in_arg)) {
77  throw std::runtime_error("IN not supported for unnested expressions");
78  }
79  const auto& ti = in_integer_set->get_arg()->get_type_info();
    // Inline integer NULL value for the argument's type, passed to the bitmap.
80  const auto needle_null_val = inline_int_null_val(ti);
81  if (!co.hoist_literals) {
82  // We never run without literal hoisting in real world scenarios, this avoids a crash
83  // when testing.
84  throw std::runtime_error(
85  "IN subquery with many right-hand side values not supported when literal "
86  "hoisting is disabled");
87  }
    // NOTE(review): the arguments between needle_null_val and the device count
    // (listing lines 91-92) are missing from this rendering.
88  auto in_vals_bitmap = std::make_unique<InValuesBitmap>(
89  in_integer_set->get_value_list(),
90  needle_null_val,
93  executor()->deviceCount(co.device_type),
94  executor()->data_mgr_);
95  const auto& in_integer_set_ti = in_integer_set->get_type_info();
96  CHECK(in_integer_set_ti.is_boolean());
    // Left-hand side of the IN, generated with fetch_columns == true.
97  const auto lhs_lvs = codegen(in_arg, true, co);
    // NOTE(review): `result` is assigned and CHECKed below but is not referenced
    // by the return statement of this function.
98  llvm::Value* result{nullptr};
99  if (in_integer_set_ti.get_notnull()) {
100  result = llvm::ConstantInt::get(llvm::IntegerType::getInt1Ty(cgen_state_->context_),
101  false);
102  } else {
103  result = cgen_state_->llInt(int8_t(0));
104  }
105  CHECK(result);
    // The bitmap probe takes exactly one left-hand-side value.
106  CHECK_EQ(size_t(1), lhs_lvs.size());
    // Hand the bitmap over to cgen_state_ and emit the probe for the LHS value.
107  return cgen_state_->addInValuesBitmap(in_vals_bitmap)
108  ->codegen(lhs_lvs.front(), executor());
109 }
110 
// Tries to build an InValuesBitmap from the constant values of an IN list.
//
// Returns nullptr (meaning "no bitmap", which the InValues codegen above treats
// as "fall back to per-value comparisons") when any of the following holds:
//   - the IN argument is neither an integer nor a dictionary-encoded string;
//   - the list has 3 or fewer values;
//   - a worker task reports a list value that is not a constant (multi-worker
//     path only - see the review note at the do_work call below);
//   - constructing the InValuesBitmap throws (every exception is swallowed).
//
// @param in_values  the IN expression whose value list is converted.
// @param co         compilation options; only `device_type` is visibly read here.
// @return           the bitmap, or nullptr as described above.
//
// NOTE(review): this listing skips some of its own line numbers (114, 196-198);
// the tokens on those lines are not visible here.
111 std::unique_ptr<InValuesBitmap> CodeGenerator::createInValuesBitmap(
112  const Analyzer::InValues* in_values,
113  const CompilationOptions& co) {
115  const auto& value_list = in_values->get_value_list();
116  const auto val_count = value_list.size();
117  const auto& ti = in_values->get_arg()->get_type_info();
118  if (!(ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT))) {
119  return nullptr;
120  }
    // String dictionary proxy, fetched only for (dictionary-encoded) string
    // arguments; nullptr for integers.
121  const auto sdp =
122  ti.is_string()
123  ? executor()->getStringDictionaryProxy(
124  ti.getStringDictKey(), executor()->getRowSetMemoryOwner(), true)
125  : nullptr;
    // Lists of 3 or fewer values never get a bitmap.
126  if (val_count > 3) {
127  using ListIterator = decltype(value_list.begin());
    // Final flat list of integer values / string ids handed to the bitmap.
128  std::vector<int64_t> values;
129  const auto needle_null_val = inline_int_null_val(ti);
    // More than 10000 values: split the conversion across cpu_threads() async
    // tasks; otherwise convert everything inline with a single "worker".
130  const int worker_count = val_count > 10000 ? cpu_threads() : int(1);
    // One output vector per worker so tasks never write to the same container.
131  std::vector<std::vector<int64_t>> values_set(worker_count, std::vector<int64_t>());
132  std::vector<std::future<bool>> worker_threads;
133  auto start_it = value_list.begin();
    // Walk the list in chunks of `stride` = ceil(val_count / worker_count)
    // elements; iteration i handles the range [start_it, end_it).
    // NOTE(review): the increment advances start_it by a full stride even after
    // a shorter final chunk, which can step past value_list.end() before the
    // loop condition ends the loop - confirm this is benign for the std::list.
134  for (size_t i = 0,
135  start_val = 0,
136  stride = (val_count + worker_count - 1) / worker_count;
137  i < val_count && start_val < val_count;
138  ++i, start_val += stride, std::advance(start_it, stride)) {
139  auto end_it = start_it;
    // The last chunk may hold fewer than `stride` elements.
140  std::advance(end_it, std::min(stride, val_count - start_val));
    // Converts the values in [start, end) and appends them to out_vals.
    // Returns false as soon as a value is not a constant, true otherwise.
    // Captures by reference; the futures are all drained with get() further
    // down, before this function returns.
141  const auto do_work = [&](std::vector<int64_t>& out_vals,
142  const ListIterator start,
143  const ListIterator end) -> bool {
144  for (auto val_it = start; val_it != end; ++val_it) {
145  const auto& in_val = *val_it;
    // Look through extract_cast_arg() to find the underlying constant, if any.
146  const auto in_val_const =
147  dynamic_cast<const Analyzer::Constant*>(extract_cast_arg(in_val.get()));
148  if (!in_val_const) {
149  return false;
150  }
151  const auto& in_val_ti = in_val->get_type_info();
    // The value's type must equal the argument's type, directly or after
    // get_nullable_type_info() is applied to it.
152  CHECK(in_val_ti == ti || get_nullable_type_info(in_val_ti) == ti);
153  if (ti.is_string()) {
154  CHECK(sdp);
    // NULL constants map to the inline NULL value; other strings are looked
    // up in the dictionary proxy.
155  const auto string_id =
156  in_val_const->get_is_null()
157  ? needle_null_val
158  : sdp->getIdOfString(*in_val_const->get_constval().stringval);
    // Strings for which the lookup yields INVALID_STR_ID are left out.
159  if (string_id != StringDictionary::INVALID_STR_ID) {
160  out_vals.push_back(string_id);
161  }
162  } else {
    // Integers: take the sign-extended value of the generated constant.
163  out_vals.push_back(CodeGenerator::codegenIntConst(in_val_const, cgen_state_)
164  ->getSExtValue());
165  }
166  }
167  return true;
168  };
169  if (worker_count > 1) {
170  worker_threads.push_back(std::async(
171  std::launch::async, do_work, std::ref(values_set[i]), start_it, end_it));
172  } else {
    // NOTE(review): the boolean returned by do_work is discarded on this
    // single-worker path, so a non-constant value does not lead to the
    // `return nullptr` below - verify whether that is intended.
173  do_work(std::ref(values), start_it, end_it);
174  }
175  }
    // Wait for every async task; any false result abandons the bitmap.
176  bool success = true;
177  for (auto& worker : worker_threads) {
178  success &= worker.get();
179  }
180  if (!success) {
181  return nullptr;
182  }
    // Multi-worker path: concatenate the per-worker outputs, in worker order,
    // into `values` (reserved up front to the combined size).
183  if (worker_count > 1) {
184  size_t total_val_count = 0;
185  for (auto& vals : values_set) {
186  total_val_count += vals.size();
187  }
188  values.reserve(total_val_count);
189  for (auto& vals : values_set) {
190  values.insert(values.end(), vals.begin(), vals.end());
191  }
192  }
    // Any exception thrown while constructing the bitmap is swallowed and
    // reported as "no bitmap".
    // NOTE(review): the arguments between needle_null_val and the device count
    // (listing lines 196-198) are missing from this rendering.
193  try {
194  return std::make_unique<InValuesBitmap>(values,
195  needle_null_val,
199  executor()->deviceCount(co.device_type),
200  executor()->data_mgr_);
201  } catch (...) {
202  return nullptr;
203  }
204  }
205  return nullptr;
206 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const std::vector< int64_t > & get_value_list() const
Definition: Analyzer.h:695
CgenState * cgen_state_
llvm::IRBuilder ir_builder_
Definition: CgenState.h:384
Definition: sqldefs.h:29
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
Definition: Execute.h:222
future< Result > async(Fn &&fn, Args &&...args)
llvm::LLVMContext & context_
Definition: CgenState.h:382
static constexpr int32_t INVALID_STR_ID
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:65
const InValuesBitmap * addInValuesBitmap(std::unique_ptr< InValuesBitmap > &in_values_bitmap)
Definition: CgenState.h:211
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:217
ExecutorDeviceType device_type
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
static llvm::ConstantInt * codegenIntConst(const Analyzer::Constant *constant, CgenState *cgen_state)
Definition: ConstantIR.cpp:89
Definition: sqldefs.h:71
llvm::Value * codegen(llvm::Value *needle, Executor *executor) const
const std::list< std::shared_ptr< Analyzer::Expr > > & get_value_list() const
Definition: Analyzer.h:646
llvm::Value * toBool(llvm::Value *)
Definition: LogicalIR.cpp:343
llvm::Value * codegenCmp(const Analyzer::BinOper *, const CompilationOptions &)
Definition: CompareIR.cpp:230
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:249
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::unique_ptr< InValuesBitmap > createInValuesBitmap(const Analyzer::InValues *, const CompilationOptions &)
Definition: InValuesIR.cpp:111
bool is_unnest(const Analyzer::Expr *expr)
Definition: Execute.h:1694
bool is_string() const
Definition: sqltypes.h:559
const Expr * get_arg() const
Definition: Analyzer.h:693
int cpu_threads()
Definition: thread_count.h:25
const Expr * get_arg() const
Definition: Analyzer.h:644
SQLTypeInfo get_nullable_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1482
Executor * executor() const