OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringDictionaryTranslationMgr.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 
25 #include "CodeGenerator.h"
26 #include "Execute.h"
27 #ifdef HAVE_CUDA
29 #include "GpuMemUtils.h"
30 #endif // HAVE_CUDA
31 #include "Parser/ParserNode.h"
32 #include "RuntimeFunctions.h"
33 #include "Shared/checked_alloc.h"
35 
36 #ifdef HAVE_TBB
37 #include <tbb/parallel_for.h>
38 #endif // HAVE_TBB
39 
// one_or_more_string_ops_is_null: scans string_op_infos and returns true as
// soon as any entry reports hasNullLiteralArg(); returns false when none does
// (including when the vector is empty).
// NOTE(review): the opening signature line is missing from this extracted
// listing; only the parameter list and body are visible here.
41  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos) {
42  for (const auto& string_op_info : string_op_infos) {
43  if (string_op_info.hasNullLiteralArg()) {
44  return true;
45  }
46  }
47  return false;
48 }
49 
51  const int32_t source_string_dict_id,
52  const int32_t dest_string_dict_id,
53  const bool translate_intersection_only,
54  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
55  const Data_Namespace::MemoryLevel memory_level,
56  const int device_count,
57  Executor* executor,
58  Data_Namespace::DataMgr* data_mgr,
59  const bool delay_translation)
60  : source_string_dict_id_(source_string_dict_id)
61  , dest_string_dict_id_(dest_string_dict_id)
62  , translate_intersection_only_(translate_intersection_only)
63  , string_op_infos_(string_op_infos)
64  , has_null_string_op_(one_or_more_string_ops_is_null(string_op_infos))
65  , memory_level_(memory_level)
66  , device_count_(device_count)
67  , executor_(executor)
68  , data_mgr_(data_mgr) {
69 #ifdef HAVE_CUDA
71  memory_level == Data_Namespace::GPU_LEVEL);
72 #else
74 #endif // HAVE_CUDA
75  if (!delay_translation && !has_null_string_op_) {
78  }
79 }
80 
83  for (auto& device_buffer : device_buffers_) {
84  data_mgr_->free(device_buffer);
85  }
86 }
87 
89  host_translation_map_ = executor_->getStringProxyTranslationMap(
96  executor_->getRowSetMemoryOwner(),
97  true);
98 }
99 
101 #ifdef HAVE_CUDA
103  const size_t translation_map_size_bytes{host_translation_map_->getVectorMap().size() *
104  sizeof(int32_t)};
105  for (int device_id = 0; device_id < device_count_; ++device_id) {
107  data_mgr_, translation_map_size_bytes, device_id));
108  auto device_buffer =
109  reinterpret_cast<int32_t*>(device_buffers_.back()->getMemoryPtr());
111  reinterpret_cast<CUdeviceptr>(device_buffer),
112  host_translation_map_->data(),
113  translation_map_size_bytes,
114  device_id);
115  kernel_translation_maps_.push_back(device_buffer);
116  }
117  }
118 #else
120 #endif // HAVE_CUDA
122  kernel_translation_maps_.push_back(host_translation_map_->data());
123  }
124 }
125 
// Generates the IR that translates a dictionary-encoded string id through this
// manager's translation map.
//
// input_str_id_lv: value holding the input string id; passed as the first
//                  argument of the emitted "map_string_dict_id" call.
// input_ti:        type info of the input; only get_notnull() is read here, to
//                  decide whether the input is nullable.
// add_nullcheck:   when true and the input is nullable, the emitted call is
//                  wrapped in a NullCheckCodegen named
//                  "dict_encoded_str_cast_nullcheck".
// co:              compilation options; co.hoist_literals selects between
//                  codegenHoistedConstants() over all map handles and a plain
//                  codegen() of the single map handle.
//
// Returns the result of the "map_string_dict_id" call (or of the null check's
// finalize() when one was created). If has_null_string_op_ is set, returns an
// inlined null for a nullable dictionary-encoded text type instead.
//
// Throws QueryMustRunOnCpu when literal hoisting is disabled and there is more
// than one kernel translation map.
126 llvm::Value* StringDictionaryTranslationMgr::codegen(llvm::Value* input_str_id_lv,
127  const SQLTypeInfo& input_ti,
128  const bool add_nullcheck,
129  const CompilationOptions& co) const {
130  CHECK(kernel_translation_maps_.size() == static_cast<size_t>(device_count_) ||
// NOTE(review): the remainder of the CHECK above (listing line 131) is missing
// from this extracted listing.
132  if (!co.hoist_literals && kernel_translation_maps_.size() > 1UL) {
133  // Currently the only way to have multiple kernel translation maps is
134  // to be running on GPU, where we would need to have a different pointer
135  // per GPU to the translation map, as the address space is not shared
136  // between GPUs.
137 
140 
141  // Since we currently cannot support different code per device, the only
142  // way to allow for a different kernel translation map/kernel per
143  // device (i.e. GPU) is via hoisting the map handle literal so that
144  // it can be parameterized as a kernel argument. Hence if literal
145  // hoisting is disabled (generally b/c we have an update query),
146  // the surest way of ensuring one and only one translation map
147  // that can have a hard-coded handle in the generated code is by running
148  // on CPU (which per the comment above currently always has a device
149  // count of 1).
150 
151  // This is not currently a major limitation as we currently run
152  // all update queries on CPU, but it would be if we want to run
153  // on multiple GPUs.
154 
155  // TODO(todd): Examine ways around the above limitation, likely either
156  // a dedicated kernel parameter for translation maps (like we have for
157  // join hash tables), or perhaps better for a number of reasons, reworking
158  // the translation map plumbing to use the join infra (which would also
159  // mean we could use pieces like the baseline hash join for multiple
160  // input string dictionaries, i.e. CONCAT on two string columns).
161 
162  throw QueryMustRunOnCpu();
163  }
164  CHECK(co.hoist_literals || kernel_translation_maps_.size() == 1UL);
165 
166  auto cgen_state_ptr = executor_->getCgenStatePtr();
167  AUTOMATIC_IR_METADATA(cgen_state_ptr);
168 
169  if (has_null_string_op_) {
170  // If any of the string ops can statically be determined to output all nulls
171  // (currently determined by whether any of the constant literal inputs to the
172  // string operation are null), then simply codegen a null
173  // dictionary-encoded value.
174  const auto null_ti = SQLTypeInfo(kTEXT, true /* is_nullable */, kENCODING_DICT);
175  return static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(null_ti));
176  }
177 
178  std::vector<std::shared_ptr<const Analyzer::Constant>> constants_owned;
179  std::vector<const Analyzer::Constant*> constants;
// Wrap each kernel translation map pointer as a 64-bit integer literal constant.
// constants_owned keeps the literals alive; constants holds the raw pointers
// handed to the code generator below.
180  for (const auto kernel_translation_map : kernel_translation_maps_) {
181  const int64_t translation_map_handle =
182  reinterpret_cast<int64_t>(kernel_translation_map);
183  const auto translation_map_handle_literal =
184  std::dynamic_pointer_cast<Analyzer::Constant>(
185  Parser::IntLiteral::analyzeValue(translation_map_handle));
186  CHECK(translation_map_handle_literal);
188  translation_map_handle_literal->get_type_info().get_compression());
189  constants_owned.push_back(translation_map_handle_literal);
190  constants.push_back(translation_map_handle_literal.get());
191  }
192  CHECK_GE(constants.size(), 1UL);
193  CHECK(co.hoist_literals || constants.size() == 1UL);
194 
195  CodeGenerator code_generator(executor_);
196 
// With hoisting, all handles are passed to codegenHoistedConstants(); without
// it, exactly one handle exists (checked above) and is code-generated directly.
197  const auto translation_map_handle_lvs =
198  co.hoist_literals
199  ? code_generator.codegenHoistedConstants(constants, kENCODING_NONE, 0)
200  : code_generator.codegen(constants[0], false, co);
201  CHECK_EQ(size_t(1), translation_map_handle_lvs.size());
202 
203  std::unique_ptr<CodeGenerator::NullCheckCodegen> nullcheck_codegen;
204  const bool is_nullable = !input_ti.get_notnull();
205  const auto decoded_input_ti = SQLTypeInfo(kTEXT, is_nullable, kENCODING_DICT);
// Only set up a null check when the caller asked for one and the input can
// actually be null.
206  if (add_nullcheck && is_nullable) {
207  nullcheck_codegen = std::make_unique<CodeGenerator::NullCheckCodegen>(
208  cgen_state_ptr,
209  executor_,
210  input_str_id_lv,
211  decoded_input_ti,
212  "dict_encoded_str_cast_nullcheck");
213  }
// Emit the lookup call: input id, map handle cast via castToTypeIn(..., 64),
// and the value of minSourceStringId().
214  llvm::Value* ret = cgen_state_ptr->emitCall(
215  "map_string_dict_id",
216  {input_str_id_lv,
217  cgen_state_ptr->castToTypeIn(translation_map_handle_lvs.front(), 64),
218  cgen_state_ptr->llInt(minSourceStringId())});
219 
220  if (nullcheck_codegen) {
221  ret =
222  nullcheck_codegen->finalize(cgen_state_ptr->inlineIntNull(decoded_input_ti), ret);
223  }
224  return ret;
225 }
226 
228  return host_translation_map_ && !host_translation_map_->empty();
229 }
230 
// Returns the pointer given by host_translation_map_->data() when isMapValid()
// is true; otherwise returns nullptr.
231 const int32_t* StringDictionaryTranslationMgr::data() const {
232  return isMapValid() ? host_translation_map_->data() : nullptr;
233 }
234 
236  return isMapValid() ? host_translation_map_->domainStart() : 0;
237 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
const Data_Namespace::MemoryLevel memory_level_
StringDictionaryTranslationMgr(const int32_t source_string_dict_id, const int32_t dest_string_dict_id, const bool translate_intersection_only, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, const Data_Namespace::MemoryLevel memory_level, const int device_count, Executor *executor, Data_Namespace::DataMgr *data_mgr, const bool delay_translation)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:161
bool one_or_more_string_ops_is_null(const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos)
#define CHECK_GE(x, y)
Definition: Logger.h:235
Classes representing a parse tree.
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
std::vector< const int32_t * > kernel_translation_maps_
Definition: sqltypes.h:52
static Data_Namespace::AbstractBuffer * allocGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, const size_t num_bytes, const int device_id)
data_mgr_(data_mgr)
void copy_to_nvidia_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:35
#define CHECK(condition)
Definition: Logger.h:222
std::vector< Data_Namespace::AbstractBuffer * > device_buffers_
Allocate GPU memory using GpuBuffers via DataMgr.
void free(AbstractBuffer *buffer)
Definition: DataMgr.cpp:528
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
llvm::Value * codegen(llvm::Value *str_id_input, const SQLTypeInfo &input_ti, const bool add_nullcheck, const CompilationOptions &co) const
const std::vector< StringOps_Namespace::StringOpInfo > string_op_infos_