OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MaxwellCodegenPatch.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "MaxwellCodegenPatch.h"
18 
19 llvm::Value* Executor::spillDoubleElement(llvm::Value* elem_val, llvm::Type* elem_ty) {
20  auto var_ptr = cgen_state_->ir_builder_.CreateAlloca(elem_ty);
21  cgen_state_->ir_builder_.CreateStore(elem_val, var_ptr);
22  return var_ptr;
23 }
24 
26  return dt == ExecutorDeviceType::GPU && cudaMgr()->isArchMaxwell();
27 }
28 
29 bool GroupByAndAggregate::needsUnnestDoublePatch(llvm::Value const* val_ptr,
30  const std::string& agg_base_name,
31  const bool threads_share_memory,
32  const CompilationOptions& co) const {
33  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
34  llvm::isa<llvm::AllocaInst>(val_ptr) &&
35  val_ptr->getType() ==
36  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
37  "agg_id" == agg_base_name);
38 }
39 
41  executor_->cgen_state_->ir_builder_.CreateCall(
42  executor_->cgen_state_->module_->getFunction("force_sync"));
43 }
bool needsUnnestDoublePatch(llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
ExecutorDeviceType
ExecutorDeviceType device_type
llvm::Value * spillDoubleElement(llvm::Value *elem_val, llvm::Type *elem_ty)
bool isArchMaxwell(const ExecutorDeviceType dt) const