OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
UDFCompiler.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "UDFCompiler.h"
18 
19 #include <clang/AST/AST.h>
20 #include <clang/AST/ASTConsumer.h>
21 #include <clang/AST/RecursiveASTVisitor.h>
22 #include <clang/Driver/Compilation.h>
23 #include <clang/Driver/Driver.h>
24 #include <clang/Frontend/CompilerInstance.h>
25 #include <clang/Frontend/FrontendActions.h>
26 #include <clang/Frontend/TextDiagnosticPrinter.h>
27 #include <clang/Parse/ParseAST.h>
28 #include <clang/Tooling/CommonOptionsParser.h>
29 #include <clang/Tooling/Tooling.h>
30 #include <llvm/Support/Program.h>
31 #include <llvm/Support/raw_ostream.h>
32 #include <boost/process/search_path.hpp>
33 #include <iterator>
34 #include <memory>
35 
36 #if LLVM_VERSION_MAJOR >= 11
37 #include <llvm/Support/Host.h>
38 #endif
39 
40 #include "Execute.h"
41 #include "Logger/Logger.h"
42 
43 using namespace clang;
44 using namespace clang::tooling;
45 
46 static llvm::cl::OptionCategory ToolingSampleCategory("UDF Tooling");
47 
48 namespace {
49 
50 // By implementing RecursiveASTVisitor, we can specify which AST nodes
51 // we're interested in by overriding relevant methods.
52 
53 class FunctionDeclVisitor : public RecursiveASTVisitor<FunctionDeclVisitor> {
54  public:
55  FunctionDeclVisitor(llvm::raw_fd_ostream& ast_file,
56  SourceManager& s_manager,
57  ASTContext& context)
58  : ast_file_(ast_file), source_manager_(s_manager), context_(context) {
59  source_manager_.getDiagnostics().setShowColors(false);
60  }
61 
62  bool VisitFunctionDecl(FunctionDecl* f) {
63  // Only function definitions (with bodies), not declarations.
64  if (f->hasBody()) {
65  if (getMainFileName() == getFuncDeclFileName(f)) {
66  auto printing_policy = context_.getPrintingPolicy();
67  printing_policy.FullyQualifiedName = 1;
68  printing_policy.UseVoidForZeroParams = 1;
69  printing_policy.PolishForDeclaration = 1;
70  printing_policy.TerseOutput = 1;
71  f->print(ast_file_, printing_policy);
72  ast_file_ << "\n";
73  }
74  }
75 
76  return true;
77  }
78 
79  private:
80  std::string getMainFileName() const {
81  auto f_entry = source_manager_.getFileEntryForID(source_manager_.getMainFileID());
82  return f_entry->getName().str();
83  }
84 
85  std::string getFuncDeclFileName(FunctionDecl* f) const {
86  SourceLocation spell_loc = source_manager_.getSpellingLoc(f->getLocation());
87  PresumedLoc p_loc = source_manager_.getPresumedLoc(spell_loc);
88 
89  return std::string(p_loc.getFilename());
90  }
91 
92  private:
93  llvm::raw_fd_ostream& ast_file_;
94  SourceManager& source_manager_;
95  ASTContext& context_;
96 };
97 
98 // Implementation of the ASTConsumer interface for reading an AST produced
99 // by the Clang parser.
100 class DeclASTConsumer : public ASTConsumer {
101  public:
102  DeclASTConsumer(llvm::raw_fd_ostream& ast_file,
103  SourceManager& s_manager,
104  ASTContext& context)
105  : visitor_(ast_file, s_manager, context) {}
106 
107  // Override the method that gets called for each parsed top-level
108  // declaration.
109  bool HandleTopLevelDecl(DeclGroupRef decl_reference) override {
110  for (DeclGroupRef::iterator b = decl_reference.begin(), e = decl_reference.end();
111  b != e;
112  ++b) {
113  // Traverse the declaration using our AST visitor.
114  visitor_.TraverseDecl(*b);
115  }
116  return true;
117  }
118 
119  private:
121 };
122 
123 // For each source file provided to the tool, a new FrontendAction is created.
124 class HandleDeclAction : public ASTFrontendAction {
125  public:
126  HandleDeclAction(llvm::raw_fd_ostream& ast_file) : ast_file_(ast_file) {}
127 
128  ~HandleDeclAction() override {}
129 
130  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance& instance,
131  StringRef file) override {
132  return std::make_unique<DeclASTConsumer>(
133  ast_file_, instance.getSourceManager(), instance.getASTContext());
134  }
135 
136  private:
137  llvm::raw_fd_ostream& ast_file_;
138 };
139 
140 class ToolFactory : public FrontendActionFactory {
141  public:
142 #if LLVM_VERSION_MAJOR >= 10
143  using FrontendActionPtr = std::unique_ptr<clang::FrontendAction>;
144 #define CREATE_FRONTEND_ACTION(ast_file_) std::make_unique<HandleDeclAction>(ast_file_)
145 #else
146  using FrontendActionPtr = clang::FrontendAction*;
147 #define CREATE_FRONTEND_ACTION(ast_file_) new HandleDeclAction(ast_file_)
148 #endif
149 
150  ToolFactory(llvm::raw_fd_ostream& ast_file) : ast_file_(ast_file) {}
151 
152  FrontendActionPtr create() override { return CREATE_FRONTEND_ACTION(ast_file_); }
153 
154  private:
155  llvm::raw_fd_ostream& ast_file_;
156 };
157 
// Returns the null-terminated character buffer owned by `s`. The pointer is
// only valid while `s` is alive and not modified.
const char* convert(const std::string& s) {
  const char* const c_string = s.data();
  return c_string;
}
161 } // namespace
162 
163 UdfClangDriver::UdfClangDriver(const std::string& clang_path)
164  : diag_options(new DiagnosticOptions())
165  , diag_client(new TextDiagnosticPrinter(llvm::errs(), diag_options.get()))
166  , diag_id(new clang::DiagnosticIDs())
167  , diags(diag_id, diag_options.get(), diag_client)
168  , diag_client_owner(diags.takeClient())
169  , the_driver(clang_path.c_str(), llvm::sys::getDefaultTargetTriple(), diags) {}
170 
171 std::string UdfCompiler::removeFileExtension(const std::string& path) {
172  if (path == "." || path == "..") {
173  return path;
174  }
175 
176  size_t pos = path.find_last_of("\\/.");
177  if (pos != std::string::npos && path[pos] == '.') {
178  return path.substr(0, pos);
179  }
180 
181  return path;
182 }
183 
184 std::string UdfCompiler::getFileExt(std::string& s) {
185  size_t i = s.rfind('.', s.length());
186  if (1 != std::string::npos) {
187  return (s.substr(i + 1, s.length() - i));
188  }
189 }
190 
191 void UdfCompiler::replaceExtn(std::string& s, const std::string& new_ext) {
192  std::string::size_type i = s.rfind('.', s.length());
193 
194  if (i != std::string::npos) {
195  s.replace(i + 1, getFileExt(s).length(), new_ext);
196  }
197 }
198 
199 std::string UdfCompiler::genGpuIrFilename(const char* udf_file_name) {
200  std::string gpu_file_name(removeFileExtension(udf_file_name));
201 
202  gpu_file_name += "_gpu.bc";
203  return gpu_file_name;
204 }
205 
206 std::string UdfCompiler::genCpuIrFilename(const char* udf_fileName) {
207  std::string cpu_file_name(removeFileExtension(udf_fileName));
208 
209  cpu_file_name += "_cpu.bc";
210  return cpu_file_name;
211 }
212 
213 int UdfCompiler::compileFromCommandLine(const std::vector<std::string>& command_line) {
214  UdfClangDriver compiler_driver(clang_path_);
215  auto the_driver(compiler_driver.getClangDriver());
216 
217  std::vector<const char*> clang_command_opts;
218  clang_command_opts.reserve(command_line.size() + clang_options_.size());
219  // add required options first
220  std::transform(std::begin(command_line),
221  std::end(command_line),
222  std::back_inserter(clang_command_opts),
223  [&](const std::string& str) { return str.c_str(); });
224 
225  // If there were additional clang options passed to the system, append them here
226  if (!clang_options_.empty()) {
227  std::transform(std::begin(clang_options_),
228  std::end(clang_options_),
229  std::back_inserter(clang_command_opts),
230  [&](const std::string& str) { return str.c_str(); });
231  }
232 
233  the_driver->CCPrintOptions = 0;
234  std::unique_ptr<driver::Compilation> compilation(
235  the_driver->BuildCompilation(clang_command_opts));
236 
237  if (!compilation) {
238  LOG(FATAL) << "failed to build compilation object!\n";
239  }
240 
241  llvm::SmallVector<std::pair<int, const driver::Command*>, 10> failing_commands;
242  int res = the_driver->ExecuteCompilation(*compilation, failing_commands);
243 
244  if (res < 0) {
245  for (const std::pair<int, const driver::Command*>& p : failing_commands) {
246  if (p.first) {
247  the_driver->generateCompilationDiagnostics(*compilation, *p.second);
248  }
249  }
250  }
251 
252  return res;
253 }
254 
255 int UdfCompiler::compileToGpuByteCode(const char* udf_file_name, bool cpu_mode) {
256  std::string gpu_out_filename(genGpuIrFilename(udf_file_name));
257 
258  std::vector<std::string> command_line{
259  clang_path_, "-c", "-O2", "-emit-llvm", "-o", gpu_out_filename, "-std=c++14"};
260 
261  // If we are not compiling for cpu mode, then target the gpu
262  // Otherwise assume we can generic ir that will
263  // be translated to gpu code during target code generation
264  if (!cpu_mode) {
265  command_line.emplace_back("--cuda-gpu-arch=" +
267  command_line.emplace_back("--cuda-device-only");
268  command_line.emplace_back("-xcuda");
269  }
270 
271  command_line.emplace_back(udf_file_name);
272 
273  return compileFromCommandLine(command_line);
274 }
275 
276 int UdfCompiler::compileToCpuByteCode(const char* udf_file_name) {
277  std::string cpu_out_filename(genCpuIrFilename(udf_file_name));
278 
279  std::vector<std::string> command_line{clang_path_,
280  "-c",
281  "-O2",
282  "-emit-llvm",
283  "-o",
284  cpu_out_filename,
285  "-std=c++14",
286  udf_file_name};
287 
288  return compileFromCommandLine(command_line);
289 }
290 
291 int UdfCompiler::parseToAst(const char* file_name) {
292  UdfClangDriver the_driver(clang_path_);
293  std::string resource_path = the_driver.getClangDriver()->ResourceDir;
294  std::string include_option =
295  std::string("-I") + resource_path + std::string("/include");
296 
297  std::vector<std::string> arg_vector;
298  arg_vector.emplace_back("astparser");
299  arg_vector.emplace_back(file_name);
300  arg_vector.emplace_back("--");
301  arg_vector.emplace_back(include_option);
302 
303  if (clang_options_.size() > 0) {
304  std::copy(
305  clang_options_.begin(), clang_options_.end(), std::back_inserter(arg_vector));
306  }
307 
308  std::vector<const char*> arg_vec2;
309  std::transform(
310  arg_vector.begin(), arg_vector.end(), std::back_inserter(arg_vec2), convert);
311 
312  int num_args = arg_vec2.size();
313  CommonOptionsParser op(num_args, &arg_vec2[0], ToolingSampleCategory);
314  ClangTool tool(op.getCompilations(), op.getSourcePathList());
315 
316  std::string out_name(file_name);
317  std::string file_ext("ast");
318  replaceExtn(out_name, file_ext);
319 
320  std::error_code out_error_info;
321  llvm::raw_fd_ostream out_file(
322  llvm::StringRef(out_name), out_error_info, llvm::sys::fs::F_None);
323 
324  auto factory = std::make_unique<ToolFactory>(out_file);
325  return tool.run(factory.get());
326 }
327 
328 const std::string& UdfCompiler::getAstFileName() const {
329  return udf_ast_file_name_;
330 }
331 
332 void UdfCompiler::init(const std::string& clang_path) {
334 
335  if (clang_path.empty()) {
336  clang_path_.assign(llvm::sys::findProgramByName("clang++").get());
337  if (clang_path_.empty()) {
338  throw std::runtime_error(
339  "Unable to find clang++ to compile user defined functions");
340  }
341  } else {
342  clang_path_.assign(clang_path);
343 
344  if (!boost::filesystem::exists(clang_path)) {
345  throw std::runtime_error("Path provided for udf compiler " + clang_path +
346  " does not exist.");
347  }
348 
349  if (boost::filesystem::is_directory(clang_path)) {
350  throw std::runtime_error("Path provided for udf compiler " + clang_path +
351  " is not to the clang++ executable.");
352  }
353  }
354 }
355 
356 UdfCompiler::UdfCompiler(const std::string& file_name,
358  const std::string& clang_path)
359  : udf_file_name_(file_name)
360  , udf_ast_file_name_(file_name)
361  , target_arch_(target_arch) {
362  init(clang_path);
363 }
364 
365 UdfCompiler::UdfCompiler(const std::string& file_name,
367  const std::string& clang_path,
368  const std::vector<std::string> clang_options)
369  : udf_file_name_(file_name)
370  , udf_ast_file_name_(file_name)
371  , target_arch_(target_arch)
372  , clang_options_(clang_options) {
373  init(clang_path);
374 }
375 
377  std::string cpu_ir_file(genCpuIrFilename(udf_file_name_.c_str()));
378 
379  VLOG(1) << "UDFCompiler cpu bc file = " << cpu_ir_file;
380 
381  read_udf_cpu_module(cpu_ir_file);
382 }
383 
385  std::string gpu_ir_file(genGpuIrFilename(udf_file_name_.c_str()));
386 
387  VLOG(1) << "UDFCompiler gpu bc file = " << gpu_ir_file;
388 
389  read_udf_gpu_module(gpu_ir_file);
390 }
391 
395 }
396 
398  int gpu_compile_result = 1;
399 
400  gpu_compile_result = compileToGpuByteCode(udf_file_name_.c_str(), false);
401 
402  // If gpu compilation fails but cpu compilation has succeeded, try compiling
403  // for the cpu with the assumption the user does not have the CUDA toolkit
404  // installed
405  if (gpu_compile_result != 0) {
406  gpu_compile_result = compileToGpuByteCode(udf_file_name_.c_str(), true);
407  }
408 
409  return gpu_compile_result;
410 }
411 
413  LOG(INFO) << "UDFCompiler filename to compile: " << udf_file_name_;
414  if (!boost::filesystem::exists(udf_file_name_)) {
415  LOG(FATAL) << "User defined function file " << udf_file_name_ << " does not exist.";
416  return 1;
417  }
418 
419  auto ast_result = parseToAst(udf_file_name_.c_str());
420 
421  if (ast_result == 0) {
422  // Compile udf file to generate cpu and gpu bytecode files
423 
424  int cpu_compile_result = compileToCpuByteCode(udf_file_name_.c_str());
425 #ifdef HAVE_CUDA
426  int gpu_compile_result = 1;
427 #endif
428 
429  if (cpu_compile_result == 0) {
431 #ifdef HAVE_CUDA
432  gpu_compile_result = compileForGpu();
433 
434  if (gpu_compile_result == 0) {
436  } else {
437  LOG(FATAL) << "Unable to compile UDF file for gpu";
438  return 1;
439  }
440 #endif
441  } else {
442  LOG(FATAL) << "Unable to compile UDF file for cpu";
443  return 1;
444  }
445  } else {
446  LOG(FATAL) << "Unable to create AST file for udf compilation";
447  return 1;
448  }
449 
450  return 0;
451 }
ToolFactory(llvm::raw_fd_ostream &ast_file)
clang::driver::Driver * getClangDriver()
Definition: UDFCompiler.h:39
std::string genCpuIrFilename(const char *udf_file_name)
int compileToCpuByteCode(const char *udf_file_name)
void readCompiledModules()
std::vector< std::string > clang_options_
Definition: UDFCompiler.h:83
UdfCompiler(const std::string &udf_file_name, CudaMgr_Namespace::NvidiaDeviceArch target_arch, const std::string &clang_path="")
const std::string & getAstFileName() const
#define LOG(tag)
Definition: Logger.h:188
void readCpuCompiledModule()
void read_udf_cpu_module(const std::string &udf_ir_filename)
static llvm::cl::OptionCategory ToolingSampleCategory("UDF Tooling")
void read_udf_gpu_module(const std::string &udf_ir_filename)
External interface for parsing AST and bitcode files.
std::string genGpuIrFilename(const char *udf_file_name)
const char * convert(const std::string &s)
std::string removeFileExtension(const std::string &path)
DeclASTConsumer(llvm::raw_fd_ostream &ast_file, SourceManager &s_manager, ASTContext &context)
int compileToGpuByteCode(const char *udf_file_name, bool cpu_mode)
std::unique_ptr< ASTConsumer > CreateASTConsumer(CompilerInstance &instance, StringRef file) override
std::string udf_ast_file_name_
Definition: UDFCompiler.h:80
UdfClangDriver(const std::string &)
int parseToAst(const char *file_name)
bool HandleTopLevelDecl(DeclGroupRef decl_reference) override
CudaMgr_Namespace::NvidiaDeviceArch target_arch_
Definition: UDFCompiler.h:81
static std::string deviceArchToSM(const NvidiaDeviceArch arch)
Definition: CudaMgr.h:148
void init(const std::string &clang_path)
std::string udf_file_name_
Definition: UDFCompiler.h:79
int compileUdf()
void replaceExtn(std::string &s, const std::string &new_ext)
std::string getFuncDeclFileName(FunctionDecl *f) const
Definition: UDFCompiler.cpp:85
void readGpuCompiledModule()
std::string getFileExt(std::string &s)
int compileForGpu()
FunctionDeclVisitor(llvm::raw_fd_ostream &ast_file, SourceManager &s_manager, ASTContext &context)
Definition: UDFCompiler.cpp:55
int compileFromCommandLine(const std::vector< std::string > &command_line)
#define VLOG(n)
Definition: Logger.h:291
std::string clang_path_
Definition: UDFCompiler.h:82
#define CREATE_FRONTEND_ACTION(ast_file_)