OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
UDFCompiler.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "UDFCompiler.h"
18 #include "CudaMgr/CudaMgr.h"
19 
20 #include <clang/AST/AST.h>
21 #include <clang/AST/ASTConsumer.h>
22 #include <clang/AST/RecursiveASTVisitor.h>
23 #include <clang/Driver/Compilation.h>
24 #include <clang/Driver/Driver.h>
25 #include <clang/Frontend/CompilerInstance.h>
26 #include <clang/Frontend/FrontendActions.h>
27 #include <clang/Frontend/TextDiagnosticPrinter.h>
28 #include <clang/Parse/ParseAST.h>
29 #include <clang/Tooling/CommonOptionsParser.h>
30 #include <clang/Tooling/Tooling.h>
31 #include <llvm/Support/Program.h>
32 #include <llvm/Support/raw_ostream.h>
33 #include <boost/process/search_path.hpp>
34 #include <iterator>
35 #include <memory>
36 
37 #if LLVM_VERSION_MAJOR >= 11
38 #include <llvm/Support/Host.h>
39 #endif
40 
41 #include "Execute.h"
42 #include "Logger/Logger.h"
43 
44 using namespace clang;
45 using namespace clang::tooling;
46 
47 static llvm::cl::OptionCategory ToolingSampleCategory("UDF Tooling");
48 
49 namespace {
50 
51 // By implementing RecursiveASTVisitor, we can specify which AST nodes
52 // we're interested in by overriding relevant methods.
53 
54 class FunctionDeclVisitor : public RecursiveASTVisitor<FunctionDeclVisitor> {
55  public:
56  FunctionDeclVisitor(llvm::raw_fd_ostream& ast_file,
57  SourceManager& s_manager,
58  ASTContext& context)
59  : ast_file_(ast_file), source_manager_(s_manager), context_(context) {
60  source_manager_.getDiagnostics().setShowColors(false);
61  }
62 
63  bool VisitFunctionDecl(FunctionDecl* f) {
64  // Only function definitions (with bodies), not declarations.
65  if (f->hasBody()) {
66  if (getMainFileName() == getFuncDeclFileName(f)) {
67  auto printing_policy = context_.getPrintingPolicy();
68  printing_policy.FullyQualifiedName = 1;
69  printing_policy.UseVoidForZeroParams = 1;
70  printing_policy.PolishForDeclaration = 1;
71  printing_policy.TerseOutput = 1;
72  f->print(ast_file_, printing_policy);
73  ast_file_ << "\n";
74  }
75  }
76 
77  return true;
78  }
79 
80  private:
81  std::string getMainFileName() const {
82  auto f_entry = source_manager_.getFileEntryForID(source_manager_.getMainFileID());
83  return f_entry->getName().str();
84  }
85 
86  std::string getFuncDeclFileName(FunctionDecl* f) const {
87  SourceLocation spell_loc = source_manager_.getSpellingLoc(f->getLocation());
88  PresumedLoc p_loc = source_manager_.getPresumedLoc(spell_loc);
89 
90  return std::string(p_loc.getFilename());
91  }
92 
93  private:
94  llvm::raw_fd_ostream& ast_file_;
95  SourceManager& source_manager_;
96  ASTContext& context_;
97 };
98 
99 // Implementation of the ASTConsumer interface for reading an AST produced
100 // by the Clang parser.
101 class DeclASTConsumer : public ASTConsumer {
102  public:
103  DeclASTConsumer(llvm::raw_fd_ostream& ast_file,
104  SourceManager& s_manager,
105  ASTContext& context)
106  : visitor_(ast_file, s_manager, context) {}
107 
108  // Override the method that gets called for each parsed top-level
109  // declaration.
110  bool HandleTopLevelDecl(DeclGroupRef decl_reference) override {
111  for (DeclGroupRef::iterator b = decl_reference.begin(), e = decl_reference.end();
112  b != e;
113  ++b) {
114  // Traverse the declaration using our AST visitor.
115  visitor_.TraverseDecl(*b);
116  }
117  return true;
118  }
119 
120  private:
122 };
123 
124 // For each source file provided to the tool, a new FrontendAction is created.
125 class HandleDeclAction : public ASTFrontendAction {
126  public:
127  HandleDeclAction(llvm::raw_fd_ostream& ast_file) : ast_file_(ast_file) {}
128 
129  ~HandleDeclAction() override {}
130 
131  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance& instance,
132  StringRef file) override {
133  return std::make_unique<DeclASTConsumer>(
134  ast_file_, instance.getSourceManager(), instance.getASTContext());
135  }
136 
137  private:
138  llvm::raw_fd_ostream& ast_file_;
139 };
140 
141 class ToolFactory : public FrontendActionFactory {
142  public:
143 #if LLVM_VERSION_MAJOR >= 10
144  using FrontendActionPtr = std::unique_ptr<clang::FrontendAction>;
145 #define CREATE_FRONTEND_ACTION(ast_file_) std::make_unique<HandleDeclAction>(ast_file_)
146 #else
147  using FrontendActionPtr = clang::FrontendAction*;
148 #define CREATE_FRONTEND_ACTION(ast_file_) new HandleDeclAction(ast_file_)
149 #endif
150 
151  ToolFactory(llvm::raw_fd_ostream& ast_file) : ast_file_(ast_file) {}
152 
153  FrontendActionPtr create() override { return CREATE_FRONTEND_ACTION(ast_file_); }
154 
155  private:
156  llvm::raw_fd_ostream& ast_file_;
157 };
158 
// Returns the NUL-terminated character buffer of `s`. The pointer is owned by
// `s` and is only usable while `s` is alive and unmodified.
const char* convert(const std::string& s) {
  const char* const c_string = s.c_str();
  return c_string;
}
162 } // namespace
163 
164 UdfClangDriver::UdfClangDriver(const std::string& clang_path)
165  : diag_options(new DiagnosticOptions())
166  , diag_client(new TextDiagnosticPrinter(llvm::errs(), diag_options.get()))
167  , diag_id(new clang::DiagnosticIDs())
168  , diags(diag_id, diag_options.get(), diag_client)
169  , diag_client_owner(diags.takeClient())
170  , the_driver(clang_path.c_str(), llvm::sys::getDefaultTargetTriple(), diags) {}
171 
172 std::string UdfCompiler::removeFileExtension(const std::string& path) {
173  if (path == "." || path == "..") {
174  return path;
175  }
176 
177  size_t pos = path.find_last_of("\\/.");
178  if (pos != std::string::npos && path[pos] == '.') {
179  return path.substr(0, pos);
180  }
181 
182  return path;
183 }
184 
185 std::string UdfCompiler::getFileExt(std::string& s) {
186  size_t i = s.rfind('.', s.length());
187  if (1 != std::string::npos) {
188  return (s.substr(i + 1, s.length() - i));
189  }
190 }
191 
192 void UdfCompiler::replaceExtn(std::string& s, const std::string& new_ext) {
193  std::string::size_type i = s.rfind('.', s.length());
194 
195  if (i != std::string::npos) {
196  s.replace(i + 1, getFileExt(s).length(), new_ext);
197  }
198 }
199 
200 std::string UdfCompiler::genGpuIrFilename(const char* udf_file_name) {
201  std::string gpu_file_name(removeFileExtension(udf_file_name));
202 
203  gpu_file_name += "_gpu.bc";
204  return gpu_file_name;
205 }
206 
207 std::string UdfCompiler::genCpuIrFilename(const char* udf_fileName) {
208  std::string cpu_file_name(removeFileExtension(udf_fileName));
209 
210  cpu_file_name += "_cpu.bc";
211  return cpu_file_name;
212 }
213 
214 int UdfCompiler::compileFromCommandLine(const std::vector<std::string>& command_line) {
215  UdfClangDriver compiler_driver(clang_path_);
216  auto the_driver(compiler_driver.getClangDriver());
217 
218  std::vector<const char*> clang_command_opts;
219  clang_command_opts.reserve(command_line.size() + clang_options_.size());
220  // add required options first
221  std::transform(std::begin(command_line),
222  std::end(command_line),
223  std::back_inserter(clang_command_opts),
224  [&](const std::string& str) { return str.c_str(); });
225 
226  // If there were additional clang options passed to the system, append them here
227  if (!clang_options_.empty()) {
228  std::transform(std::begin(clang_options_),
229  std::end(clang_options_),
230  std::back_inserter(clang_command_opts),
231  [&](const std::string& str) { return str.c_str(); });
232  }
233 
234  the_driver->CCPrintOptions = 0;
235  std::unique_ptr<driver::Compilation> compilation(
236  the_driver->BuildCompilation(clang_command_opts));
237 
238  if (!compilation) {
239  LOG(FATAL) << "failed to build compilation object!\n";
240  }
241 
242  llvm::SmallVector<std::pair<int, const driver::Command*>, 10> failing_commands;
243  int res = the_driver->ExecuteCompilation(*compilation, failing_commands);
244  if (res < 0) {
245  for (const std::pair<int, const driver::Command*>& p : failing_commands) {
246  if (p.first) {
247  the_driver->generateCompilationDiagnostics(*compilation, *p.second);
248  }
249  }
250  }
251 
252  return res;
253 }
254 
255 int UdfCompiler::compileToGpuByteCode(const char* udf_file_name, bool cpu_mode) {
256  std::string gpu_out_filename(genGpuIrFilename(udf_file_name));
257 
258  std::vector<std::string> command_line{clang_path_,
259  "-c",
260  "-O2",
261  "-emit-llvm",
262  "-o",
263  gpu_out_filename,
264  "-std=c++14",
265  "-DNO_BOOST"};
266 
267  // If we are not compiling for cpu mode, then target the gpu
268  // Otherwise assume we can generic ir that will
269  // be translated to gpu code during target code generation
270 #ifdef HAVE_CUDA
271  if (!cpu_mode) {
272  command_line.emplace_back("--cuda-gpu-arch=" +
274  command_line.emplace_back("--cuda-device-only");
275  command_line.emplace_back("-xcuda");
276  command_line.emplace_back("--no-cuda-version-check");
277  const auto cuda_path = get_cuda_home();
278  if (cuda_path != "") {
279  command_line.emplace_back("--cuda-path=" + cuda_path);
280  }
281  }
282 #endif
283 
284  command_line.emplace_back(udf_file_name);
285 
286  // clean up from previous runs
287  boost::filesystem::remove(gpu_out_filename);
288  auto status = compileFromCommandLine(command_line);
289  // make sure that compilation actually succeeded by checking the
290  // output file:
291  if (!status && !boost::filesystem::exists(gpu_out_filename)) {
292  status = 2;
293  }
294  return status;
295 }
296 
297 int UdfCompiler::compileToCpuByteCode(const char* udf_file_name) {
298  std::string cpu_out_filename(genCpuIrFilename(udf_file_name));
299 
300  std::vector<std::string> command_line{clang_path_,
301  "-c",
302  "-O2",
303  "-emit-llvm",
304  "-o",
305  cpu_out_filename,
306  "-std=c++14",
307  "-DNO_BOOST",
308  udf_file_name};
309  return compileFromCommandLine(command_line);
310 }
311 
312 int UdfCompiler::parseToAst(const char* file_name) {
313  UdfClangDriver the_driver(clang_path_);
314  std::string resource_path = the_driver.getClangDriver()->ResourceDir;
315  std::string include_option =
316  std::string("-I") + resource_path + std::string("/include");
317 
318  std::vector<std::string> arg_vector;
319  arg_vector.emplace_back("astparser");
320  arg_vector.emplace_back(file_name);
321  arg_vector.emplace_back("--");
322  arg_vector.emplace_back("-DNO_BOOST");
323  arg_vector.emplace_back(include_option);
324 
325  if (clang_options_.size() > 0) {
326  std::copy(
327  clang_options_.begin(), clang_options_.end(), std::back_inserter(arg_vector));
328  }
329 
330  std::vector<const char*> arg_vec2;
331  std::transform(
332  arg_vector.begin(), arg_vector.end(), std::back_inserter(arg_vec2), convert);
333 
334  int num_args = arg_vec2.size();
335  CommonOptionsParser op(num_args, &arg_vec2[0], ToolingSampleCategory);
336  ClangTool tool(op.getCompilations(), op.getSourcePathList());
337 
338  std::string out_name(file_name);
339  std::string file_ext("ast");
340  replaceExtn(out_name, file_ext);
341 
342  std::error_code out_error_info;
343  llvm::raw_fd_ostream out_file(
344  llvm::StringRef(out_name), out_error_info, llvm::sys::fs::F_None);
345 
346  auto factory = std::make_unique<ToolFactory>(out_file);
347  return tool.run(factory.get());
348 }
349 
350 const std::string& UdfCompiler::getAstFileName() const {
351  return udf_ast_file_name_;
352 }
353 
354 void UdfCompiler::init(const std::string& clang_path) {
356 
357  if (clang_path.empty()) {
358  clang_path_.assign(llvm::sys::findProgramByName("clang++").get());
359  if (clang_path_.empty()) {
360  throw std::runtime_error(
361  "Unable to find clang++ to compile user defined functions");
362  }
363  } else {
364  clang_path_.assign(clang_path);
365 
366  if (!boost::filesystem::exists(clang_path)) {
367  throw std::runtime_error("Path provided for udf compiler " + clang_path +
368  " does not exist.");
369  }
370 
371  if (boost::filesystem::is_directory(clang_path)) {
372  throw std::runtime_error("Path provided for udf compiler " + clang_path +
373  " is not to the clang++ executable.");
374  }
375  }
376 }
377 
378 UdfCompiler::UdfCompiler(const std::string& file_name,
380  const std::string& clang_path)
381  : udf_file_name_(file_name)
382  , udf_ast_file_name_(file_name)
383 #ifdef HAVE_CUDA
384  , target_arch_(target_arch)
385 #endif
386 {
387  init(clang_path);
388 }
389 
390 UdfCompiler::UdfCompiler(const std::string& file_name,
392  const std::string& clang_path,
393  const std::vector<std::string> clang_options)
394  : udf_file_name_(file_name)
395  , udf_ast_file_name_(file_name)
396 #ifdef HAVE_CUDA
397  , target_arch_(target_arch)
398 #endif
399  , clang_options_(clang_options) {
400  init(clang_path);
401 }
402 
404  std::string cpu_ir_file(genCpuIrFilename(udf_file_name_.c_str()));
405 
406  VLOG(1) << "UDFCompiler cpu bc file = " << cpu_ir_file;
407 
408  read_udf_cpu_module(cpu_ir_file);
409 }
410 
412  std::string gpu_ir_file(genGpuIrFilename(udf_file_name_.c_str()));
413 
414  VLOG(1) << "UDFCompiler gpu bc file = " << gpu_ir_file;
415 
416  read_udf_gpu_module(gpu_ir_file);
417 }
418 
422 }
423 
425  int gpu_compile_result = 1;
426 
427  gpu_compile_result = compileToGpuByteCode(udf_file_name_.c_str(), false);
428 
429  // If gpu compilation fails but cpu compilation has succeeded, try compiling
430  // for the cpu with the assumption the user does not have the CUDA toolkit
431  // installed
432  //
433  // Update: while this approach may work for some cases, it will not
434  // work in general as evidenced by the current UdfTest using arrays:
435  // generation of PTX will fail. Hence, read_udf_gpu_module is now
436  // rejecting LLVM IR with a non-nvptx target triple. However, we
437  // will still try cpu compilation but with the aim of detecting any
438  // code errors.
439  if (gpu_compile_result != 0) {
440  gpu_compile_result = compileToGpuByteCode(udf_file_name_.c_str(), true);
441  }
442 
443  return gpu_compile_result;
444 }
445 
447  LOG(INFO) << "UDFCompiler filename to compile: " << udf_file_name_;
448  if (!boost::filesystem::exists(udf_file_name_)) {
449  LOG(FATAL) << "User defined function file " << udf_file_name_ << " does not exist.";
450  return 1;
451  }
452 
453  auto ast_result = parseToAst(udf_file_name_.c_str());
454 
455  if (ast_result == 0) {
456  // Compile udf file to generate cpu and gpu bytecode files
457 
458  int cpu_compile_result = compileToCpuByteCode(udf_file_name_.c_str());
459 #ifdef HAVE_CUDA
460  int gpu_compile_result = 1;
461 #endif
462 
463  if (cpu_compile_result == 0) {
465 #ifdef HAVE_CUDA
466  gpu_compile_result = compileForGpu();
467  if (gpu_compile_result == 0) {
469  } else {
470  LOG(FATAL) << "Unable to compile UDF file for gpu";
471  return 1;
472  }
473 #endif
474  } else {
475  LOG(FATAL) << "Unable to compile UDF file for cpu";
476  return 1;
477  }
478  } else {
479  LOG(FATAL) << "Unable to create AST file for udf compilation";
480  return 1;
481  }
482 
483  return 0;
484 }
ToolFactory(llvm::raw_fd_ostream &ast_file)
clang::driver::Driver * getClangDriver()
Definition: UDFCompiler.h:39
std::string genCpuIrFilename(const char *udf_file_name)
int compileToCpuByteCode(const char *udf_file_name)
void readCompiledModules()
std::vector< std::string > clang_options_
Definition: UDFCompiler.h:85
UdfCompiler(const std::string &udf_file_name, CudaMgr_Namespace::NvidiaDeviceArch target_arch, const std::string &clang_path="")
const std::string & getAstFileName() const
#define LOG(tag)
Definition: Logger.h:194
void readCpuCompiledModule()
void read_udf_cpu_module(const std::string &udf_ir_filename)
static llvm::cl::OptionCategory ToolingSampleCategory("UDF Tooling")
void read_udf_gpu_module(const std::string &udf_ir_filename)
External interface for parsing AST and bitcode files.
std::string genGpuIrFilename(const char *udf_file_name)
const char * convert(const std::string &s)
std::string removeFileExtension(const std::string &path)
DeclASTConsumer(llvm::raw_fd_ostream &ast_file, SourceManager &s_manager, ASTContext &context)
int compileToGpuByteCode(const char *udf_file_name, bool cpu_mode)
std::string get_cuda_home(void)
Definition: CudaMgr.cpp:404
std::unique_ptr< ASTConsumer > CreateASTConsumer(CompilerInstance &instance, StringRef file) override
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
std::string udf_ast_file_name_
Definition: UDFCompiler.h:80
UdfClangDriver(const std::string &)
int parseToAst(const char *file_name)
bool HandleTopLevelDecl(DeclGroupRef decl_reference) override
static std::string deviceArchToSM(const NvidiaDeviceArch arch)
Definition: CudaMgr.h:148
void init(const std::string &clang_path)
std::string udf_file_name_
Definition: UDFCompiler.h:79
int compileUdf()
void replaceExtn(std::string &s, const std::string &new_ext)
std::string getFuncDeclFileName(FunctionDecl *f) const
Definition: UDFCompiler.cpp:86
void readGpuCompiledModule()
std::string getFileExt(std::string &s)
int compileForGpu()
FunctionDeclVisitor(llvm::raw_fd_ostream &ast_file, SourceManager &s_manager, ASTContext &context)
Definition: UDFCompiler.cpp:56
char * f
int compileFromCommandLine(const std::vector< std::string > &command_line)
#define VLOG(n)
Definition: Logger.h:297
std::string clang_path_
Definition: UDFCompiler.h:84
#define CREATE_FRONTEND_ACTION(ast_file_)