OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
UDFCompiler.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "UDFCompiler.h"
18 #include <clang/AST/AST.h>
19 #include <clang/AST/ASTConsumer.h>
20 #include <clang/AST/RecursiveASTVisitor.h>
21 #include <clang/Driver/Compilation.h>
22 #include <clang/Driver/Driver.h>
23 #include <clang/Frontend/CompilerInstance.h>
24 #include <clang/Frontend/FrontendActions.h>
25 #include <clang/Frontend/TextDiagnosticPrinter.h>
26 #include <clang/Parse/ParseAST.h>
27 #include <clang/Tooling/CommonOptionsParser.h>
28 #include <clang/Tooling/Tooling.h>
29 #include <llvm/Support/Program.h>
30 #include <llvm/Support/raw_ostream.h>
31 #include <boost/process/search_path.hpp>
32 #include <memory>
33 #include "Execute.h"
34 #include "Shared/Logger.h"
35 
36 using namespace clang;
37 using namespace clang::tooling;
38 
39 static llvm::cl::OptionCategory ToolingSampleCategory("UDF Tooling");
40 
41 namespace {
42 
43 // By implementing RecursiveASTVisitor, we can specify which AST nodes
44 // we're interested in by overriding relevant methods.
45 
46 class FunctionDeclVisitor : public RecursiveASTVisitor<FunctionDeclVisitor> {
47  public:
48  FunctionDeclVisitor(llvm::raw_fd_ostream& ast_file, SourceManager& s_manager)
49  : ast_file_(ast_file), source_manager_(s_manager) {
50  source_manager_.getDiagnostics().setShowColors();
51  }
52 
53  bool VisitFunctionDecl(FunctionDecl* f) {
54  // Only function definitions (with bodies), not declarations.
55  if (f->hasBody()) {
56  if (getMainFileName() == getFuncDeclFileName(f)) {
57  f->dump(ast_file_);
58  }
59  }
60 
61  return true;
62  }
63 
64  private:
65  std::string getMainFileName() const {
66  auto f_entry = source_manager_.getFileEntryForID(source_manager_.getMainFileID());
67  return f_entry->getName().str();
68  }
69 
70  std::string getFuncDeclFileName(FunctionDecl* f) const {
71  SourceLocation spell_loc = source_manager_.getSpellingLoc(f->getLocation());
72  PresumedLoc p_loc = source_manager_.getPresumedLoc(spell_loc);
73 
74  return std::string(p_loc.getFilename());
75  }
76 
77  private:
78  llvm::raw_fd_ostream& ast_file_;
79  SourceManager& source_manager_;
80 };
81 
82 // Implementation of the ASTConsumer interface for reading an AST produced
83 // by the Clang parser.
84 class DeclASTConsumer : public ASTConsumer {
85  public:
86  DeclASTConsumer(llvm::raw_fd_ostream& ast_file, SourceManager& s_manager)
87  : visitor_(ast_file, s_manager) {}
88 
89  // Override the method that gets called for each parsed top-level
90  // declaration.
91  bool HandleTopLevelDecl(DeclGroupRef decl_reference) override {
92  for (DeclGroupRef::iterator b = decl_reference.begin(), e = decl_reference.end();
93  b != e;
94  ++b) {
95  // Traverse the declaration using our AST visitor.
96  visitor_.TraverseDecl(*b);
97  }
98  return true;
99  }
100 
101  private:
103 };
104 
105 // For each source file provided to the tool, a new FrontendAction is created.
106 class HandleDeclAction : public ASTFrontendAction {
107  public:
108  HandleDeclAction(llvm::raw_fd_ostream& ast_file) : ast_file_(ast_file) {}
109 
110  ~HandleDeclAction() override {}
111 
112  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance& instance,
113  StringRef file) override {
114  return llvm::make_unique<DeclASTConsumer>(ast_file_, instance.getSourceManager());
115  }
116 
117  private:
118  llvm::raw_fd_ostream& ast_file_;
119 };
120 
121 class ToolFactory : public FrontendActionFactory {
122  public:
123  ToolFactory(llvm::raw_fd_ostream& ast_file) : ast_file_(ast_file) {}
124 
125  clang::FrontendAction* create() override { return new HandleDeclAction(ast_file_); }
126 
127  private:
128  llvm::raw_fd_ostream& ast_file_;
129 };
130 
131 bool on_search_path(const std::string file) {
132  boost::filesystem::path p = boost::process::search_path(file);
133  return boost::filesystem::exists(p);
134 }
135 } // namespace
136 
137 std::string UdfCompiler::removeFileExtension(const std::string& path) {
138  if (path == "." || path == "..") {
139  return path;
140  }
141 
142  size_t pos = path.find_last_of("\\/.");
143  if (pos != std::string::npos && path[pos] == '.') {
144  return path.substr(0, pos);
145  }
146 
147  return path;
148 }
149 
150 std::string UdfCompiler::getFileExt(std::string& s) {
151  size_t i = s.rfind('.', s.length());
152  if (1 != std::string::npos) {
153  return (s.substr(i + 1, s.length() - i));
154  }
155 }
156 
157 void UdfCompiler::replaceExtn(std::string& s, const std::string& new_ext) {
158  std::string::size_type i = s.rfind('.', s.length());
159 
160  if (i != std::string::npos) {
161  s.replace(i + 1, getFileExt(s).length(), new_ext);
162  }
163 }
164 
165 std::string UdfCompiler::genGpuIrFilename(const char* udf_file_name) {
166  std::string gpu_file_name(removeFileExtension(udf_file_name));
167 
168  gpu_file_name += "_gpu.bc";
169  return gpu_file_name;
170 }
171 
172 std::string UdfCompiler::genCpuIrFilename(const char* udf_fileName) {
173  std::string cpu_file_name(removeFileExtension(udf_fileName));
174 
175  cpu_file_name += "_cpu.bc";
176  return cpu_file_name;
177 }
178 
179 int UdfCompiler::compileFromCommandLine(std::vector<const char*>& command_line) {
180  auto a_path = llvm::sys::findProgramByName("clang++");
181  auto clang_path = a_path.get();
182 
183  llvm::IntrusiveRefCntPtr<clang::DiagnosticOptions> diag_options(
184  new DiagnosticOptions());
185 
186  clang::DiagnosticConsumer* diag_client =
187  new TextDiagnosticPrinter(llvm::errs(), diag_options.get());
188 
189  llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diag_id(new clang::DiagnosticIDs());
190  clang::DiagnosticsEngine diags(diag_id, diag_options.get(), diag_client);
191 
192  std::unique_ptr<clang::DiagnosticConsumer> diag_client_owner(diags.takeClient());
193 
194  clang::driver::Driver the_driver(
195  clang_path.c_str(), llvm::sys::getDefaultTargetTriple(), diags);
196 
197  the_driver.CCPrintOptions = 0;
198  std::unique_ptr<driver::Compilation> compilation(
199  the_driver.BuildCompilation(command_line));
200 
201  if (!compilation) {
202  LOG(FATAL) << "failed to build compilation object!\n";
203  }
204 
205  llvm::SmallVector<std::pair<int, const driver::Command*>, 10> failing_commands;
206  int res = the_driver.ExecuteCompilation(*compilation, failing_commands);
207 
208  if (res < 0) {
209  for (const std::pair<int, const driver::Command*>& p : failing_commands) {
210  if (p.first) {
211  the_driver.generateCompilationDiagnostics(*compilation, *p.second);
212  }
213  }
214  }
215 
216  return res;
217 }
218 
219 int UdfCompiler::compileToGpuByteCode(const char* udf_file_name, bool cpu_mode) {
220  auto a_path = llvm::sys::findProgramByName("clang++");
221  auto clang_path = a_path.get();
222 
223  std::string gpu_outName(genGpuIrFilename(udf_file_name));
224 
225  std::vector<const char*> command_line{clang_path.c_str(),
226  "-c",
227  "-O2",
228  "-emit-llvm",
229  "-o",
230  gpu_outName.c_str(),
231  "-std=c++14"};
232 
233  // If we are not compiling for cpu mode, then target the gpu
234  // Otherwise assume we can generic ir that will
235  // be translated to gpu code during target code generation
236  if (!cpu_mode) {
237  command_line.emplace_back("--cuda-gpu-arch=sm_30");
238  command_line.emplace_back("--cuda-device-only");
239  command_line.emplace_back("-xcuda");
240  }
241 
242  command_line.emplace_back(udf_file_name);
243 
244  return compileFromCommandLine(command_line);
245 }
246 
247 int UdfCompiler::compileToCpuByteCode(const char* udf_file_name) {
248  auto a_path = llvm::sys::findProgramByName("clang++");
249  auto clang_path = a_path.get();
250 
251  std::string cpu_outName(genCpuIrFilename(udf_file_name));
252 
253  std::vector<const char*> command_line{clang_path.c_str(),
254  "-c",
255  "-O2",
256  "-emit-llvm",
257  "-o",
258  cpu_outName.c_str(),
259  "-std=c++14",
260  udf_file_name};
261 
262  return compileFromCommandLine(command_line);
263 }
264 
265 int UdfCompiler::parseToAst(const char* file_name) {
266  int num_args = 3;
267  const char arg0[] = "astparser";
268  const char* arg1 = file_name;
269  const char arg2[] = "--";
270  const char* arg_vector[3] = {arg0, arg1, arg2};
271 
272  CommonOptionsParser op(num_args, arg_vector, ToolingSampleCategory);
273  ClangTool tool(op.getCompilations(), op.getSourcePathList());
274 
275  std::string out_name(file_name);
276  std::string file_ext("ast");
277  replaceExtn(out_name, file_ext);
278 
279  std::error_code out_error_info;
280  llvm::raw_fd_ostream out_file(
281  llvm::StringRef(out_name), out_error_info, llvm::sys::fs::F_None);
282 
283  auto factory = llvm::make_unique<ToolFactory>(out_file);
284  return tool.run(factory.get());
285 }
286 
287 const std::string& UdfCompiler::getAstFileName() const {
288  return udf_ast_file_name_;
289 }
290 
291 UdfCompiler::UdfCompiler(const std::string& file_name)
292  : udf_file_name_(file_name), udf_ast_file_name_(file_name) {
294 }
295 
297  std::string cpu_ir_file(genCpuIrFilename(udf_file_name_.c_str()));
298 
299  VLOG(1) << "UDFCompiler cpu bc file = " << cpu_ir_file << std::endl;
300 
301  read_udf_cpu_module(cpu_ir_file);
302 }
303 
305  std::string gpu_ir_file(genGpuIrFilename(udf_file_name_.c_str()));
306 
307  VLOG(1) << "UDFCompiler gpu bc file = " << gpu_ir_file << std::endl;
308 
309  read_udf_gpu_module(gpu_ir_file);
310 }
311 
315 }
316 
318  int gpu_compile_result = 1;
319 
320  if (on_search_path("nvcc")) {
321  gpu_compile_result = compileToGpuByteCode(udf_file_name_.c_str(), false);
322  }
323 
324  // If gpu compilation fails but cpu compilation has succeeded, try compiling
325  // for the cpu with the assumption the user does not have the CUDA toolkit
326  // installed
327  if (gpu_compile_result != 0) {
328  gpu_compile_result = compileToGpuByteCode(udf_file_name_.c_str(), true);
329  }
330 
331  return gpu_compile_result;
332 }
333 
335  if (on_search_path("clang++")) {
336  LOG(INFO) << "UDFCompiler filename to compiler: " << udf_file_name_ << std::endl;
337  if (!boost::filesystem::exists(udf_file_name_)) {
338  LOG(FATAL) << "User defined function file " << udf_file_name_ << " does not exist.";
339  return 1;
340  }
341 
342  auto ast_result = parseToAst(udf_file_name_.c_str());
343 
344  if (ast_result == 0) {
345  // Compile udf file to generate cpu and gpu bytecode files
346 
347  int cpu_compile_result = compileToCpuByteCode(udf_file_name_.c_str());
348 #ifdef HAVE_CUDA
349  int gpu_compile_result = 1;
350 #endif
351 
352  if (cpu_compile_result == 0) {
354 #ifdef HAVE_CUDA
355  gpu_compile_result = compileForGpu();
356 
357  if (gpu_compile_result == 0) {
359  } else {
360  LOG(FATAL) << "Unable to compile UDF file for gpu" << std::endl;
361  return 1;
362  }
363 #endif
364  } else {
365  LOG(FATAL) << "Unable to compile UDF file for cpu" << std::endl;
366  return 1;
367  }
368  } else {
369  LOG(FATAL) << "Unable to create AST file for udf compilation" << std::endl;
370  return 1;
371  }
372  } else {
373  LOG(FATAL) << "Unable to compile udfs due to absence of clang++" << std::endl;
374  return 1;
375  }
376 
377  return 0;
378 }
ToolFactory(llvm::raw_fd_ostream &ast_file)
std::string genCpuIrFilename(const char *udf_file_name)
int compileToCpuByteCode(const char *udf_file_name)
void readCompiledModules()
UdfCompiler(const std::string &)
const std::string & getAstFileName() const
#define LOG(tag)
Definition: Logger.h:185
void readCpuCompiledModule()
void read_udf_cpu_module(const std::string &udf_ir_filename)
static llvm::cl::OptionCategory ToolingSampleCategory("UDF Tooling")
void read_udf_gpu_module(const std::string &udf_ir_filename)
External interface for parsing AST and bitcode files.
std::string genGpuIrFilename(const char *udf_file_name)
std::string removeFileExtension(const std::string &path)
int compileToGpuByteCode(const char *udf_file_name, bool cpu_mode)
std::unique_ptr< ASTConsumer > CreateASTConsumer(CompilerInstance &instance, StringRef file) override
std::string udf_ast_file_name_
Definition: UDFCompiler.h:54
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
int parseToAst(const char *file_name)
bool HandleTopLevelDecl(DeclGroupRef decl_reference) override
Definition: UDFCompiler.cpp:91
FunctionDeclVisitor(llvm::raw_fd_ostream &ast_file, SourceManager &s_manager)
Definition: UDFCompiler.cpp:48
std::string udf_file_name_
Definition: UDFCompiler.h:53
int compileUdf()
void replaceExtn(std::string &s, const std::string &new_ext)
std::string getFuncDeclFileName(FunctionDecl *f) const
Definition: UDFCompiler.cpp:70
bool on_search_path(const std::string file)
void readGpuCompiledModule()
int compileFromCommandLine(std::vector< const char * > &command_line)
clang::FrontendAction * create() override
std::string getFileExt(std::string &s)
DeclASTConsumer(llvm::raw_fd_ostream &ast_file, SourceManager &s_manager)
Definition: UDFCompiler.cpp:86
int compileForGpu()
#define VLOG(n)
Definition: Logger.h:280