OmniSciDB  29e35f4d58
ResultSetReductionOps.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <Shared/Logger.h>
20 
21 #include <memory>
22 #include <string>
23 #include <vector>
24 
25 extern thread_local size_t g_value_id;
26 
27 // A collection of operators heavily inspired from LLVM IR which are both easy to
28 // translated to LLVM IR and interpreted, for small result sets, to avoid compilation
29 // overhead. In order to keep things simple, there is no general-purpose control flow.
30 // Instead, there is ReturnEarly for early return from a function based on a logical
31 // condition and For, which iterates between a start and an index and executes the body.
32 
33 enum class Type {
34  Int1,
35  Int8,
36  Int32,
37  Int64,
38  Float,
39  Double,
40  Void,
41  Int8Ptr,
42  Int32Ptr,
43  Int64Ptr,
44  FloatPtr,
45  DoublePtr,
46  VoidPtr,
48 };
49 
50 // Retrieves the type a pointer type points to.
51 inline Type pointee_type(const Type pointer) {
52  switch (pointer) {
53  case Type::Int8Ptr: {
54  return Type::Int8;
55  }
56  case Type::Int32Ptr: {
57  return Type::Int32;
58  }
59  case Type::Int64Ptr: {
60  return Type::Int64;
61  }
62  case Type::FloatPtr: {
63  return Type::Float;
64  }
65  case Type::DoublePtr: {
66  return Type::Double;
67  }
68  case Type::Int64PtrPtr: {
69  return Type::Int64Ptr;
70  }
71  default: {
72  LOG(FATAL) << "Invalid pointer type: " << static_cast<int>(pointer);
73  }
74  }
75  return Type::Void;
76 }
77 
78 // Creates a pointer type from the given type.
79 inline Type pointer_type(const Type pointee) {
80  switch (pointee) {
81  case Type::Int8: {
82  return Type::Int8Ptr;
83  }
84  case Type::Int64: {
85  return Type::Int64Ptr;
86  }
87  case Type::Int64Ptr: {
88  return Type::Int64PtrPtr;
89  }
90  default: {
91  LOG(FATAL) << "Invalid pointee type: " << static_cast<int>(pointee);
92  }
93  }
94  return Type::Void;
95 }
96 
97 class Value {
98  public:
99  Value(const Type type, const std::string& label)
100  : type_(type), label_(label), id_(g_value_id++) {}
101 
102  Type type() const { return type_; }
103 
104  size_t id() const { return id_; }
105 
106  const std::string& label() const { return label_; }
107 
108  virtual ~Value() = default;
109 
110  private:
111  const Type type_;
112  // The label of the value, useful for debugging the generated LLVM IR.
113  const std::string label_;
114  // An unique id, starting from 0, relative to the function. Used by the interpreter to
115  // implement a dense map of evaluated values.
116  const size_t id_;
117 };
118 
119 class Constant : public Value {
120  public:
121  Constant(const Type type) : Value(type, "") {}
122 };
123 
124 class ConstantInt : public Constant {
125  public:
126  ConstantInt(const int64_t value, const Type target) : Constant(target), value_(value) {}
127 
128  int64_t value() const { return value_; }
129 
130  private:
131  const int64_t value_;
132 };
133 
134 class ConstantFP : public Constant {
135  public:
136  ConstantFP(const double value, const Type target) : Constant(target), value_(value) {}
137 
138  double value() const { return value_; }
139 
140  private:
141  const double value_;
142 };
143 
144 class Argument : public Value {
145  public:
146  Argument(const Type type, const std::string& label) : Value(type, label) {}
147 };
148 
150 
151 class Instruction : public Value {
152  public:
153  Instruction(const Type type, const std::string& label) : Value(type, label) {}
154 
155  // Run the instruction in the given interpreter.
156  virtual void run(ReductionInterpreterImpl* interpreter) = 0;
157 };
158 
159 // A function, defined by its signature and instructions, which it owns.
160 class Function {
161  public:
162  struct NamedArg {
163  std::string name;
165  };
166 
167  Function(const std::string name,
168  const std::vector<NamedArg>& arg_types,
169  const Type ret_type,
170  const bool always_inline)
171  : name_(name)
172  , arg_types_(arg_types)
173  , ret_type_(ret_type)
174  , always_inline_(always_inline) {
175  g_value_id = 0;
176  for (const auto& named_arg : arg_types_) {
177  arguments_.emplace_back(new Argument(named_arg.type, named_arg.name));
178  }
179  }
180 
181  const std::string& name() const { return name_; }
182 
183  const std::vector<NamedArg>& arg_types() const { return arg_types_; }
184 
185  Argument* arg(const size_t idx) const { return arguments_[idx].get(); }
186 
187  Type ret_type() const { return ret_type_; }
188 
189  const std::vector<std::unique_ptr<Instruction>>& body() const { return body_; }
190 
191  const std::vector<std::unique_ptr<Constant>>& constants() const { return constants_; }
192 
193  bool always_inline() const { return always_inline_; }
194 
195  template <typename Tp, typename... Args>
196  Value* add(Args&&... args) {
197  body_.emplace_back(new Tp(std::forward<Args>(args)...));
198  return body_.back().get();
199  }
200 
201  template <typename Tp, typename... Args>
202  Value* addConstant(Args&&... args) {
203  constants_.emplace_back(new Tp(std::forward<Args>(args)...));
204  return constants_.back().get();
205  }
206 
207  private:
208  const std::string name_;
209  const std::vector<NamedArg> arg_types_;
211  std::vector<std::unique_ptr<Instruction>> body_;
212  const bool always_inline_;
213  std::vector<std::unique_ptr<Argument>> arguments_;
214  std::vector<std::unique_ptr<Constant>> constants_;
215 };
216 
217 class GetElementPtr : public Instruction {
218  public:
219  GetElementPtr(const Value* base, const Value* index, const std::string& label)
220  : Instruction(base->type(), label), base_(base), index_(index) {}
221 
222  const Value* base() const { return base_; }
223 
224  const Value* index() const { return index_; }
225 
226  void run(ReductionInterpreterImpl* interpreter) override;
227 
228  private:
229  const Value* base_;
230  const Value* index_;
231 };
232 
233 class Load : public Instruction {
234  public:
235  Load(const Value* source, const std::string& label)
236  : Instruction(pointee_type(source->type()), label), source_(source) {}
237 
238  const Value* source() const { return source_; }
239 
240  void run(ReductionInterpreterImpl* interpreter) override;
241 
242  private:
243  const Value* source_;
244 };
245 
246 class ICmp : public Instruction {
247  public:
248  enum class Predicate {
249  NE,
250  EQ,
251  };
252 
253  ICmp(const Predicate predicate,
254  const Value* lhs,
255  const Value* rhs,
256  const std::string& label)
257  : Instruction(Type::Int1, label), predicate_(predicate), lhs_(lhs), rhs_(rhs) {}
258 
259  Predicate predicate() const { return predicate_; }
260 
261  const Value* lhs() const { return lhs_; }
262 
263  const Value* rhs() const { return rhs_; }
264 
265  void run(ReductionInterpreterImpl* interpreter) override;
266 
267  private:
269  const Value* lhs_;
270  const Value* rhs_;
271 };
272 
273 class BinaryOperator : public Instruction {
274  public:
275  enum class BinaryOp {
276  Add,
277  Mul,
278  };
279 
281  const Value* lhs,
282  const Value* rhs,
283  const std::string& label)
284  : Instruction(Type::Int1, label), op_(op), lhs_(lhs), rhs_(rhs) {}
285 
286  BinaryOp op() const { return op_; }
287 
288  const Value* lhs() const { return lhs_; }
289 
290  const Value* rhs() const { return rhs_; }
291 
292  void run(ReductionInterpreterImpl* interpreter) override;
293 
294  private:
295  const BinaryOp op_;
296  const Value* lhs_;
297  const Value* rhs_;
298 };
299 
300 class Cast : public Instruction {
301  public:
302  enum class CastOp {
303  Trunc,
304  SExt,
305  BitCast,
306  };
307 
308  Cast(const CastOp op, const Value* source, const Type type, const std::string& label)
309  : Instruction(type, label), op_(op), source_(source) {}
310 
311  CastOp op() const { return op_; }
312 
313  const Value* source() const { return source_; }
314 
315  void run(ReductionInterpreterImpl* interpreter) override;
316 
317  private:
318  const CastOp op_;
319  const Value* source_;
320 };
321 
322 class Ret : public Instruction {
323  public:
324  Ret(const Value* value) : Instruction(value->type(), ""), value_(value) {}
325 
326  Ret() : Instruction(Type::Void, ""), value_(nullptr) {}
327 
328  const Value* value() const { return value_; }
329 
330  void run(ReductionInterpreterImpl* interpreter) override;
331 
332  private:
333  const Value* value_;
334 };
335 
336 // An internal runtime function. In this context, internal means either part of the
337 // bitcode runtime (given by name) or one of the reduction functions.
338 class Call : public Instruction {
339  public:
340  Call(const Function* callee,
341  const std::vector<const Value*>& arguments,
342  const std::string& label)
343  : Instruction(callee->ret_type(), label)
344  , callee_(callee)
345  , arguments_(arguments)
346  , cached_callee_(nullptr) {}
347 
348  Call(const std::string& callee_name,
349  const std::vector<const Value*>& arguments,
350  const std::string& label)
351  : Instruction(Type::Void, label)
352  , callee_name_(callee_name)
353  , callee_(nullptr)
354  , arguments_(arguments)
355  , cached_callee_(nullptr) {}
356 
357  Call(const std::string& callee_name,
358  const Type returnType,
359  const std::vector<const Value*>& arguments,
360  const std::string& label)
361  : Instruction(returnType, label)
362  , callee_name_(callee_name)
363  , callee_(nullptr)
364  , arguments_(arguments)
365  , cached_callee_(nullptr) {}
366 
367  bool external() const { return false; }
368 
369  const std::string& callee_name() const { return callee_name_; }
370 
371  const Function* callee() const { return callee_; }
372 
373  const std::vector<const Value*>& arguments() const { return arguments_; }
374 
375  void run(ReductionInterpreterImpl* interpreter) override;
376 
377  void* cached_callee() const { return cached_callee_; }
378 
379  void set_cached_callee(void* cached_callee) const { cached_callee_ = cached_callee; }
380 
381  private:
382  const std::string callee_name_;
384  const std::vector<const Value*> arguments_;
385  // For performance reasons, the pointer of the native function is stored in this field.
386  mutable void* cached_callee_;
387 };
388 
389 // An external runtime function, with C binding.
390 class ExternalCall : public Instruction {
391  public:
392  ExternalCall(const std::string& callee_name,
393  const Type ret_type,
394  const std::vector<const Value*>& arguments,
395  const std::string& label)
396  : Instruction(ret_type, label)
397  , callee_name_(callee_name)
398  , ret_type_(ret_type)
399  , arguments_(arguments)
400  , cached_callee_(nullptr) {}
401 
402  bool external() const { return true; }
403 
404  const std::string& callee_name() const { return callee_name_; }
405 
406  const std::vector<const Value*>& arguments() const { return arguments_; }
407 
408  void run(ReductionInterpreterImpl* interpreter) override;
409 
410  void* cached_callee() const { return cached_callee_; }
411 
412  void set_cached_callee(void* cached_callee) const { cached_callee_ = cached_callee; }
413 
414  private:
415  const std::string callee_name_;
417  const std::vector<const Value*> arguments_;
418  mutable void* cached_callee_;
419 };
420 
421 class Alloca : public Instruction {
422  public:
423  Alloca(const Type element_type, const Value* array_size, const std::string& label)
424  : Instruction(pointer_type(element_type), label), array_size_(array_size) {}
425 
426  const Value* array_size() const { return array_size_; }
427 
428  void run(ReductionInterpreterImpl* interpreter) override;
429 
430  private:
432 };
433 
434 class MemCpy : public Instruction {
435  public:
436  MemCpy(const Value* dest, const Value* source, const Value* size)
437  : Instruction(Type::Void, ""), dest_(dest), source_(source), size_(size) {}
438 
439  const Value* dest() const { return dest_; }
440 
441  const Value* source() const { return source_; }
442 
443  const Value* size() const { return size_; }
444 
445  void run(ReductionInterpreterImpl* interpreter) override;
446 
447  private:
448  const Value* dest_;
449  const Value* source_;
450  const Value* size_;
451 };
452 
453 // Returns from the current function with the given error code, if the provided condition
454 // is true. If the function return type is void, the error code is ignored.
455 class ReturnEarly : public Instruction {
456  public:
457  ReturnEarly(const Value* cond, const std::string& label)
458  : Instruction(Type::Void, label), cond_(cond), error_code_(nullptr) {}
459 
460  ReturnEarly(const Value* cond, const Value* error_code, const std::string& label)
461  : Instruction(Type::Void, label), cond_(cond), error_code_(error_code) {}
462 
463  const Value* cond() const { return cond_; }
464 
465  const Value* error_code() const { return error_code_; }
466 
467  void run(ReductionInterpreterImpl* interpreter) override;
468 
469  private:
470  const Value* cond_;
472 };
473 
474 // An operation which executes the provided body from the given start index to the end
475 // index (exclusive). Additionally, the iterator is added to the variables seen by the
476 // body.
477 class For : public Instruction {
478  public:
479  For(const Value* start, const Value* end, const std::string& label)
480  : Instruction(Type::Void, label)
481  , start_(start)
482  , end_(end)
483  , iter_(Type::Int64, label) {}
484 
485  const std::vector<std::unique_ptr<Instruction>>& body() const { return body_; }
486 
487  const Value* start() const { return start_; }
488 
489  const Value* end() const { return end_; }
490 
491  const Value* iter() const { return &iter_; }
492 
493  void run(ReductionInterpreterImpl* interpreter) override;
494 
495  template <typename Tp, typename... Args>
496  Value* add(Args&&... args) {
497  body_.emplace_back(new Tp(std::forward<Args>(args)...));
498  return body_.back().get();
499  }
500 
501  private:
502  std::vector<std::unique_ptr<Instruction>> body_;
503  const Value* start_;
504  const Value* end_;
505  // Since the iterator always moves between the start and the end, just store a dummy
506  // value. During codegen or interpretation, it will be mapped to the current value of
507  // the iterator.
508  const Value iter_;
509 };
const int64_t value_
const Value * source() const
Argument(const Type type, const std::string &label)
void * cached_callee() const
size_t id() const
const Value * array_size_
Value * add(Args &&... args)
const Value * rhs() const
const Value * cond_
Argument * arg(const size_t idx) const
Ret(const Value *value)
thread_local size_t g_value_id
const std::vector< const Value * > arguments_
ICmp(const Predicate predicate, const Value *lhs, const Value *rhs, const std::string &label)
const Value * index_
const Value * source_
void * cached_callee() const
const std::string callee_name_
const Value * index() const
const Function * callee() const
#define LOG(tag)
Definition: Logger.h:188
const std::string callee_name_
ExternalCall(const std::string &callee_name, const Type ret_type, const std::vector< const Value *> &arguments, const std::string &label)
const std::vector< const Value * > arguments_
const Value * error_code() const
int64_t value() const
Instruction(const Type type, const std::string &label)
ConstantInt(const int64_t value, const Type target)
Call(const std::string &callee_name, const Type returnType, const std::vector< const Value *> &arguments, const std::string &label)
Type pointer_type(const Type pointee)
std::vector< std::unique_ptr< Argument > > arguments_
CastOp op() const
Alloca(const Type element_type, const Value *array_size, const std::string &label)
Constant(const Type type)
BinaryOperator(const BinaryOp op, const Value *lhs, const Value *rhs, const std::string &label)
const Value * start() const
const Value * end_
Type type() const
void * cached_callee_
const std::vector< const Value * > & arguments() const
const Value * rhs_
const Value * size() const
std::vector< std::unique_ptr< Instruction > > body_
ConstantFP(const double value, const Type target)
const std::string label_
const Value * start_
const Value * source() const
Function(const std::string name, const std::vector< NamedArg > &arg_types, const Type ret_type, const bool always_inline)
Type ret_type() const
GetElementPtr(const Value *base, const Value *index, const std::string &label)
bool external() const
Call(const Function *callee, const std::vector< const Value *> &arguments, const std::string &label)
const Value * value_
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
const std::vector< NamedArg > arg_types_
const Value * base() const
const std::vector< std::unique_ptr< Instruction > > & body() const
Type pointee_type(const Type pointer)
const Value * lhs_
const Value * lhs() const
const std::string & callee_name() const
const Value * end() const
const Type type_
const std::string & callee_name() const
Value(const Type type, const std::string &label)
MemCpy(const Value *dest, const Value *source, const Value *size)
const Value * cond() const
Predicate predicate() const
const size_t id_
const std::vector< std::unique_ptr< Constant > > & constants() const
void set_cached_callee(void *cached_callee) const
const Value * value() const
const Predicate predicate_
std::vector< std::unique_ptr< Instruction > > body_
const Value * error_code_
const Value * array_size() const
const std::vector< const Value * > & arguments() const
const Value * source() const
Cast(const CastOp op, const Value *source, const Type type, const std::string &label)
Call(const std::string &callee_name, const std::vector< const Value *> &arguments, const std::string &label)
Load(const Value *source, const std::string &label)
const Value * rhs() const
const std::vector< std::unique_ptr< Instruction > > & body() const
const Value * size_
const Value * dest() const
const bool always_inline_
Value * addConstant(Args &&... args)
const Value * source_
For(const Value *start, const Value *end, const std::string &label)
Value * add(Args &&... args)
std::vector< std::unique_ptr< Constant > > constants_
void set_cached_callee(void *cached_callee) const
const Value * dest_
const Value * source_
const Value iter_
ReturnEarly(const Value *cond, const std::string &label)
const Value * lhs() const
const CastOp op_
const Function * callee_
bool external() const
const std::string name_
const Value * iter() const
const double value_
static bool run
const std::vector< NamedArg > & arg_types() const
bool always_inline() const
const Type ret_type_
double value() const
const std::string & name() const
ReturnEarly(const Value *cond, const Value *error_code, const std::string &label)
const std::string & label() const
BinaryOp op() const