OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ResultSetReductionOps.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <Shared/Logger.h>
20 
21 #include <memory>
22 #include <string>
23 #include <vector>
24 
// Per-thread counter from which every Value constructor takes its id (post-incrementing
// the counter); the Function constructor resets it to 0.
25 extern thread_local size_t g_value_id;
26 
27 // A collection of operators heavily inspired by LLVM IR which are both easy to
28 // translate to LLVM IR and to interpret, for small result sets, to avoid compilation
29 // overhead. In order to keep things simple, there is no general-purpose control flow.
30 // Instead, there is ReturnEarly for early return from a function based on a logical
31 // condition and For, which iterates between a start and an end index and executes the body.
32 
// Types of the values handled by the reduction operators.
enum class Type {
  // Non-pointer types.
  Int1,
  Int8,
  Int32,
  Int64,
  Float,
  Double,
  Void,
  // Pointers to the types above.
  Int8Ptr,
  Int32Ptr,
  Int64Ptr,
  FloatPtr,
  DoublePtr,
  VoidPtr,
  // Pointer to Int64Ptr: returned by pointer_type(Type::Int64Ptr) and accepted by
  // pointee_type(). Kept last so the numeric values of the other enumerators (which
  // show up in the LOG(FATAL) diagnostics) are unaffected.
  Int64PtrPtr,
};
49 
50 // Retrieves the type a pointer type points to.
51 inline Type pointee_type(const Type pointer) {
52  switch (pointer) {
53  case Type::Int8Ptr: {
54  return Type::Int8;
55  }
56  case Type::Int32Ptr: {
57  return Type::Int32;
58  }
59  case Type::Int64Ptr: {
60  return Type::Int64;
61  }
62  case Type::FloatPtr: {
63  return Type::Float;
64  }
65  case Type::DoublePtr: {
66  return Type::Double;
67  }
68  case Type::Int64PtrPtr: {
69  return Type::Int64Ptr;
70  }
71  default: {
72  LOG(FATAL) << "Invalid pointer type: " << static_cast<int>(pointer);
73  }
74  }
75  return Type::Void;
76 }
77 
78 // Creates a pointer type from the given type.
79 inline Type pointer_type(const Type pointee) {
80  switch (pointee) {
81  case Type::Int8: {
82  return Type::Int8Ptr;
83  }
84  case Type::Int64: {
85  return Type::Int64Ptr;
86  }
87  case Type::Int64Ptr: {
88  return Type::Int64PtrPtr;
89  }
90  default: {
91  LOG(FATAL) << "Invalid pointee type: " << static_cast<int>(pointee);
92  }
93  }
94  return Type::Void;
95 }
96 
97 class Value {
98  public:
99  Value(const Type type, const std::string& label)
100  : type_(type), label_(label), id_(g_value_id++) {}
101 
102  Type type() const { return type_; }
103 
104  size_t id() const { return id_; }
105 
106  const std::string& label() const { return label_; }
107 
108  virtual ~Value() = default;
109 
110  private:
111  const Type type_;
112  // The label of the value, useful for debugging the generated LLVM IR.
113  const std::string label_;
114  // An unique id, starting from 0, relative to the function. Used by the interpreter to
115  // implement a dense map of evaluated values.
116  const size_t id_;
117 };
118 
119 class Constant : public Value {
120  public:
121  Constant(const Type type) : Value(type, "") {}
122 };
123 
124 class ConstantInt : public Constant {
125  public:
126  ConstantInt(const int64_t value, const Type target) : Constant(target), value_(value) {}
127 
128  int64_t value() const { return value_; }
129 
130  private:
131  const int64_t value_;
132 };
133 
134 class ConstantFP : public Constant {
135  public:
136  ConstantFP(const double value, const Type target) : Constant(target), value_(value) {}
137 
138  double value() const { return value_; }
139 
140  private:
141  const double value_;
142 };
143 
144 class Argument : public Value {
145  public:
146  Argument(const Type type, const std::string& label) : Value(type, label) {}
147 };
148 
150 
151 class Instruction : public Value {
152  public:
153  Instruction(const Type type, const std::string& label) : Value(type, label) {}
154 
155  // Run the instruction in the given interpreter.
156  virtual void run(ReductionInterpreterImpl* interpreter) = 0;
157 };
158 
159 // A function, defined by its signature and instructions, which it owns.
160 class Function {
161  public:
162  struct NamedArg {
163  std::string name;
165  };
166 
167  Function(const std::string name,
168  const std::vector<NamedArg>& arg_types,
169  const Type ret_type,
170  const bool always_inline)
171  : name_(name)
172  , arg_types_(arg_types)
173  , ret_type_(ret_type)
174  , always_inline_(always_inline) {
175  g_value_id = 0;
176  for (const auto& named_arg : arg_types_) {
177  arguments_.emplace_back(new Argument(named_arg.type, named_arg.name));
178  }
179  }
180 
181  const std::string& name() const { return name_; }
182 
183  const std::vector<NamedArg>& arg_types() const { return arg_types_; }
184 
185  Argument* arg(const size_t idx) const { return arguments_[idx].get(); }
186 
187  Type ret_type() const { return ret_type_; }
188 
189  const std::vector<std::unique_ptr<Instruction>>& body() const { return body_; }
190 
191  const std::vector<std::unique_ptr<Constant>>& constants() const { return constants_; }
192 
193  bool always_inline() const { return always_inline_; }
194 
195  template <typename Tp, typename... Args>
196  Value* add(Args&&... args) {
197  body_.emplace_back(new Tp(std::forward<Args>(args)...));
198  return body_.back().get();
199  }
200 
201  template <typename Tp, typename... Args>
202  Value* addConstant(Args&&... args) {
203  constants_.emplace_back(new Tp(std::forward<Args>(args)...));
204  return constants_.back().get();
205  }
206 
207  private:
208  const std::string name_;
209  const std::vector<NamedArg> arg_types_;
211  std::vector<std::unique_ptr<Instruction>> body_;
212  const bool always_inline_;
213  std::vector<std::unique_ptr<Argument>> arguments_;
214  std::vector<std::unique_ptr<Constant>> constants_;
215 };
216 
217 class GetElementPtr : public Instruction {
218  public:
219  GetElementPtr(const Value* base, const Value* index, const std::string& label)
220  : Instruction(base->type(), label), base_(base), index_(index) {}
221 
222  const Value* base() const { return base_; }
223 
224  const Value* index() const { return index_; }
225 
226  void run(ReductionInterpreterImpl* interpreter) override;
227 
228  private:
229  const Value* base_;
230  const Value* index_;
231 };
232 
233 class Load : public Instruction {
234  public:
235  Load(const Value* source, const std::string& label)
236  : Instruction(pointee_type(source->type()), label), source_(source) {}
237 
238  const Value* source() const { return source_; }
239 
240  void run(ReductionInterpreterImpl* interpreter) override;
241 
242  private:
243  const Value* source_;
244 };
245 
246 class ICmp : public Instruction {
247  public:
248  enum class Predicate {
249  NE,
250  EQ,
251  };
252 
254  const Value* lhs,
255  const Value* rhs,
256  const std::string& label)
257  : Instruction(Type::Int1, label), predicate_(predicate), lhs_(lhs), rhs_(rhs) {}
258 
259  Predicate predicate() const { return predicate_; }
260 
261  const Value* lhs() const { return lhs_; }
262 
263  const Value* rhs() const { return rhs_; }
264 
265  void run(ReductionInterpreterImpl* interpreter) override;
266 
267  private:
269  const Value* lhs_;
270  const Value* rhs_;
271 };
272 
273 class BinaryOperator : public Instruction {
274  public:
275  enum class BinaryOp {
276  Add,
277  Mul,
278  };
279 
281  const Value* lhs,
282  const Value* rhs,
283  const std::string& label)
284  : Instruction(Type::Int1, label), op_(op), lhs_(lhs), rhs_(rhs) {}
285 
286  BinaryOp op() const { return op_; }
287 
288  const Value* lhs() const { return lhs_; }
289 
290  const Value* rhs() const { return rhs_; }
291 
292  void run(ReductionInterpreterImpl* interpreter) override;
293 
294  private:
295  const BinaryOp op_;
296  const Value* lhs_;
297  const Value* rhs_;
298 };
299 
300 class Cast : public Instruction {
301  public:
302  enum class CastOp {
303  Trunc,
304  SExt,
305  BitCast,
306  };
307 
308  Cast(const CastOp op, const Value* source, const Type type, const std::string& label)
309  : Instruction(type, label), op_(op), source_(source) {}
310 
311  CastOp op() const { return op_; }
312 
313  const Value* source() const { return source_; }
314 
315  void run(ReductionInterpreterImpl* interpreter) override;
316 
317  private:
318  const CastOp op_;
319  const Value* source_;
320 };
321 
322 class Ret : public Instruction {
323  public:
324  Ret(const Value* value) : Instruction(value->type(), ""), value_(value) {}
325 
327 
328  const Value* value() const { return value_; }
329 
330  void run(ReductionInterpreterImpl* interpreter) override;
331 
332  private:
333  const Value* value_;
334 };
335 
336 // An internal runtime function. In this context, internal means either part of the
337 // bitcode runtime (given by name) or one of the reduction functions.
338 class Call : public Instruction {
339  public:
341  const std::vector<const Value*>& arguments,
342  const std::string& label)
343  : Instruction(callee->ret_type(), label)
344  , callee_(callee)
345  , arguments_(arguments)
346  , cached_callee_(nullptr) {}
347 
348  Call(const std::string& callee_name,
349  const std::vector<const Value*>& arguments,
350  const std::string& label)
351  : Instruction(Type::Void, label)
352  , callee_name_(callee_name)
353  , callee_(nullptr)
354  , arguments_(arguments)
355  , cached_callee_(nullptr) {}
356 
357  bool external() const { return false; }
358 
359  const std::string& callee_name() const { return callee_name_; }
360 
361  const Function* callee() const { return callee_; }
362 
363  const std::vector<const Value*>& arguments() const { return arguments_; }
364 
365  void run(ReductionInterpreterImpl* interpreter) override;
366 
367  void* cached_callee() const { return cached_callee_; }
368 
370 
371  private:
372  const std::string callee_name_;
374  const std::vector<const Value*> arguments_;
375  // For performance reasons, the pointer of the native function is stored in this field.
376  mutable void* cached_callee_;
377 };
378 
379 // An external runtime function, with C binding.
380 class ExternalCall : public Instruction {
381  public:
382  ExternalCall(const std::string& callee_name,
383  const Type ret_type,
384  const std::vector<const Value*>& arguments,
385  const std::string& label)
386  : Instruction(ret_type, label)
387  , callee_name_(callee_name)
388  , ret_type_(ret_type)
389  , arguments_(arguments)
390  , cached_callee_(nullptr) {}
391 
392  bool external() const { return true; }
393 
394  const std::string& callee_name() const { return callee_name_; }
395 
396  const std::vector<const Value*>& arguments() const { return arguments_; }
397 
398  void run(ReductionInterpreterImpl* interpreter) override;
399 
400  void* cached_callee() const { return cached_callee_; }
401 
403 
404  private:
405  const std::string callee_name_;
407  const std::vector<const Value*> arguments_;
408  mutable void* cached_callee_;
409 };
410 
411 class Alloca : public Instruction {
412  public:
413  Alloca(const Type element_type, const Value* array_size, const std::string& label)
414  : Instruction(pointer_type(element_type), label), array_size_(array_size) {}
415 
416  const Value* array_size() const { return array_size_; }
417 
418  void run(ReductionInterpreterImpl* interpreter) override;
419 
420  private:
422 };
423 
424 class MemCpy : public Instruction {
425  public:
426  MemCpy(const Value* dest, const Value* source, const Value* size)
427  : Instruction(Type::Void, ""), dest_(dest), source_(source), size_(size) {}
428 
429  const Value* dest() const { return dest_; }
430 
431  const Value* source() const { return source_; }
432 
433  const Value* size() const { return size_; }
434 
435  void run(ReductionInterpreterImpl* interpreter) override;
436 
437  private:
438  const Value* dest_;
439  const Value* source_;
440  const Value* size_;
441 };
442 
443 // Returns from the current function with the given error code, if the provided condition
444 // is true. If the function return type is void, the error code is ignored.
445 class ReturnEarly : public Instruction {
446  public:
447  ReturnEarly(const Value* cond, const int error_code, const std::string& label)
448  : Instruction(Type::Void, label), cond_(cond), error_code_(error_code) {}
449 
450  const Value* cond() const { return cond_; }
451 
452  int error_code() const { return error_code_; }
453 
454  void run(ReductionInterpreterImpl* interpreter) override;
455 
456  private:
457  const Value* cond_;
458  const int error_code_;
459 };
460 
461 // An operation which executes the provided body from the given start index to the end
462 // index (exclusive). Additionally, the iterator is added to the variables seen by the
463 // body.
464 class For : public Instruction {
465  public:
466  For(const Value* start, const Value* end, const std::string& label)
467  : Instruction(Type::Void, label)
468  , start_(start)
469  , end_(end)
470  , iter_(Type::Int64, label) {}
471 
472  const std::vector<std::unique_ptr<Instruction>>& body() const { return body_; }
473 
474  const Value* start() const { return start_; }
475 
476  const Value* end() const { return end_; }
477 
478  const Value* iter() const { return &iter_; }
479 
480  void run(ReductionInterpreterImpl* interpreter) override;
481 
482  template <typename Tp, typename... Args>
483  Value* add(Args&&... args) {
484  body_.emplace_back(new Tp(std::forward<Args>(args)...));
485  return body_.back().get();
486  }
487 
488  private:
489  std::vector<std::unique_ptr<Instruction>> body_;
490  const Value* start_;
491  const Value* end_;
492  // Since the iterator always moves between the start and the end, just store a dummy
493  // value. During codegen or interpretation, it will be mapped to the current value of
494  // the iterator.
495  const Value iter_;
496 };
const int64_t value_
const std::vector< std::unique_ptr< Constant > > & constants() const
Argument(const Type type, const std::string &label)
const std::vector< NamedArg > & arg_types() const
const Value * size() const
bool external() const
const std::string & name() const
const Value * array_size_
bool external() const
const Value * cond_
size_t id() const
const std::string & label() const
Ret(const Value *value)
ExternalCall(const std::string &callee_name, const Type ret_type, const std::vector< const Value * > &arguments, const std::string &label)
const std::vector< const Value * > arguments_
bool always_inline() const
ICmp(const Predicate predicate, const Value *lhs, const Value *rhs, const std::string &label)
const Value * value() const
const Value * index_
ReturnEarly(const Value *cond, const int error_code, const std::string &label)
const Value * rhs() const
const Value * source_
const std::string callee_name_
const std::vector< std::unique_ptr< Instruction > > & body() const
#define LOG(tag)
Definition: Logger.h:185
const std::string callee_name_
void run(ReductionInterpreterImpl *interpreter) override
Type ret_type() const
const std::vector< const Value * > arguments_
Argument * arg(const size_t idx) const
Type type() const
Instruction(const Type type, const std::string &label)
ConstantInt(const int64_t value, const Type target)
Type pointer_type(const Type pointee)
std::vector< std::unique_ptr< Argument > > arguments_
Alloca(const Type element_type, const Value *array_size, const std::string &label)
Constant(const Type type)
BinaryOperator(const BinaryOp op, const Value *lhs, const Value *rhs, const std::string &label)
void run(ReductionInterpreterImpl *interpreter) override
void run(ReductionInterpreterImpl *interpreter) override
const Value * base() const
Value * addConstant(Args &&...args)
Value * add(Args &&...args)
virtual void run(ReductionInterpreterImpl *interpreter)=0
const Value * lhs() const
void set_cached_callee(void *cached_callee) const
void * cached_callee() const
const Value * end_
thread_local size_t g_value_id
const Value * source() const
const Value * end() const
void * cached_callee_
const Value * start() const
const Value * rhs_
int error_code() const
std::vector< std::unique_ptr< Instruction > > body_
const Value * rhs() const
const Value * source() const
ConstantFP(const double value, const Type target)
const std::string label_
const Value * dest() const
void run(ReductionInterpreterImpl *interpreter) override
const Value * start_
void run(ReductionInterpreterImpl *interpreter) override
Function(const std::string name, const std::vector< NamedArg > &arg_types, const Type ret_type, const bool always_inline)
GetElementPtr(const Value *base, const Value *index, const std::string &label)
const Value * value_
const std::vector< NamedArg > arg_types_
Type pointee_type(const Type pointer)
const int32_t groups_buffer_size return nullptr
const Value * lhs_
Call(const Function *callee, const std::vector< const Value * > &arguments, const std::string &label)
const std::string & callee_name() const
const std::vector< const Value * > & arguments() const
BinaryOp op() const
Value * add(Args &&...args)
const Type type_
void run(ReductionInterpreterImpl *interpreter) override
Value(const Type type, const std::string &label)
MemCpy(const Value *dest, const Value *source, const Value *size)
void * cached_callee() const
const size_t id_
const Function * callee() const
const Value * cond() const
const Predicate predicate_
const Value * source() const
std::vector< std::unique_ptr< Instruction > > body_
Cast(const CastOp op, const Value *source, const Type type, const std::string &label)
Load(const Value *source, const std::string &label)
const Value * index() const
const Value * array_size() const
const Value * size_
const bool always_inline_
const Value * lhs() const
virtual ~Value()=default
const Value * source_
const std::vector< std::unique_ptr< Instruction > > & body() const
For(const Value *start, const Value *end, const std::string &label)
int64_t value() const
double value() const
CastOp op() const
const Value * iter() const
std::vector< std::unique_ptr< Constant > > constants_
const Value * dest_
const Value * source_
const Value iter_
const CastOp op_
const Function * callee_
void run(ReductionInterpreterImpl *interpreter) override
const std::string name_
const std::string & callee_name() const
Predicate predicate() const
void run(ReductionInterpreterImpl *interpreter) override
void run(ReductionInterpreterImpl *interpreter) override
void run(ReductionInterpreterImpl *interpreter) override
const double value_
Call(const std::string &callee_name, const std::vector< const Value * > &arguments, const std::string &label)
const Type ret_type_
void run(ReductionInterpreterImpl *interpreter) override
void set_cached_callee(void *cached_callee) const
void run(ReductionInterpreterImpl *interpreter) override
const std::vector< const Value * > & arguments() const