OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TableFunctionExecutionContext Class Reference

#include <TableFunctionExecutionContext.h>

Public Member Functions

 TableFunctionExecutionContext (std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
 TableFunctionExecutionContext (const TableFunctionExecutionContext &)=delete
 
TableFunctionExecutionContext & operator= (const TableFunctionExecutionContext &)=delete
 
ResultSetPtr execute (const TableFunctionExecutionUnit &exe_unit, const InputTableInfo &table_info, const TableFunctionCompilationContext *compilation_context, const ColumnFetcher &column_fetcher, const ExecutorDeviceType device_type, Executor *executor)
 

Private Member Functions

ResultSetPtr launchCpuCode (const TableFunctionExecutionUnit &exe_unit, const TableFunctionCompilationContext *compilation_context, std::vector< const int8_t * > &col_buf_ptrs, const size_t elem_count, Executor *executor)
 
ResultSetPtr launchGpuCode (const TableFunctionExecutionUnit &exe_unit, const TableFunctionCompilationContext *compilation_context, std::vector< const int8_t * > &col_buf_ptrs, const size_t elem_count, const int device_id, Executor *executor)
 

Private Attributes

std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 

Detailed Description

Definition at line 28 of file TableFunctionExecutionContext.h.

Constructor & Destructor Documentation

TableFunctionExecutionContext::TableFunctionExecutionContext ( std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
inline

Definition at line 30 of file TableFunctionExecutionContext.h.

31  : row_set_mem_owner_(row_set_mem_owner) {}
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
TableFunctionExecutionContext::TableFunctionExecutionContext ( const TableFunctionExecutionContext & )
delete

Member Function Documentation

ResultSetPtr TableFunctionExecutionContext::execute ( const TableFunctionExecutionUnit & exe_unit,
const InputTableInfo & table_info,
const TableFunctionCompilationContext * compilation_context,
const ColumnFetcher & column_fetcher,
const ExecutorDeviceType  device_type,
Executor * executor 
)

Definition at line 66 of file TableFunctionExecutionContext.cpp.

References CHECK(), CHECK_EQ, CHECK_GE, ColumnFetcher::columnarized_table_cache_, CPU, Data_Namespace::CPU_LEVEL, anonymous_namespace{TableFunctionExecutionContext.cpp}::create_literal_buffer(), Fragmenter_Namespace::TableInfo::fragments, get_bit_width(), ColumnFetcher::getOneColumnFragment(), GPU, Data_Namespace::GPU_LEVEL, InputTableInfo::info, TableFunctionExecutionUnit::input_exprs, launchCpuCode(), launchGpuCode(), and UNREACHABLE.

Referenced by Executor::executeTableFunction().

72  {
73  CHECK(compilation_context);
74 
75  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
76  std::vector<std::unique_ptr<char[]>> literals_owner;
77 
78  const int device_id = 0; // TODO(adb): support multi-gpu table functions
79  std::unique_ptr<CudaAllocator> device_allocator;
80  if (device_type == ExecutorDeviceType::GPU) {
81  auto& data_mgr = executor->catalog_->getDataMgr();
82  device_allocator.reset(new CudaAllocator(&data_mgr, device_id));
83  }
84 
85  std::vector<const int8_t*> col_buf_ptrs;
86  ssize_t element_count = -1;
87  for (const auto& input_expr : exe_unit.input_exprs) {
88  if (auto col_var = dynamic_cast<Analyzer::ColumnVar*>(input_expr)) {
89  auto [col_buf, buf_elem_count] = ColumnFetcher::getOneColumnFragment(
90  executor,
91  *col_var,
92  table_info.info.fragments.front(),
95  device_id,
96  device_allocator.get(),
97  chunks_owner,
98  column_fetcher.columnarized_table_cache_);
99  if (element_count < 0) {
100  element_count = static_cast<ssize_t>(buf_elem_count);
101  } else {
102  CHECK_EQ(static_cast<ssize_t>(buf_elem_count), element_count);
103  }
104  col_buf_ptrs.push_back(col_buf);
105  } else if (const auto& constant_val = dynamic_cast<Analyzer::Constant*>(input_expr)) {
106  // TODO(adb): Unify literal handling with rest of system, either in Codegen or as a
107  // separate serialization component
108  const auto const_val_datum = constant_val->get_constval();
109  const auto& ti = constant_val->get_type_info();
110  if (ti.is_fp()) {
111  switch (get_bit_width(ti)) {
112  case 32:
113  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.floatval,
114  device_type,
115  literals_owner,
116  device_allocator.get()));
117  break;
118  case 64:
119  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.doubleval,
120  device_type,
121  literals_owner,
122  device_allocator.get()));
123  break;
124  default:
125  UNREACHABLE();
126  }
127  } else if (ti.is_integer()) {
128  switch (get_bit_width(ti)) {
129  case 8:
130  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.tinyintval,
131  device_type,
132  literals_owner,
133  device_allocator.get()));
134  break;
135  case 16:
136  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.smallintval,
137  device_type,
138  literals_owner,
139  device_allocator.get()));
140  break;
141  case 32:
142  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.intval,
143  device_type,
144  literals_owner,
145  device_allocator.get()));
146  break;
147  case 64:
148  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.bigintval,
149  device_type,
150  literals_owner,
151  device_allocator.get()));
152  break;
153  default:
154  UNREACHABLE();
155  }
156  } else {
157  throw std::runtime_error("Literal value " + constant_val->toString() +
158  " is not yet supported.");
159  }
160  }
161  }
162  CHECK_EQ(col_buf_ptrs.size(), exe_unit.input_exprs.size());
163 
164  CHECK_GE(element_count, ssize_t(0));
165  switch (device_type) {
167  return launchCpuCode(exe_unit,
168  compilation_context,
169  col_buf_ptrs,
170  static_cast<size_t>(element_count),
171  executor);
173  return launchGpuCode(exe_unit,
174  compilation_context,
175  col_buf_ptrs,
176  static_cast<size_t>(element_count),
177  /*device_id=*/0,
178  executor);
179  }
180  UNREACHABLE();
181  return nullptr;
182 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
ResultSetPtr launchCpuCode(const TableFunctionExecutionUnit &exe_unit, const TableFunctionCompilationContext *compilation_context, std::vector< const int8_t * > &col_buf_ptrs, const size_t elem_count, Executor *executor)
std::vector< Analyzer::Expr * > input_exprs
ResultSetPtr launchGpuCode(const TableFunctionExecutionUnit &exe_unit, const TableFunctionCompilationContext *compilation_context, std::vector< const int8_t * > &col_buf_ptrs, const size_t elem_count, const int device_id, Executor *executor)
#define UNREACHABLE()
Definition: Logger.h:241
#define CHECK_GE(x, y)
Definition: Logger.h:210
ColumnCacheMap columnarized_table_cache_
std::vector< FragmentInfo > fragments
Definition: Fragmenter.h:161
size_t get_bit_width(const SQLTypeInfo &ti)
CHECK(cgen_state)
static std::pair< const int8_t *, size_t > getOneColumnFragment(Executor *executor, const Analyzer::ColumnVar &hash_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Data_Namespace::MemoryLevel effective_mem_lvl, const int device_id, DeviceAllocator *device_allocator, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, ColumnCacheMap &column_cache)
Gets one chunk's pointer and element count on either CPU or GPU.
const int8_t * create_literal_buffer(T literal, const ExecutorDeviceType device_type, std::vector< std::unique_ptr< char[]>> &literals_owner, CudaAllocator *gpu_allocator)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ResultSetPtr TableFunctionExecutionContext::launchCpuCode ( const TableFunctionExecutionUnit & exe_unit,
const TableFunctionCompilationContext * compilation_context,
std::vector< const int8_t * > &  col_buf_ptrs,
const size_t  elem_count,
Executor * executor 
)
private

Definition at line 184 of file TableFunctionExecutionContext.cpp.

References QueryMemoryDescriptor::addColSlotInfo(), CHECK(), CPU, anonymous_namespace{TableFunctionExecutionContext.cpp}::get_output_row_count(), TableFunctionCompilationContext::getFuncPtr(), Projection, query_mem_desc, row_set_mem_owner_, QueryMemoryDescriptor::setOutputColumnar(), TableFunctionExecutionUnit::target_exprs, and to_string().

Referenced by execute().

189  {
190  // setup the inputs
191  const auto byte_stream_ptr = reinterpret_cast<const int8_t**>(col_buf_ptrs.data());
192  CHECK(byte_stream_ptr);
193 
194  // initialize output memory
196  executor, elem_count, QueryDescriptionType::Projection, /*is_table_function=*/true);
197  query_mem_desc.setOutputColumnar(true);
198 
199  for (size_t i = 0; i < exe_unit.target_exprs.size(); i++) {
200  // All outputs padded to 8 bytes
201  query_mem_desc.addColSlotInfo({std::make_tuple(8, 8)});
202  }
203 
204  const auto allocated_output_row_count = get_output_row_count(exe_unit, elem_count);
205  auto query_buffers = std::make_unique<QueryMemoryInitializer>(
206  exe_unit,
208  /*device_id=*/0,
210  allocated_output_row_count,
211  std::vector<std::vector<const int8_t*>>{col_buf_ptrs},
212  std::vector<std::vector<uint64_t>>{{0}}, // frag offsets
214  nullptr,
215  executor);
216 
217  // setup the output
218  int64_t output_row_count = -1;
219  auto group_by_buffers_ptr = query_buffers->getGroupByBuffersPtr();
220  CHECK(group_by_buffers_ptr);
221 
222  // execute
223  const auto kernel_element_count = static_cast<int64_t>(elem_count);
224  const auto err =
225  compilation_context->getFuncPtr()(byte_stream_ptr,
226  &kernel_element_count,
227  query_buffers->getGroupByBuffersPtr(),
228  &output_row_count);
229  if (err) {
230  throw std::runtime_error("Error executing table function: " + std::to_string(err));
231  }
232  if (output_row_count < 0) {
233  throw std::runtime_error("Table function did not properly set output row count.");
234  }
235 
236  // Update entry count, it may differ from allocated mem size
237  query_buffers->getResultSet(0)->updateStorageEntryCount(output_row_count);
238 
239  return query_buffers->getResultSetOwned(0);
240 }
size_t get_output_row_count(const TableFunctionExecutionUnit &exe_unit, size_t input_element_count)
std::string to_string(char const *&&v)
CHECK(cgen_state)
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
TableFunctionCompilationContext::FuncPtr getFuncPtr() const
std::vector< Analyzer::Expr * > target_exprs

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ResultSetPtr TableFunctionExecutionContext::launchGpuCode ( const TableFunctionExecutionUnit & exe_unit,
const TableFunctionCompilationContext * compilation_context,
std::vector< const int8_t * > &  col_buf_ptrs,
const size_t  elem_count,
const int  device_id,
Executor * executor 
)
private

Definition at line 253 of file TableFunctionExecutionContext.cpp.

References QueryMemoryDescriptor::addColSlotInfo(), CHECK(), CHECK_EQ, checkCudaErrors(), anonymous_namespace{TableFunctionExecutionContext.cpp}::COL_BUFFERS, anonymous_namespace{TableFunctionExecutionContext.cpp}::ERROR_BUFFER, anonymous_namespace{TableFunctionExecutionContext.cpp}::get_output_row_count(), TableFunctionCompilationContext::getGpuCode(), GPU, anonymous_namespace{TableFunctionExecutionContext.cpp}::INPUT_ROW_COUNT, anonymous_namespace{TableFunctionExecutionContext.cpp}::KERNEL_PARAM_COUNT, anonymous_namespace{TableFunctionExecutionContext.cpp}::OUTPUT_BUFFERS, anonymous_namespace{TableFunctionExecutionContext.cpp}::OUTPUT_ROW_COUNT, Projection, query_mem_desc, row_set_mem_owner_, QueryMemoryDescriptor::setOutputColumnar(), TableFunctionExecutionUnit::target_exprs, and UNREACHABLE.

Referenced by execute().

259  {
260 #ifdef HAVE_CUDA
261  auto& data_mgr = executor->catalog_->getDataMgr();
262  auto gpu_allocator = std::make_unique<CudaAllocator>(&data_mgr, device_id);
263  CHECK(gpu_allocator);
264 
265  std::vector<CUdeviceptr> kernel_params(KERNEL_PARAM_COUNT, 0);
266  // setup the inputs
267  auto byte_stream_ptr = gpu_allocator->alloc(col_buf_ptrs.size() * sizeof(int64_t));
268  gpu_allocator->copyToDevice(byte_stream_ptr,
269  reinterpret_cast<int8_t*>(col_buf_ptrs.data()),
270  col_buf_ptrs.size() * sizeof(int64_t));
271  kernel_params[COL_BUFFERS] = reinterpret_cast<CUdeviceptr>(byte_stream_ptr);
272 
273  kernel_params[INPUT_ROW_COUNT] =
274  reinterpret_cast<CUdeviceptr>(gpu_allocator->alloc(sizeof(elem_count)));
275  gpu_allocator->copyToDevice(reinterpret_cast<int8_t*>(kernel_params[INPUT_ROW_COUNT]),
276  reinterpret_cast<const int8_t*>(&elem_count),
277  sizeof(elem_count));
278 
279  kernel_params[ERROR_BUFFER] =
280  reinterpret_cast<CUdeviceptr>(gpu_allocator->alloc(sizeof(int32_t)));
281 
282  // initialize output memory
284  executor, elem_count, QueryDescriptionType::Projection, /*is_table_function=*/true);
285  query_mem_desc.setOutputColumnar(true);
286 
287  for (size_t i = 0; i < exe_unit.target_exprs.size(); i++) {
288  // All outputs padded to 8 bytes
289  query_mem_desc.addColSlotInfo({std::make_tuple(8, 8)});
290  }
291  const auto allocated_output_row_count = get_output_row_count(exe_unit, elem_count);
292  auto query_buffers = std::make_unique<QueryMemoryInitializer>(
293  exe_unit,
295  device_id,
297  allocated_output_row_count,
298  std::vector<std::vector<const int8_t*>>{col_buf_ptrs},
299  std::vector<std::vector<uint64_t>>{{0}}, // frag offsets
301  gpu_allocator.get(),
302  executor);
303 
304  // setup the output
305  int64_t output_row_count = -1;
306  kernel_params[OUTPUT_ROW_COUNT] =
307  reinterpret_cast<CUdeviceptr>(gpu_allocator->alloc(sizeof(int64_t*)));
308  gpu_allocator->copyToDevice(reinterpret_cast<int8_t*>(kernel_params[OUTPUT_ROW_COUNT]),
309  reinterpret_cast<int8_t*>(&output_row_count),
310  sizeof(output_row_count));
311 
312  auto group_by_buffers_ptr = query_buffers->getGroupByBuffersPtr();
313  CHECK(group_by_buffers_ptr);
314 
315  const unsigned block_size_x = executor->blockSize();
316  const unsigned block_size_y = 1;
317  const unsigned block_size_z = 1;
318  const unsigned grid_size_x = executor->gridSize();
319  const unsigned grid_size_y = 1;
320  const unsigned grid_size_z = 1;
321 
322  auto gpu_output_buffers = query_buffers->setupTableFunctionGpuBuffers(
323  query_mem_desc, device_id, block_size_x, grid_size_x);
324  kernel_params[OUTPUT_BUFFERS] = reinterpret_cast<CUdeviceptr>(gpu_output_buffers.first);
325 
326  // execute
327  CHECK_EQ(static_cast<size_t>(KERNEL_PARAM_COUNT), kernel_params.size());
328 
329  std::vector<void*> param_ptrs;
330  for (auto& param : kernel_params) {
331  param_ptrs.push_back(&param);
332  }
333 
334  // Get cu func
335  const auto gpu_context = compilation_context->getGpuCode();
336  CHECK(gpu_context);
337  const auto native_code = gpu_context->getNativeCode(device_id);
338  auto cu_func = static_cast<CUfunction>(native_code.first);
339  checkCudaErrors(cuLaunchKernel(cu_func,
340  grid_size_x,
341  grid_size_y,
342  grid_size_z,
343  block_size_x,
344  block_size_y,
345  block_size_z,
346  0, // shared mem bytes
347  nullptr,
348  &param_ptrs[0],
349  nullptr));
350  // TODO(adb): read errors
351 
352  // read output row count from GPU
353  int64_t new_output_row_count = -1;
354  gpu_allocator->copyFromDevice(
355  reinterpret_cast<int8_t*>(&new_output_row_count),
356  reinterpret_cast<int8_t*>(kernel_params[OUTPUT_ROW_COUNT]),
357  sizeof(int64_t));
358  if (new_output_row_count < 0) {
359  new_output_row_count = allocated_output_row_count;
360  }
361 
362  // Update entry count, it may differ from allocated mem size
363  query_buffers->getResultSet(0)->updateStorageEntryCount(new_output_row_count);
364 
365  // Copy back to CPU storage
366  query_buffers->copyGroupByBuffersFromGpu(&data_mgr,
367  query_mem_desc,
368  new_output_row_count,
369  gpu_output_buffers,
370  nullptr,
371  block_size_x,
372  grid_size_x,
373  device_id,
374  false);
375 
376  return query_buffers->getResultSetOwned(0);
377 #else
378  UNREACHABLE();
379  return nullptr;
380 #endif
381 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
size_t get_output_row_count(const TableFunctionExecutionUnit &exe_unit, size_t input_element_count)
GpuCompilationContext * getGpuCode() const
void checkCudaErrors(CUresult err)
Definition: sample.cpp:38
unsigned long long CUdeviceptr
Definition: nocuda.h:27
#define UNREACHABLE()
Definition: Logger.h:241
CHECK(cgen_state)
void * CUfunction
Definition: nocuda.h:24
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
std::vector< Analyzer::Expr * > target_exprs

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

TableFunctionExecutionContext& TableFunctionExecutionContext::operator= ( const TableFunctionExecutionContext & )
delete

Member Data Documentation

std::shared_ptr<RowSetMemoryOwner> TableFunctionExecutionContext::row_set_mem_owner_
private

Definition at line 57 of file TableFunctionExecutionContext.h.

Referenced by launchCpuCode(), and launchGpuCode().


The documentation for this class was generated from the following files: