OmniSciDB  340b00dbf6
TableFunctionExecutionContext Class Reference

#include <TableFunctionExecutionContext.h>

Public Member Functions

 TableFunctionExecutionContext (std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
 TableFunctionExecutionContext (const TableFunctionExecutionContext &)=delete
 
TableFunctionExecutionContext & operator= (const TableFunctionExecutionContext &)=delete
 
ResultSetPtr execute (const TableFunctionExecutionUnit &exe_unit, const std::vector< InputTableInfo > &table_infos, const TableFunctionCompilationContext *compilation_context, const ColumnFetcher &column_fetcher, const ExecutorDeviceType device_type, Executor *executor)
 

Private Member Functions

ResultSetPtr launchCpuCode (const TableFunctionExecutionUnit &exe_unit, const TableFunctionCompilationContext *compilation_context, std::vector< const int8_t * > &col_buf_ptrs, const size_t elem_count, Executor *executor)
 
ResultSetPtr launchGpuCode (const TableFunctionExecutionUnit &exe_unit, const TableFunctionCompilationContext *compilation_context, std::vector< const int8_t * > &col_buf_ptrs, const size_t elem_count, const int device_id, Executor *executor)
 

Private Attributes

std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 

Detailed Description

Definition at line 28 of file TableFunctionExecutionContext.h.
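
TableFunctionExecutionContext drives a single table function invocation. execute() materializes the input column fragments and literal buffers required by the execution unit, then dispatches to launchCpuCode() or launchGpuCode() according to the requested device type. The context is non-copyable and only retains the RowSetMemoryOwner used to back the result set buffers.

A minimal usage sketch, assuming the caller (such as Executor::executeTableFunction()) has already compiled the table function and gathered the input table metadata; the wrapper function below is illustrative and not part of the API:

ResultSetPtr run_table_function(const TableFunctionExecutionUnit& exe_unit,
                                const std::vector<InputTableInfo>& table_infos,
                                const TableFunctionCompilationContext* compilation_context,
                                const ColumnFetcher& column_fetcher,
                                std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                                Executor* executor) {
  // One context per invocation; it only holds on to the RowSetMemoryOwner.
  TableFunctionExecutionContext exe_context(row_set_mem_owner);
  // The device type selects the launchCpuCode()/launchGpuCode() path inside execute().
  return exe_context.execute(exe_unit,
                             table_infos,
                             compilation_context,
                             column_fetcher,
                             ExecutorDeviceType::CPU,
                             executor);
}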

Constructor & Destructor Documentation

TableFunctionExecutionContext::TableFunctionExecutionContext ( std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner)
inline

Definition at line 30 of file TableFunctionExecutionContext.h.

30  TableFunctionExecutionContext(std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
31  : row_set_mem_owner_(row_set_mem_owner) {}
TableFunctionExecutionContext::TableFunctionExecutionContext ( const TableFunctionExecutionContext &  )
delete

Member Function Documentation

ResultSetPtr TableFunctionExecutionContext::execute ( const TableFunctionExecutionUnit &  exe_unit,
const std::vector< InputTableInfo > &  table_infos,
const TableFunctionCompilationContext *  compilation_context,
const ColumnFetcher &  column_fetcher,
const ExecutorDeviceType  device_type,
Executor *  executor 
)

Definition at line 74 of file TableFunctionExecutionContext.cpp.

References CHECK, CHECK_EQ, ColumnFetcher::columnarized_table_cache_, CPU, Data_Namespace::CPU_LEVEL, anonymous_namespace{TableFunctionExecutionContext.cpp}::create_literal_buffer(), get_bit_width(), ColumnFetcher::getOneColumnFragment(), GPU, Data_Namespace::GPU_LEVEL, TableFunctionExecutionUnit::input_exprs, launchCpuCode(), launchGpuCode(), and UNREACHABLE.

Referenced by Executor::executeTableFunction().

74  ResultSetPtr TableFunctionExecutionContext::execute(
75      const TableFunctionExecutionUnit& exe_unit,
76      const std::vector<InputTableInfo>& table_infos,
77      const TableFunctionCompilationContext* compilation_context,
78      const ColumnFetcher& column_fetcher,
79      const ExecutorDeviceType device_type,
80      Executor* executor) {
81  CHECK(compilation_context);
82  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
83  std::vector<std::unique_ptr<char[]>> literals_owner;
84 
85  const int device_id = 0; // TODO(adb): support multi-gpu table functions
86  std::unique_ptr<CudaAllocator> device_allocator;
87  if (device_type == ExecutorDeviceType::GPU) {
88  auto& data_mgr = executor->catalog_->getDataMgr();
89  device_allocator.reset(new CudaAllocator(&data_mgr, device_id));
90  }
91  std::vector<const int8_t*> col_buf_ptrs;
92  std::optional<size_t> element_count;
93  for (const auto& input_expr : exe_unit.input_exprs) {
94  if (auto col_var = dynamic_cast<Analyzer::ColumnVar*>(input_expr)) {
95  auto table_id = col_var->get_table_id();
96  auto table_info_it = std::find_if(
97  table_infos.begin(), table_infos.end(), [&table_id](const auto& table_info) {
98  return table_info.table_id == table_id;
99  });
100  CHECK(table_info_it != table_infos.end());
101  auto [col_buf, buf_elem_count] = ColumnFetcher::getOneColumnFragment(
102  executor,
103  *col_var,
104  table_info_it->info.fragments.front(),
105  device_type == ExecutorDeviceType::CPU ? Data_Namespace::CPU_LEVEL
106  : Data_Namespace::GPU_LEVEL,
107  device_id,
108  device_allocator.get(),
109  chunks_owner,
110  column_fetcher.columnarized_table_cache_);
111  if (!element_count) {
112  element_count = buf_elem_count;
113  } else {
114  CHECK(element_count && (buf_elem_count == *element_count));
115  }
116  col_buf_ptrs.push_back(col_buf);
117  } else if (const auto& constant_val = dynamic_cast<Analyzer::Constant*>(input_expr)) {
118  // TODO(adb): Unify literal handling with rest of system, either in Codegen or as a
119  // separate serialization component
120  const auto const_val_datum = constant_val->get_constval();
121  const auto& ti = constant_val->get_type_info();
122  if (ti.is_fp()) {
123  switch (get_bit_width(ti)) {
124  case 32:
125  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.floatval,
126  device_type,
127  literals_owner,
128  device_allocator.get()));
129  break;
130  case 64:
131  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.doubleval,
132  device_type,
133  literals_owner,
134  device_allocator.get()));
135  break;
136  default:
137  UNREACHABLE();
138  }
139  } else if (ti.is_integer()) {
140  switch (get_bit_width(ti)) {
141  case 8:
142  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.tinyintval,
143  device_type,
144  literals_owner,
145  device_allocator.get()));
146  break;
147  case 16:
148  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.smallintval,
149  device_type,
150  literals_owner,
151  device_allocator.get()));
152  break;
153  case 32:
154  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.intval,
155  device_type,
156  literals_owner,
157  device_allocator.get()));
158  break;
159  case 64:
160  col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.bigintval,
161  device_type,
162  literals_owner,
163  device_allocator.get()));
164  break;
165  default:
166  UNREACHABLE();
167  }
168  } else {
169  throw std::runtime_error("Literal value " + constant_val->toString() +
170  " is not yet supported.");
171  }
172  }
173  }
174  CHECK_EQ(col_buf_ptrs.size(), exe_unit.input_exprs.size());
175 
176  CHECK(element_count);
177  switch (device_type) {
178  case ExecutorDeviceType::CPU:
179  return launchCpuCode(
180  exe_unit, compilation_context, col_buf_ptrs, *element_count, executor);
181  case ExecutorDeviceType::GPU:
182  return launchGpuCode(exe_unit,
183  compilation_context,
184  col_buf_ptrs,
185  *element_count,
186  /*device_id=*/0,
187  executor);
188  }
189  UNREACHABLE();
190  return nullptr;
191 }
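
The literal branch above funnels constants through create_literal_buffer(), a helper in the anonymous namespace of TableFunctionExecutionContext.cpp. The sketch below is a hypothetical reimplementation written against the declaration shown in the References, not the actual helper; it only illustrates the CPU/GPU staging it has to perform (requires <cstring> and <memory>):

template <typename T>
const int8_t* create_literal_buffer_sketch(T literal,
                                           const ExecutorDeviceType device_type,
                                           std::vector<std::unique_ptr<char[]>>& literals_owner,
                                           CudaAllocator* gpu_allocator) {
  // Keep a host copy of the literal alive for the lifetime of the execution.
  literals_owner.emplace_back(std::make_unique<char[]>(sizeof(T)));
  std::memcpy(literals_owner.back().get(), &literal, sizeof(T));
  if (device_type == ExecutorDeviceType::CPU) {
    return reinterpret_cast<const int8_t*>(literals_owner.back().get());
  }
  // GPU path: stage the literal into device memory and hand back a device pointer.
  auto* device_ptr = gpu_allocator->alloc(sizeof(T));
  gpu_allocator->copyToDevice(device_ptr,
                              reinterpret_cast<const int8_t*>(literals_owner.back().get()),
                              sizeof(T));
  return device_ptr;
}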


ResultSetPtr TableFunctionExecutionContext::launchCpuCode ( const TableFunctionExecutionUnit &  exe_unit,
const TableFunctionCompilationContext *  compilation_context,
std::vector< const int8_t * > &  col_buf_ptrs,
const size_t  elem_count,
Executor *  executor 
)
private

Definition at line 193 of file TableFunctionExecutionContext.cpp.

References QueryMemoryDescriptor::addColSlotInfo(), CHECK, CHECK_EQ, CPU, anonymous_namespace{TableFunctionExecutionContext.cpp}::get_output_row_count(), TableFunctionCompilationContext::getFuncPtr(), Projection, query_mem_desc, row_set_mem_owner_, QueryMemoryDescriptor::setOutputColumnar(), TableFunctionExecutionUnit::target_exprs, and to_string().

Referenced by execute().

193  ResultSetPtr TableFunctionExecutionContext::launchCpuCode(
194      const TableFunctionExecutionUnit& exe_unit,
195      const TableFunctionCompilationContext* compilation_context,
196      std::vector<const int8_t*>& col_buf_ptrs,
197      const size_t elem_count,
198      Executor* executor) {
199  // setup the inputs
200  const auto byte_stream_ptr = reinterpret_cast<const int8_t**>(col_buf_ptrs.data());
201  CHECK(byte_stream_ptr);
202 
203  // initialize output memory
204  auto num_out_columns = exe_unit.target_exprs.size();
205  QueryMemoryDescriptor query_mem_desc(
206  executor, elem_count, QueryDescriptionType::Projection, /*is_table_function=*/true);
207  query_mem_desc.setOutputColumnar(true);
208 
209  for (size_t i = 0; i < num_out_columns; i++) {
210  // All outputs padded to 8 bytes
211  query_mem_desc.addColSlotInfo({std::make_tuple(8, 8)});
212  }
213 
214  const auto allocated_output_row_count = get_output_row_count(exe_unit, elem_count);
215  auto query_buffers = std::make_unique<QueryMemoryInitializer>(
216  exe_unit,
217  query_mem_desc,
218  /*device_id=*/0,
219  ExecutorDeviceType::CPU,
220  allocated_output_row_count,
221  std::vector<std::vector<const int8_t*>>{col_buf_ptrs},
222  std::vector<std::vector<uint64_t>>{{0}}, // frag offsets
223  row_set_mem_owner_,
224  nullptr,
225  executor);
226 
227  // setup the output
228  int64_t output_row_count = allocated_output_row_count;
229  auto group_by_buffers_ptr = query_buffers->getGroupByBuffersPtr();
230  CHECK(group_by_buffers_ptr);
231 
232  auto output_buffers_ptr = reinterpret_cast<int64_t*>(group_by_buffers_ptr[0]);
233  std::vector<int64_t*> output_col_buf_ptrs;
234  for (size_t i = 0; i < num_out_columns; i++) {
235  output_col_buf_ptrs.emplace_back(output_buffers_ptr + i * allocated_output_row_count);
236  }
237 
238  // execute
239  const auto kernel_element_count = static_cast<int64_t>(elem_count);
240  const auto err = compilation_context->getFuncPtr()(byte_stream_ptr,
241  &kernel_element_count,
242  output_col_buf_ptrs.data(),
243  &output_row_count);
244  if (err) {
245  throw std::runtime_error("Error executing table function: " + std::to_string(err));
246  }
247  if (exe_unit.table_func.hasNonUserSpecifiedOutputSizeConstant()) {
248  if (static_cast<size_t>(output_row_count) != allocated_output_row_count) {
249  throw std::runtime_error(
250  "Table function with constant sizing parameter must return " +
251  std::to_string(allocated_output_row_count) + " (got " +
252  std::to_string(output_row_count) + ")");
253  }
254  } else {
255  if (output_row_count < 0 || (size_t)output_row_count > allocated_output_row_count) {
256  output_row_count = allocated_output_row_count;
257  }
258  }
259  // Update entry count, it may differ from allocated mem size
260  query_buffers->getResultSet(0)->updateStorageEntryCount(output_row_count);
261 
262  const size_t column_size = output_row_count * sizeof(int64_t);
263  const size_t allocated_column_size = allocated_output_row_count * sizeof(int64_t);
264 
265  int8_t* src = reinterpret_cast<int8_t*>(output_buffers_ptr);
266  int8_t* dst = reinterpret_cast<int8_t*>(output_buffers_ptr);
267  for (size_t i = 0; i < num_out_columns; i++) {
268  if (src != dst) {
269  auto t = memmove(dst, src, column_size);
270  CHECK_EQ(dst, t);
271  }
272  src += allocated_column_size;
273  dst += column_size;
274  }
275 
276  return query_buffers->getResultSetOwned(0);
277 }
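
The indirect call through compilation_context->getFuncPtr() above passes four arguments: the input column buffers, the input row count, the output column buffers, and an in/out output row count, with a non-zero return value treated as an error. The sketch below shows what a compiled table function entry point could look like under that calling convention; the typedef name and the toy kernel body are assumptions here, only the argument shape is taken from the call site above:

// Assumed shape of a compiled table-function entry point, matching the four
// arguments passed by launchCpuCode().
using TableFunctionEntryPointSketch = int32_t (*)(const int8_t** input_col_buffers,
                                                  const int64_t* input_row_count,
                                                  int64_t** output_col_buffers,
                                                  int64_t* output_row_count);

// Toy kernel: copy the first input column (assumed int64) and double each value.
int32_t double_column_kernel(const int8_t** input_col_buffers,
                             const int64_t* input_row_count,
                             int64_t** output_col_buffers,
                             int64_t* output_row_count) {
  const auto* in = reinterpret_cast<const int64_t*>(input_col_buffers[0]);
  for (int64_t i = 0; i < *input_row_count; ++i) {
    output_col_buffers[0][i] = 2 * in[i];
  }
  *output_row_count = *input_row_count;  // must not exceed the allocated row count
  return 0;                              // 0 == success
}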


ResultSetPtr TableFunctionExecutionContext::launchGpuCode ( const TableFunctionExecutionUnit &  exe_unit,
const TableFunctionCompilationContext *  compilation_context,
std::vector< const int8_t * > &  col_buf_ptrs,
const size_t  elem_count,
const int  device_id,
Executor *  executor 
)
private

Definition at line 290 of file TableFunctionExecutionContext.cpp.

References QueryMemoryDescriptor::addColSlotInfo(), CHECK, CHECK_EQ, checkCudaErrors(), anonymous_namespace{TableFunctionExecutionContext.cpp}::COL_BUFFERS, anonymous_namespace{TableFunctionExecutionContext.cpp}::ERROR_BUFFER, anonymous_namespace{TableFunctionExecutionContext.cpp}::get_output_row_count(), TableFunctionCompilationContext::getGpuCode(), GPU, anonymous_namespace{TableFunctionExecutionContext.cpp}::INPUT_ROW_COUNT, anonymous_namespace{TableFunctionExecutionContext.cpp}::KERNEL_PARAM_COUNT, anonymous_namespace{TableFunctionExecutionContext.cpp}::OUTPUT_BUFFERS, anonymous_namespace{TableFunctionExecutionContext.cpp}::OUTPUT_ROW_COUNT, Projection, query_mem_desc, row_set_mem_owner_, QueryMemoryDescriptor::setOutputColumnar(), TableFunctionExecutionUnit::target_exprs, to_string(), and UNREACHABLE.

Referenced by execute().

290  ResultSetPtr TableFunctionExecutionContext::launchGpuCode(
291      const TableFunctionExecutionUnit& exe_unit,
292      const TableFunctionCompilationContext* compilation_context,
293      std::vector<const int8_t*>& col_buf_ptrs,
294      const size_t elem_count,
295      const int device_id,
296      Executor* executor) {
297 #ifdef HAVE_CUDA
298  auto num_out_columns = exe_unit.target_exprs.size();
299  auto& data_mgr = executor->catalog_->getDataMgr();
300  auto gpu_allocator = std::make_unique<CudaAllocator>(&data_mgr, device_id);
301  CHECK(gpu_allocator);
302  std::vector<CUdeviceptr> kernel_params(KERNEL_PARAM_COUNT, 0);
303  // setup the inputs
304  auto byte_stream_ptr = gpu_allocator->alloc(col_buf_ptrs.size() * sizeof(int64_t));
305  gpu_allocator->copyToDevice(byte_stream_ptr,
306  reinterpret_cast<int8_t*>(col_buf_ptrs.data()),
307  col_buf_ptrs.size() * sizeof(int64_t));
308  kernel_params[COL_BUFFERS] = reinterpret_cast<CUdeviceptr>(byte_stream_ptr);
309  kernel_params[INPUT_ROW_COUNT] =
310  reinterpret_cast<CUdeviceptr>(gpu_allocator->alloc(sizeof(elem_count)));
311  gpu_allocator->copyToDevice(reinterpret_cast<int8_t*>(kernel_params[INPUT_ROW_COUNT]),
312  reinterpret_cast<const int8_t*>(&elem_count),
313  sizeof(elem_count));
314  kernel_params[ERROR_BUFFER] =
315  reinterpret_cast<CUdeviceptr>(gpu_allocator->alloc(sizeof(int32_t)));
316  // initialize output memory
317  QueryMemoryDescriptor query_mem_desc(
318  executor, elem_count, QueryDescriptionType::Projection, /*is_table_function=*/true);
319  query_mem_desc.setOutputColumnar(true);
320 
321  for (size_t i = 0; i < num_out_columns; i++) {
322  // All outputs padded to 8 bytes
323  query_mem_desc.addColSlotInfo({std::make_tuple(8, 8)});
324  }
325  const auto allocated_output_row_count = get_output_row_count(exe_unit, elem_count);
326  auto query_buffers = std::make_unique<QueryMemoryInitializer>(
327  exe_unit,
328  query_mem_desc,
329  device_id,
330  ExecutorDeviceType::GPU,
331  allocated_output_row_count,
332  std::vector<std::vector<const int8_t*>>{col_buf_ptrs},
333  std::vector<std::vector<uint64_t>>{{0}}, // frag offsets
334  row_set_mem_owner_,
335  gpu_allocator.get(),
336  executor);
337 
338  // setup the output
339  int64_t output_row_count = allocated_output_row_count;
340 
341  kernel_params[OUTPUT_ROW_COUNT] =
342  reinterpret_cast<CUdeviceptr>(gpu_allocator->alloc(sizeof(int64_t*)));
343  gpu_allocator->copyToDevice(reinterpret_cast<int8_t*>(kernel_params[OUTPUT_ROW_COUNT]),
344  reinterpret_cast<int8_t*>(&output_row_count),
345  sizeof(output_row_count));
346 
347  // const unsigned block_size_x = executor->blockSize();
348  const unsigned block_size_x = 1;
349  const unsigned block_size_y = 1;
350  const unsigned block_size_z = 1;
351  // const unsigned grid_size_x = executor->gridSize();
352  const unsigned grid_size_x = 1;
353  const unsigned grid_size_y = 1;
354  const unsigned grid_size_z = 1;
355 
356  auto gpu_output_buffers = query_buffers->setupTableFunctionGpuBuffers(
357  query_mem_desc, device_id, block_size_x, grid_size_x);
358 
359  kernel_params[OUTPUT_BUFFERS] = reinterpret_cast<CUdeviceptr>(gpu_output_buffers.first);
360 
361  // execute
362  CHECK_EQ(static_cast<size_t>(KERNEL_PARAM_COUNT), kernel_params.size());
363 
364  std::vector<void*> param_ptrs;
365  for (auto& param : kernel_params) {
366  param_ptrs.push_back(&param);
367  }
368 
369  // Get cu func
370  const auto gpu_context = compilation_context->getGpuCode();
371  CHECK(gpu_context);
372  const auto native_code = gpu_context->getNativeCode(device_id);
373  auto cu_func = static_cast<CUfunction>(native_code.first);
374  checkCudaErrors(cuLaunchKernel(cu_func,
375  grid_size_x,
376  grid_size_y,
377  grid_size_z,
378  block_size_x,
379  block_size_y,
380  block_size_z,
381  0, // shared mem bytes
382  nullptr,
383  &param_ptrs[0],
384  nullptr));
385  // TODO(adb): read errors
386 
387  // read output row count from GPU
388  gpu_allocator->copyFromDevice(
389  reinterpret_cast<int8_t*>(&output_row_count),
390  reinterpret_cast<int8_t*>(kernel_params[OUTPUT_ROW_COUNT]),
391  sizeof(int64_t));
392  if (exe_unit.table_func.hasNonUserSpecifiedOutputSizeConstant()) {
393  if (static_cast<size_t>(output_row_count) != allocated_output_row_count) {
394  throw std::runtime_error(
395  "Table function with constant sizing parameter must return " +
396  std::to_string(allocated_output_row_count) + " (got " +
397  std::to_string(output_row_count) + ")");
398  }
399  } else {
400  if (output_row_count < 0 || (size_t)output_row_count > allocated_output_row_count) {
401  output_row_count = allocated_output_row_count;
402  }
403  }
404 
405  // Update entry count, it may differ from allocated mem size
406  query_buffers->getResultSet(0)->updateStorageEntryCount(output_row_count);
407 
408  // Copy back to CPU storage
409  query_buffers->copyFromTableFunctionGpuBuffers(&data_mgr,
410  query_mem_desc,
411  output_row_count,
412  gpu_output_buffers,
413  device_id,
414  block_size_x,
415  grid_size_x);
416 
417  return query_buffers->getResultSetOwned(0);
418 #else
419  UNREACHABLE();
420  return nullptr;
421 #endif
422 }
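
kernel_params holds one CUdeviceptr per argument slot (ERROR_BUFFER, COL_BUFFERS, INPUT_ROW_COUNT, OUTPUT_BUFFERS, OUTPUT_ROW_COUNT), and param_ptrs passes the host address of each slot to cuLaunchKernel, which is how the CUDA driver API expects kernel arguments to be supplied. The sketch below restates that layout; the enumerator ordering and values are an assumption for illustration, since the real constants live in the anonymous namespace of TableFunctionExecutionContext.cpp:

// Illustrative slot layout for the CUdeviceptr kernel arguments (ordering assumed).
enum TableFunctionKernelParamSketch : size_t {
  ERROR_BUFFER = 0,   // int32_t on device, reserved for kernel error reporting
  COL_BUFFERS,        // device array of input column pointers
  INPUT_ROW_COUNT,    // device copy of elem_count
  OUTPUT_BUFFERS,     // device array of output column pointers
  OUTPUT_ROW_COUNT,   // device int64_t written by the kernel, read back on the host
  KERNEL_PARAM_COUNT  // size of kernel_params
};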


TableFunctionExecutionContext & TableFunctionExecutionContext::operator= ( const TableFunctionExecutionContext &  )
delete

Member Data Documentation

std::shared_ptr<RowSetMemoryOwner> TableFunctionExecutionContext::row_set_mem_owner_
private

Definition at line 57 of file TableFunctionExecutionContext.h.

Referenced by launchCpuCode(), and launchGpuCode().


The documentation for this class was generated from the following files:

TableFunctionExecutionContext.h
TableFunctionExecutionContext.cpp