OmniSciDB 91042dcc5b
TableFunctionExecutionContext.cpp
/*
 * Copyright 2019 OmniSci, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "QueryEngine/TableFunctions/TableFunctionExecutionContext.h"

#include "Analyzer/Analyzer.h"
#include "Logger/Logger.h"
// (several hyperlinked includes were lost in extraction; the QueryEngine
// headers below are reconstructed from the identifiers this file uses and may
// not match the original list exactly)
#include "QueryEngine/ColumnFetcher.h"
#include "QueryEngine/QueryMemoryInitializer.h"
#include "QueryEngine/TableFunctions/TableFunctionManager.h"
#include "Shared/funcannotations.h"

namespace {

template <typename T>
const int8_t* create_literal_buffer(const T literal,
                                    const ExecutorDeviceType device_type,
                                    std::vector<std::unique_ptr<char[]>>& literals_owner,
                                    CudaAllocator* gpu_allocator) {
  CHECK_LE(sizeof(T), sizeof(int64_t));  // pad to 8 bytes
  switch (device_type) {
    case ExecutorDeviceType::CPU: {
      literals_owner.emplace_back(std::make_unique<char[]>(sizeof(int64_t)));
      std::memcpy(literals_owner.back().get(), &literal, sizeof(T));
      return reinterpret_cast<const int8_t*>(literals_owner.back().get());
    }
    case ExecutorDeviceType::GPU: {
      CHECK(gpu_allocator);
      const auto gpu_literal_buf_ptr = gpu_allocator->alloc(sizeof(int64_t));
      gpu_allocator->copyToDevice(
          gpu_literal_buf_ptr, reinterpret_cast<const int8_t*>(&literal), sizeof(T));
      return gpu_literal_buf_ptr;
    }
  }
  UNREACHABLE();
  return nullptr;
}
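
// Illustration: an int32_t literal such as 7 occupies the first four bytes of
// an owned 8-byte buffer. On the CPU path std::make_unique<char[]>
// value-initializes the buffer, so the pad bytes are zero; the GPU path copies
// only sizeof(T) bytes and leaves the pad unspecified. The UDTF reads the
// literal back at its declared width, so the padding is never interpreted.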

// Specialization for std::string. The UDTF receives a buffer laid out as an
// int64_t byte count followed by the string's characters (no null terminator),
// copied out of the std::string. May want to evaluate moving to passing the
// pointer and size directly.
template <>
const int8_t* create_literal_buffer(std::string* const literal,
                                    const ExecutorDeviceType device_type,
                                    std::vector<std::unique_ptr<char[]>>& literals_owner,
                                    CudaAllocator* gpu_allocator) {
  const int64_t string_size = literal->size();
  const int64_t padded_string_size =
      (string_size + 7) / 8 * 8;  // round up to the next multiple of 8
  switch (device_type) {
    case ExecutorDeviceType::CPU: {
      literals_owner.emplace_back(
          std::make_unique<char[]>(sizeof(int64_t) + padded_string_size));
      std::memcpy(literals_owner.back().get(), &string_size, sizeof(int64_t));
      std::memcpy(
          literals_owner.back().get() + sizeof(int64_t), literal->data(), string_size);
      return reinterpret_cast<const int8_t*>(literals_owner.back().get());
    }
    case ExecutorDeviceType::GPU: {
      CHECK(gpu_allocator);
      const auto gpu_literal_buf_ptr =
          gpu_allocator->alloc(sizeof(int64_t) + padded_string_size);
      gpu_allocator->copyToDevice(gpu_literal_buf_ptr,
                                  reinterpret_cast<const int8_t*>(&string_size),
                                  sizeof(int64_t));
      gpu_allocator->copyToDevice(gpu_literal_buf_ptr + sizeof(int64_t),
                                  reinterpret_cast<const int8_t*>(literal->data()),
                                  string_size);
      return gpu_literal_buf_ptr;
    }
  }
  UNREACHABLE();
  return nullptr;
}
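
// Illustration: the 5-byte string "hello" yields a 16-byte buffer:
//   bytes 0..7  : int64_t size = 5
//   bytes 8..12 : 'h' 'e' 'l' 'l' 'o'
//   bytes 13..15: padding up to the next 8-byte boundary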

size_t get_output_row_count(const TableFunctionExecutionUnit& exe_unit,
                            size_t input_element_count) {
  size_t allocated_output_row_count = 0;
  // (case labels below reconstructed; the hyperlinked lines were dropped by
  // the listing's extraction)
  switch (exe_unit.table_func.getOutputRowSizeType()) {
    case table_functions::OutputBufferSizeType::kConstant:
    case table_functions::OutputBufferSizeType::kUserSpecifiedConstantParameter:
    case table_functions::OutputBufferSizeType::kPreFlightParameter: {
      allocated_output_row_count = exe_unit.output_buffer_size_param;
      break;
    }
    case table_functions::OutputBufferSizeType::kUserSpecifiedRowMultiplier: {
      allocated_output_row_count =
          exe_unit.output_buffer_size_param * input_element_count;
      break;
    }
    case table_functions::OutputBufferSizeType::kTableFunctionSpecifiedParameter: {
      allocated_output_row_count = input_element_count;
      break;
    }
    default: {
      UNREACHABLE();
    }
  }
  return allocated_output_row_count;
}
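
// Example: a UDTF declared with a user-specified row multiplier of 2 running
// over a 1000-row input fragment gets a 2000-row output allocation, while a
// constant sizer of 100 allocates 100 output rows regardless of input size.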

}  // namespace

ResultSetPtr TableFunctionExecutionContext::execute(
    const TableFunctionExecutionUnit& exe_unit,
    const std::vector<InputTableInfo>& table_infos,
    const std::shared_ptr<CompilationContext>& compilation_context,
    const ColumnFetcher& column_fetcher,
    const ExecutorDeviceType device_type,
    Executor* executor,
    bool is_pre_launch_udtf) {
  auto timer = DEBUG_TIMER(__func__);
  CHECK(compilation_context);
  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
  std::vector<std::unique_ptr<char[]>> literals_owner;

  const int device_id = 0;  // TODO(adb): support multi-gpu table functions
  std::unique_ptr<CudaAllocator> device_allocator;
  if (device_type == ExecutorDeviceType::GPU) {
    auto data_mgr = executor->getDataMgr();
    device_allocator.reset(new CudaAllocator(data_mgr, device_id));
  }
  std::vector<const int8_t*> col_buf_ptrs;
  std::vector<int64_t> col_sizes;
  std::optional<size_t> input_num_rows;

  int col_index = -1;
  // TODO: col_list_bufs are allocated in CPU memory, so UDTFs with column_list
  // arguments are not supported on GPU at the moment.
  std::vector<std::vector<const int8_t*>> col_list_bufs;
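  // A column_list argument reaches the UDTF as one pointer to an array of
  // per-column buffer pointers (built below in col_list_bufs). Every column in
  // the same list must have the same element count, which the CHECK_EQ against
  // col_sizes.back() below enforces.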
  for (const auto& input_expr : exe_unit.input_exprs) {
    auto ti = input_expr->get_type_info();
    if (!ti.is_column_list()) {
      CHECK_EQ(col_index, -1);
    }
    if (auto col_var = dynamic_cast<Analyzer::ColumnVar*>(input_expr)) {
      auto table_id = col_var->get_table_id();
      auto table_info_it = std::find_if(
          table_infos.begin(), table_infos.end(), [&table_id](const auto& table_info) {
            return table_info.table_id == table_id;
          });
      CHECK(table_info_it != table_infos.end());
      auto [col_buf, buf_elem_count] = ColumnFetcher::getOneColumnFragment(
          executor,
          *col_var,
          table_info_it->info.fragments.front(),
          device_type == ExecutorDeviceType::GPU ? Data_Namespace::GPU_LEVEL
                                                 : Data_Namespace::CPU_LEVEL,
          device_id,
          device_allocator.get(),
          /*thread_idx=*/0,
          chunks_owner,
          column_fetcher.columnarized_table_cache_);
      // We use the number of entries in the first column as the number of rows
      // to base the output on (optionally scaled by the sizing parameter).
      if (!input_num_rows) {
        input_num_rows = (buf_elem_count ? buf_elem_count : 1);
      }
      if (ti.is_column_list()) {
        if (col_index == -1) {
          col_list_bufs.push_back({});
          col_list_bufs.back().reserve(ti.get_dimension());
        } else {
          CHECK_EQ(col_sizes.back(), buf_elem_count);
        }
        col_index++;
        // append col_buf to the column_list buffer
        col_list_bufs.back().push_back(col_buf);
        if (col_index + 1 == ti.get_dimension()) {
          col_index = -1;
        }
        // columns in the same column_list point to the column_list data
        col_buf_ptrs.push_back((const int8_t*)col_list_bufs.back().data());
      } else {
        col_buf_ptrs.push_back(col_buf);
      }
      col_sizes.push_back(buf_elem_count);
    } else if (const auto& constant_val = dynamic_cast<Analyzer::Constant*>(input_expr)) {
      // TODO(adb): Unify literal handling with rest of system, either in Codegen or as a
      // separate serialization component
      col_sizes.push_back(0);
      const auto const_val_datum = constant_val->get_constval();
      const auto& ti = constant_val->get_type_info();
      if (ti.is_fp()) {
        switch (get_bit_width(ti)) {
          case 32:
            col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.floatval,
                                                         device_type,
                                                         literals_owner,
                                                         device_allocator.get()));
            break;
          case 64:
            col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.doubleval,
                                                         device_type,
                                                         literals_owner,
                                                         device_allocator.get()));
            break;
          default:
            UNREACHABLE();
        }
      } else if (ti.is_integer()) {
        switch (get_bit_width(ti)) {
          case 8:
            col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.tinyintval,
                                                         device_type,
                                                         literals_owner,
                                                         device_allocator.get()));
            break;
          case 16:
            col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.smallintval,
                                                         device_type,
                                                         literals_owner,
                                                         device_allocator.get()));
            break;
          case 32:
            col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.intval,
                                                         device_type,
                                                         literals_owner,
                                                         device_allocator.get()));
            break;
          case 64:
            col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.bigintval,
                                                         device_type,
                                                         literals_owner,
                                                         device_allocator.get()));
            break;
          default:
            UNREACHABLE();
        }
      } else if (ti.is_boolean()) {
        col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.boolval,
                                                     device_type,
                                                     literals_owner,
                                                     device_allocator.get()));
      } else if (ti.is_bytes()) {  // text encoding none string
        col_buf_ptrs.push_back(create_literal_buffer(const_val_datum.stringval,
                                                     device_type,
                                                     literals_owner,
                                                     device_allocator.get()));
      } else {
        throw TableFunctionError("Literal value " + constant_val->toString() +
                                 " is not yet supported.");
      }
    }
  }
  CHECK_EQ(col_buf_ptrs.size(), exe_unit.input_exprs.size());
  CHECK_EQ(col_sizes.size(), exe_unit.input_exprs.size());
  if (!exe_unit.table_func.hasOutputSizeIndependentOfInputSize()) {
    // Output sizes independent of the input include compile-time constants,
    // user-specified constants, and runtime table-function-specified sizing;
    // only user-specified row multipliers currently take the input row count
    // into account, so the input row count must be known here.
    CHECK(input_num_rows);
  }
  if (is_pre_launch_udtf) {
    launchPreCodeOnCpu(
        exe_unit,
        std::dynamic_pointer_cast<CpuCompilationContext>(compilation_context),
        col_buf_ptrs,
        col_sizes,
        *input_num_rows,
        executor);
    return nullptr;
  } else {
    switch (device_type) {
      case ExecutorDeviceType::CPU:
        return launchCpuCode(
            exe_unit,
            std::dynamic_pointer_cast<CpuCompilationContext>(compilation_context),
            col_buf_ptrs,
            col_sizes,
            *input_num_rows,
            executor);
      case ExecutorDeviceType::GPU:
        return launchGpuCode(
            exe_unit,
            std::dynamic_pointer_cast<GpuCompilationContext>(compilation_context),
            col_buf_ptrs,
            col_sizes,
            *input_num_rows,
            /*device_id=*/0,
            executor);
    }
  }
  UNREACHABLE();
  return nullptr;
}

std::mutex TableFunctionManager_singleton_mutex;

void TableFunctionExecutionContext::launchPreCodeOnCpu(
    const TableFunctionExecutionUnit& exe_unit,
    const std::shared_ptr<CpuCompilationContext>& compilation_context,
    std::vector<const int8_t*>& col_buf_ptrs,
    std::vector<int64_t>& col_sizes,
    const size_t elem_count,  // taken from first source only currently
    Executor* executor) {
  auto timer = DEBUG_TIMER(__func__);
  int64_t output_row_count = 0;

  // If the TableFunctionManager must be a singleton but one has already been
  // initialized from another thread, the TableFunctionManager constructor
  // blocks on TableFunctionManager_singleton_mutex until the existing
  // singleton is destructed.
  auto mgr = std::make_unique<TableFunctionManager>(
      exe_unit,
      executor,
      col_buf_ptrs,
      row_set_mem_owner_,
      /*is_singleton=*/!exe_unit.table_func.usesManager());

  // setup the inputs
  // We can have an empty col_buf_ptrs vector if there are no arguments to the function
  const auto byte_stream_ptr = !col_buf_ptrs.empty()
                                   ? reinterpret_cast<const int8_t**>(col_buf_ptrs.data())
                                   : nullptr;
  if (!col_buf_ptrs.empty()) {
    CHECK(byte_stream_ptr);
  }
  const auto col_sizes_ptr = !col_sizes.empty() ? col_sizes.data() : nullptr;
  if (!col_sizes.empty()) {
    CHECK(col_sizes_ptr);
  }

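  // The generated entry point takes (mgr, input column buffers, input column
  // sizes, output column buffers, output row count). The output-buffer
  // argument is passed as nullptr on this path; output allocation is expected
  // to go through the TableFunctionManager instead.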
  // execute
  const auto err = compilation_context->table_function_entry_point()(
      reinterpret_cast<const int8_t*>(mgr.get()),
      byte_stream_ptr,  // input columns buffer
      col_sizes_ptr,    // input column sizes
      nullptr,
      &output_row_count);

  if (exe_unit.table_func.hasPreFlightOutputSizer()) {
    exe_unit.output_buffer_size_param = output_row_count;
  }
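  // The row count recorded above is read back by launchCpuCode, which seeds
  // its own output_row_count from exe_unit.output_buffer_size_param for
  // pre-flight sized functions.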

  if (err == TableFunctionErrorCode::GenericError) {
    // (error-code constant reconstructed from context: this branch fires when
    // the table function set an error message via the manager)
    throw UserTableFunctionError("Error executing table function pre flight check: " +
                                 std::string(mgr->get_error_message()));
  } else if (err) {
    throw UserTableFunctionError("Error executing table function pre flight check: " +
                                 std::to_string(err));
  }
}

ResultSetPtr TableFunctionExecutionContext::launchCpuCode(
    const TableFunctionExecutionUnit& exe_unit,
    const std::shared_ptr<CpuCompilationContext>& compilation_context,
    std::vector<const int8_t*>& col_buf_ptrs,
    std::vector<int64_t>& col_sizes,
    const size_t elem_count,  // taken from first source only currently
    Executor* executor) {
  auto timer = DEBUG_TIMER(__func__);
  int64_t output_row_count = 0;

  // If the TableFunctionManager must be a singleton but one has already been
  // initialized from another thread, the TableFunctionManager constructor
  // blocks on TableFunctionManager_singleton_mutex until the existing
  // singleton is destructed.
  auto mgr = std::make_unique<TableFunctionManager>(
      exe_unit,
      executor,
      col_buf_ptrs,
      row_set_mem_owner_,
      /*is_singleton=*/!exe_unit.table_func.usesManager());

  if (exe_unit.table_func.hasOutputSizeKnownPreLaunch()) {
    // Allocate output buffers up front: the size is known from user-specified
    // parameters (scaled by the input table size in the case of a
    // user-specified row multiplier).
    output_row_count = get_output_row_count(exe_unit, elem_count);
  } else if (exe_unit.table_func.hasPreFlightOutputSizer()) {
    output_row_count = exe_unit.output_buffer_size_param;
  }

  // setup the inputs
  // We can have an empty col_buf_ptrs vector if there are no arguments to the function
  const auto byte_stream_ptr = !col_buf_ptrs.empty()
                                   ? reinterpret_cast<const int8_t**>(col_buf_ptrs.data())
                                   : nullptr;
  if (!col_buf_ptrs.empty()) {
    CHECK(byte_stream_ptr);
  }
  const auto col_sizes_ptr = !col_sizes.empty() ? col_sizes.data() : nullptr;
  if (!col_sizes.empty()) {
    CHECK(col_sizes_ptr);
  }

  // execute
  const auto err = compilation_context->table_function_entry_point()(
      reinterpret_cast<const int8_t*>(mgr.get()),
      byte_stream_ptr,  // input columns buffer
      col_sizes_ptr,    // input column sizes
      nullptr,
      &output_row_count);

  if (err == TableFunctionErrorCode::GenericError) {
    // (error-code constant reconstructed from context, as above)
    throw UserTableFunctionError("Error executing table function: " +
                                 std::string(mgr->get_error_message()));
  } else if (err) {
    throw UserTableFunctionError("Error executing table function: " +
                                 std::to_string(err));
  }

  if (exe_unit.table_func.hasCompileTimeOutputSizeConstant()) {
    if (static_cast<size_t>(output_row_count) != mgr->get_nrows()) {
      throw TableFunctionError(
          "Table function with constant sizing parameter must return " +
          std::to_string(mgr->get_nrows()) + " (got " + std::to_string(output_row_count) +
          ")");
    }
  } else {
    if (output_row_count < 0 || (size_t)output_row_count > mgr->get_nrows()) {
      output_row_count = mgr->get_nrows();
    }
  }
  // Update the entry count; it may differ from the allocated buffer size.
  if (exe_unit.table_func.hasTableFunctionSpecifiedParameter() && !mgr->query_buffers) {
    // set_output_row_size has not been called
    if (output_row_count == 0) {
      // allocate for empty output columns
      mgr->allocate_output_buffers(0);
    } else {
      throw TableFunctionError("Table function must call set_output_row_size");
    }
  }

  mgr->query_buffers->getResultSet(0)->updateStorageEntryCount(output_row_count);

  auto group_by_buffers_ptr = mgr->query_buffers->getGroupByBuffersPtr();
  CHECK(group_by_buffers_ptr);
  auto output_buffers_ptr = reinterpret_cast<int64_t*>(group_by_buffers_ptr[0]);

  auto num_out_columns = exe_unit.target_exprs.size();
  int8_t* src = reinterpret_cast<int8_t*>(output_buffers_ptr);
  int8_t* dst = reinterpret_cast<int8_t*>(output_buffers_ptr);
  // TODO(todd): Consolidate this column byte offset logic, which occurs in at
  // least 4 places.
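  // Compact the columnar output in place: each column was allocated assuming
  // mgr->get_nrows() rows, but only output_row_count rows were produced.
  // Example: with two int64 columns, 100 allocated rows, and 10 produced rows,
  // column 0 stays at byte offset 0 while column 1 moves from offset 800 down
  // to offset 80 (both offsets passed through align_to_int64).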
  for (size_t col_idx = 0; col_idx < num_out_columns; col_idx++) {
    const size_t target_width =
        exe_unit.target_exprs[col_idx]->get_type_info().get_size();
    const size_t allocated_column_size = target_width * mgr->get_nrows();
    const size_t actual_column_size = target_width * output_row_count;
    if (src != dst) {
      auto t = memmove(dst, src, actual_column_size);
      CHECK_EQ(dst, t);
    }
    src = align_to_int64(src + allocated_column_size);
    dst = align_to_int64(dst + actual_column_size);
  }
  return mgr->query_buffers->getResultSetOwned(0);
}

namespace {
// Kernel parameter slots. (Enumerator names recovered from their uses below;
// KERNEL_PARAM_COUNT must come last.)
enum {
  MANAGER,
  ERROR_BUFFER,
  COL_BUFFERS,
  COL_SIZES,
  OUTPUT_BUFFERS,
  OUTPUT_ROW_COUNT,
  KERNEL_PARAM_COUNT,
};
}  // namespace
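
// The enumerators index kernel_params below; their order therefore has to
// match the parameter order of the generated GPU kernel's entry point, since
// kernel_params is handed to cuLaunchKernel as the argument array.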

ResultSetPtr TableFunctionExecutionContext::launchGpuCode(
    const TableFunctionExecutionUnit& exe_unit,
    const std::shared_ptr<GpuCompilationContext>& compilation_context,
    std::vector<const int8_t*>& col_buf_ptrs,
    std::vector<int64_t>& col_sizes,
    const size_t elem_count,
    const int device_id,
    Executor* executor) {
#ifdef HAVE_CUDA
  auto timer = DEBUG_TIMER(__func__);
  if (exe_unit.table_func.hasTableFunctionSpecifiedParameter()) {
    // (condition reconstructed: functions that size their own output need the
    // TableFunctionManager, which has no GPU implementation yet)
    throw QueryMustRunOnCpu();
  }

  auto num_out_columns = exe_unit.target_exprs.size();
  auto data_mgr = executor->getDataMgr();
  auto gpu_allocator = std::make_unique<CudaAllocator>(data_mgr, device_id);
  CHECK(gpu_allocator);
  std::vector<CUdeviceptr> kernel_params(KERNEL_PARAM_COUNT, 0);

  // TODO: implement a table function manager for CUDA kernels.
  // kernel_params[MANAGER] ought to contain a device pointer to a struct that
  // a table function kernel with a TableFunctionManager argument can access
  // from the device.
  kernel_params[MANAGER] =
      reinterpret_cast<CUdeviceptr>(gpu_allocator->alloc(sizeof(int8_t*)));
512 
513  // setup the inputs
514  auto byte_stream_ptr = !(col_buf_ptrs.empty())
515  ? gpu_allocator->alloc(col_buf_ptrs.size() * sizeof(int64_t))
516  : nullptr;
517  if (byte_stream_ptr) {
518  gpu_allocator->copyToDevice(byte_stream_ptr,
519  reinterpret_cast<int8_t*>(col_buf_ptrs.data()),
520  col_buf_ptrs.size() * sizeof(int64_t));
521  }
522  kernel_params[COL_BUFFERS] = reinterpret_cast<CUdeviceptr>(byte_stream_ptr);
523 
524  auto col_sizes_ptr = !(col_sizes.empty())
525  ? gpu_allocator->alloc(col_sizes.size() * sizeof(int64_t))
526  : nullptr;
527  if (col_sizes_ptr) {
528  gpu_allocator->copyToDevice(col_sizes_ptr,
529  reinterpret_cast<int8_t*>(col_sizes.data()),
530  col_sizes.size() * sizeof(int64_t));
531  }
532  kernel_params[COL_SIZES] = reinterpret_cast<CUdeviceptr>(col_sizes_ptr);

  kernel_params[ERROR_BUFFER] =
      reinterpret_cast<CUdeviceptr>(gpu_allocator->alloc(sizeof(int32_t)));

  // initialize output memory
  // (the QueryDescriptionType argument was lost in extraction; Projection is
  // the likely value given /*is_table_function=*/true)
  QueryMemoryDescriptor query_mem_desc(executor,
                                       elem_count,
                                       QueryDescriptionType::Projection,
                                       /*is_table_function=*/true);
  query_mem_desc.setOutputColumnar(true);

  for (size_t i = 0; i < num_out_columns; i++) {
    const size_t col_width = exe_unit.target_exprs[i]->get_type_info().get_size();
    query_mem_desc.addColSlotInfo({std::make_tuple(col_width, col_width)});
  }
  const auto allocated_output_row_count = get_output_row_count(exe_unit, elem_count);
  auto query_buffers = std::make_unique<QueryMemoryInitializer>(
      exe_unit,
      query_mem_desc,
      device_id,
      ExecutorDeviceType::GPU,
      (allocated_output_row_count == 0 ? 1 : allocated_output_row_count),
      std::vector<std::vector<const int8_t*>>{col_buf_ptrs},
      std::vector<std::vector<uint64_t>>{{0}},  // frag offsets
      row_set_mem_owner_,
      gpu_allocator.get(),
      executor);

  // setup the output
  int64_t output_row_count = allocated_output_row_count;

  kernel_params[OUTPUT_ROW_COUNT] =
      reinterpret_cast<CUdeviceptr>(gpu_allocator->alloc(sizeof(int64_t*)));
  gpu_allocator->copyToDevice(reinterpret_cast<int8_t*>(kernel_params[OUTPUT_ROW_COUNT]),
                              reinterpret_cast<int8_t*>(&output_row_count),
                              sizeof(output_row_count));
  /*
    TODO: RBC-generated runtime table functions do not support concurrent
    execution on a CUDA device, so we force a block/grid size of 1 for runtime
    table functions. To lift this restriction, RBC will need to expose
    threadIdx/blockIdx/blockDim to runtime table functions, and those functions
    must do something sensible with that information.
  */
  const unsigned block_size_x =
      (exe_unit.table_func.isRuntime() ? 1 : executor->blockSize());
  const unsigned block_size_y = 1;
  const unsigned block_size_z = 1;
  const unsigned grid_size_x =
      (exe_unit.table_func.isRuntime() ? 1 : executor->gridSize());
  const unsigned grid_size_y = 1;
  const unsigned grid_size_z = 1;

  auto gpu_output_buffers = query_buffers->setupTableFunctionGpuBuffers(
      query_mem_desc, device_id, block_size_x, grid_size_x);

  kernel_params[OUTPUT_BUFFERS] = reinterpret_cast<CUdeviceptr>(gpu_output_buffers.ptrs);

  // execute
  CHECK_EQ(static_cast<size_t>(KERNEL_PARAM_COUNT), kernel_params.size());

  std::vector<void*> param_ptrs;
  for (auto& param : kernel_params) {
    param_ptrs.push_back(&param);
  }
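
  // cuLaunchKernel takes an array of host pointers, each addressing the
  // storage of one kernel argument; each element of param_ptrs points at the
  // corresponding CUdeviceptr slot in kernel_params.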

  // get the compiled kernel
  CHECK(compilation_context);
  const auto native_code = compilation_context->getNativeCode(device_id);
  auto cu_func = static_cast<CUfunction>(native_code.first);
  checkCudaErrors(cuLaunchKernel(cu_func,
                                 grid_size_x,
                                 grid_size_y,
                                 grid_size_z,
                                 block_size_x,
                                 block_size_y,
                                 block_size_z,
                                 0,  // shared mem bytes
                                 nullptr,
                                 &param_ptrs[0],
                                 nullptr));
  // TODO(adb): read errors

  // read output row count from GPU
  gpu_allocator->copyFromDevice(
      reinterpret_cast<int8_t*>(&output_row_count),
      reinterpret_cast<int8_t*>(kernel_params[OUTPUT_ROW_COUNT]),
      sizeof(int64_t));
  if (exe_unit.table_func.hasNonUserSpecifiedOutputSize()) {
    if (static_cast<size_t>(output_row_count) != allocated_output_row_count) {
      throw TableFunctionError(
          "Table function with constant sizing parameter must return " +
          std::to_string(allocated_output_row_count) + " (got " +
          std::to_string(output_row_count) + ")");
    }
  } else {
    if (output_row_count < 0 || (size_t)output_row_count > allocated_output_row_count) {
      output_row_count = allocated_output_row_count;
    }
  }

  // Update the entry count; it may differ from the allocated buffer size.
  query_buffers->getResultSet(0)->updateStorageEntryCount(output_row_count);

  // Copy the results back to CPU storage.
  query_buffers->copyFromTableFunctionGpuBuffers(data_mgr,
                                                 query_mem_desc,
                                                 output_row_count,
                                                 gpu_output_buffers,
                                                 device_id,
                                                 block_size_x,
                                                 grid_size_x);

  return query_buffers->getResultSetOwned(0);
#else
  UNREACHABLE();
  return nullptr;
#endif
}