OmniSciDB  04ee39c94c
ArrowResultSetConverter Class Reference

#include <ArrowResultSet.h>

Classes

struct  ColumnBuilder
 
struct  SerializedArrowOutput
 

Public Member Functions

 ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::shared_ptr< Data_Namespace::DataMgr > data_mgr, const ExecutorDeviceType device_type, const int32_t device_id, const std::vector< std::string > &col_names, const int32_t first_n)
 
ArrowResult getArrowResult () const
 

Private Member Functions

 ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::vector< std::string > &col_names, const int32_t first_n)
 
std::shared_ptr< arrow::RecordBatch > convertToArrow (arrow::ipc::DictionaryMemo &memo) const
 
std::shared_ptr< arrow::RecordBatch > getArrowBatch (const std::shared_ptr< arrow::Schema > &schema) const
 
ArrowResult getArrowResultImpl () const
 
std::shared_ptr< arrow::Field > makeField (const std::string name, const SQLTypeInfo &target_type, const std::shared_ptr< arrow::Array > &dictionary) const
 
std::shared_ptr< arrow::DataType > getArrowType (const SQLTypeInfo &mapd_type, const std::shared_ptr< arrow::Array > &dict_values) const
 
SerializedArrowOutput getSerializedArrowOutput () const
 
void initializeColumnBuilder (ColumnBuilder &column_builder, const SQLTypeInfo &col_type, const std::shared_ptr< arrow::Field > &field) const
 
void reserveColumnBuilderSize (ColumnBuilder &column_builder, const size_t row_count) const
 
void append (ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid) const
 
template<typename BuilderType , typename C_TYPE >
void appendToColumnBuilder (ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid) const
 
std::shared_ptr< arrow::Array > finishColumnBuilder (ColumnBuilder &column_builder) const
 

Private Attributes

std::shared_ptr< ResultSet > results_
 
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_ = nullptr
 
ExecutorDeviceType device_type_ = ExecutorDeviceType::GPU
 
int32_t device_id_ = 0
 
std::vector< std::string > col_names_
 
int32_t top_n_
 

Friends

class ArrowResultSet
 

Detailed Description

Definition at line 166 of file ArrowResultSet.h.

Constructor & Destructor Documentation

◆ ArrowResultSetConverter() [1/2]

ArrowResultSetConverter::ArrowResultSetConverter ( const std::shared_ptr< ResultSet > &  results,
const std::shared_ptr< Data_Namespace::DataMgr >  data_mgr,
const ExecutorDeviceType  device_type,
const int32_t  device_id,
const std::vector< std::string > &  col_names,
const int32_t  first_n 
)
inline

Definition at line 168 of file ArrowResultSet.h.

174  : results_(results)
175  , data_mgr_(data_mgr)
176  , device_type_(device_type)
177  , device_id_(device_id)
178  , col_names_(col_names)
179  , top_n_(first_n) {}
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
std::vector< std::string > col_names_
ExecutorDeviceType device_type_
std::shared_ptr< ResultSet > results_

◆ ArrowResultSetConverter() [2/2]

ArrowResultSetConverter::ArrowResultSetConverter ( const std::shared_ptr< ResultSet > &  results,
const std::vector< std::string > &  col_names,
const int32_t  first_n 
)
inline private

Definition at line 184 of file ArrowResultSet.h.

187  : results_(results), col_names_(col_names), top_n_(first_n) {}
std::vector< std::string > col_names_
std::shared_ptr< ResultSet > results_

Member Function Documentation

◆ append()

void ArrowResultSetConverter::append ( ColumnBuilder &  column_builder,
const ValueArray &  values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
) const
private

Definition at line 645 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, GPU, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and ArrowResultSetConverter::ColumnBuilder::physical_type.

648  {
649  switch (column_builder.physical_type) {
650  case kBOOLEAN:
651  appendToColumnBuilder<BooleanBuilder, bool>(column_builder, values, is_valid);
652  break;
653  case kTINYINT:
654  appendToColumnBuilder<Int8Builder, int8_t>(column_builder, values, is_valid);
655  break;
656  case kSMALLINT:
657  appendToColumnBuilder<Int16Builder, int16_t>(column_builder, values, is_valid);
658  break;
659  case kINT:
660  appendToColumnBuilder<Int32Builder, int32_t>(column_builder, values, is_valid);
661  break;
662  case kBIGINT:
663  appendToColumnBuilder<Int64Builder, int64_t>(column_builder, values, is_valid);
664  break;
665  case kFLOAT:
666  appendToColumnBuilder<FloatBuilder, float>(column_builder, values, is_valid);
667  break;
668  case kDOUBLE:
669  appendToColumnBuilder<DoubleBuilder, double>(column_builder, values, is_valid);
670  break;
671  case kTIME:
672  appendToColumnBuilder<Time32Builder, int32_t>(column_builder, values, is_valid);
673  break;
674  case kTIMESTAMP:
675  appendToColumnBuilder<TimestampBuilder, int64_t>(column_builder, values, is_valid);
676  break;
677  case kDATE:
678  device_type_ == ExecutorDeviceType::GPU
679  ? appendToColumnBuilder<Date64Builder, int64_t>(
680  column_builder, values, is_valid)
681  : appendToColumnBuilder<Date32Builder, int32_t>(
682  column_builder, values, is_valid);
683  break;
684  case kCHAR:
685  case kVARCHAR:
686  case kTEXT:
687  default:
688  // TODO(miyu): support more scalar types.
689  throw std::runtime_error(column_builder.col_type.get_type_name() +
690  " is not supported in Arrow result sets.");
691  }
692 }
Definition: sqltypes.h:51
ExecutorDeviceType device_type_
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
Definition: sqltypes.h:47

◆ appendToColumnBuilder()

template<typename BuilderType , typename C_TYPE >
void ArrowResultSetConverter::appendToColumnBuilder ( ColumnBuilder &  column_builder,
const ValueArray &  values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
) const
inline private

Definition at line 620 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, CHECK, ArrowResultSetConverter::ColumnBuilder::field, DateConverters::get_epoch_days_from_seconds(), and kMilliSecsPerSec.

623  {
624  std::vector<C_TYPE> vals = boost::get<std::vector<C_TYPE>>(values);
625 
626  if (scale_epoch_values<BuilderType>()) {
627  auto scale_sec_to_millisec = [](auto seconds) { return seconds * kMilliSecsPerSec; };
628  auto scale_values = [&](auto epoch) {
629  return std::is_same<BuilderType, Date32Builder>::value
630  ? DateConverters::get_epoch_days_from_seconds(epoch)
631  : scale_sec_to_millisec(epoch);
632  };
633  std::transform(vals.begin(), vals.end(), vals.begin(), scale_values);
634  }
635 
636  auto typed_builder = static_cast<BuilderType*>(column_builder.builder.get());
637  if (column_builder.field->nullable()) {
638  CHECK(is_valid.get());
639  ARROW_THROW_NOT_OK(typed_builder->APPENDVALUES(vals, *is_valid));
640  } else {
641  ARROW_THROW_NOT_OK(typed_builder->APPENDVALUES(vals));
642  }
643 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28
static constexpr int64_t kMilliSecsPerSec
#define CHECK(condition)
Definition: Logger.h:187
int64_t get_epoch_days_from_seconds(const int64_t seconds)
+ Here is the call graph for this function:

◆ convertToArrow()

std::shared_ptr< arrow::RecordBatch > ArrowResultSetConverter::convertToArrow ( arrow::ipc::DictionaryMemo &  memo) const
private

Definition at line 283 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, and CHECK.

284  {
285  const auto col_count = results_->colCount();
286  std::vector<std::shared_ptr<arrow::Field>> fields;
287  CHECK(col_names_.empty() || col_names_.size() == col_count);
288  for (size_t i = 0; i < col_count; ++i) {
289  const auto ti = results_->getColType(i);
290  std::shared_ptr<arrow::Array> dict;
291  if (ti.is_dict_encoded_string()) {
292  const int dict_id = ti.get_comp_param();
293  if (memo.HasDictionaryId(dict_id)) {
294  ARROW_THROW_NOT_OK(memo.GetDictionary(dict_id, &dict));
295  } else {
296  auto str_list = results_->getStringDictionaryPayloadCopy(dict_id);
297 
298  arrow::StringBuilder builder;
299  // TODO(andrewseidl): replace with AppendValues() once Arrow 0.7.1 support is
300  // fully deprecated
301  for (const std::string& val : *str_list) {
302  ARROW_THROW_NOT_OK(builder.Append(val));
303  }
304  ARROW_THROW_NOT_OK(builder.Finish(&dict));
305  ARROW_THROW_NOT_OK(memo.AddDictionary(dict_id, dict));
306  }
307  }
308  fields.push_back(makeField(col_names_.empty() ? "" : col_names_[i], ti, dict));
309  }
310  return getArrowBatch(arrow::schema(fields));
311 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28
std::shared_ptr< arrow::Field > makeField(const std::string name, const SQLTypeInfo &target_type, const std::shared_ptr< arrow::Array > &dictionary) const
std::vector< std::string > col_names_
std::shared_ptr< arrow::RecordBatch > getArrowBatch(const std::shared_ptr< arrow::Schema > &schema) const
std::shared_ptr< ResultSet > results_
#define CHECK(condition)
Definition: Logger.h:187

◆ finishColumnBuilder()

std::shared_ptr< arrow::Array > ArrowResultSetConverter::finishColumnBuilder ( ColumnBuilder &  column_builder) const
inline private

Definition at line 608 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, and ArrowResultSetConverter::ColumnBuilder::field.

609  {
610  std::shared_ptr<Array> values;
611  ARROW_THROW_NOT_OK(column_builder.builder->Finish(&values));
612  if (column_builder.field->type()->id() == Type::DICTIONARY) {
613  return std::make_shared<DictionaryArray>(column_builder.field->type(), values);
614  } else {
615  return values;
616  }
617 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28

◆ getArrowBatch()

std::shared_ptr< arrow::RecordBatch > ArrowResultSetConverter::getArrowBatch ( const std::shared_ptr< arrow::Schema > &  schema) const
private

Definition at line 313 of file ArrowResultSetConverter.cpp.

References File_Namespace::append(), ARROW_RECORDBATCH_MAKE, CHECK, CHECK_EQ, cpu_threads(), GPU, kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

314  {
315  std::vector<std::shared_ptr<arrow::Array>> result_columns;
316 
317  const size_t entry_count = top_n_ < 0
318  ? results_->entryCount()
319  : std::min(size_t(top_n_), results_->entryCount());
320  if (!entry_count) {
321  return ARROW_RECORDBATCH_MAKE(schema, 0, result_columns);
322  }
323  const auto col_count = results_->colCount();
324  size_t row_count = 0;
325 
326  std::vector<ColumnBuilder> builders(col_count);
327 
328  // Create array builders
329  for (size_t i = 0; i < col_count; ++i) {
330  initializeColumnBuilder(builders[i], results_->getColType(i), schema->field(i));
331  }
332 
333  // TODO(miyu): speed up for columnar buffers
334  auto fetch = [&](std::vector<std::shared_ptr<ValueArray>>& value_seg,
335  std::vector<std::shared_ptr<std::vector<bool>>>& null_bitmap_seg,
336  const size_t start_entry,
337  const size_t end_entry) -> size_t {
338  CHECK_EQ(value_seg.size(), col_count);
339  CHECK_EQ(null_bitmap_seg.size(), col_count);
340  const auto entry_count = end_entry - start_entry;
341  size_t seg_row_count = 0;
342  for (size_t i = start_entry; i < end_entry; ++i) {
343  auto row = results_->getRowAtNoTranslations(i);
344  if (row.empty()) {
345  continue;
346  }
347  ++seg_row_count;
348  for (size_t j = 0; j < col_count; ++j) {
349  auto scalar_value = boost::get<ScalarTargetValue>(&row[j]);
350  // TODO(miyu): support more types other than scalar.
351  CHECK(scalar_value);
352  const auto& column = builders[j];
353  switch (column.physical_type) {
354  case kBOOLEAN:
355  create_or_append_value<bool, int64_t>(
356  *scalar_value, value_seg[j], entry_count);
357  create_or_append_validity<int64_t>(
358  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
359  break;
360  case kTINYINT:
361  create_or_append_value<int8_t, int64_t>(
362  *scalar_value, value_seg[j], entry_count);
363  create_or_append_validity<int64_t>(
364  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
365  break;
366  case kSMALLINT:
367  create_or_append_value<int16_t, int64_t>(
368  *scalar_value, value_seg[j], entry_count);
369  create_or_append_validity<int64_t>(
370  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
371  break;
372  case kINT:
373  create_or_append_value<int32_t, int64_t>(
374  *scalar_value, value_seg[j], entry_count);
375  create_or_append_validity<int64_t>(
376  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
377  break;
378  case kBIGINT:
379  create_or_append_value<int64_t, int64_t>(
380  *scalar_value, value_seg[j], entry_count);
381  create_or_append_validity<int64_t>(
382  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
383  break;
384  case kFLOAT:
385  create_or_append_value<float, float>(
386  *scalar_value, value_seg[j], entry_count);
387  create_or_append_validity<float>(
388  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
389  break;
390  case kDOUBLE:
391  create_or_append_value<double, double>(
392  *scalar_value, value_seg[j], entry_count);
393  create_or_append_validity<double>(
394  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
395  break;
396  case kTIME:
397  create_or_append_value<int32_t, int64_t>(
398  *scalar_value, value_seg[j], entry_count);
399  create_or_append_validity<int64_t>(
400  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
401  break;
402  case kDATE:
403  device_type_ == ExecutorDeviceType::GPU
404  ? create_or_append_value<int64_t, int64_t>(
405  *scalar_value, value_seg[j], entry_count)
406  : create_or_append_value<int32_t, int64_t>(
407  *scalar_value, value_seg[j], entry_count);
408  create_or_append_validity<int64_t>(
409  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
410  break;
411  case kTIMESTAMP:
412  create_or_append_value<int64_t, int64_t>(
413  *scalar_value, value_seg[j], entry_count);
414  create_or_append_validity<int64_t>(
415  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
416  break;
417  default:
418  // TODO(miyu): support more scalar types.
419  throw std::runtime_error(column.col_type.get_type_name() +
420  " is not supported in Arrow result sets.");
421  }
422  }
423  }
424  return seg_row_count;
425  };
426 
427  std::vector<std::shared_ptr<ValueArray>> column_values(col_count, nullptr);
428  std::vector<std::shared_ptr<std::vector<bool>>> null_bitmaps(col_count, nullptr);
429  const bool multithreaded = entry_count > 10000 && !results_->isTruncated();
430  if (multithreaded) {
431  const size_t cpu_count = cpu_threads();
432  std::vector<std::future<size_t>> child_threads;
433  std::vector<std::vector<std::shared_ptr<ValueArray>>> column_value_segs(
434  cpu_count, std::vector<std::shared_ptr<ValueArray>>(col_count, nullptr));
435  std::vector<std::vector<std::shared_ptr<std::vector<bool>>>> null_bitmap_segs(
436  cpu_count, std::vector<std::shared_ptr<std::vector<bool>>>(col_count, nullptr));
437  const auto stride = (entry_count + cpu_count - 1) / cpu_count;
438  for (size_t i = 0, start_entry = 0; start_entry < entry_count;
439  ++i, start_entry += stride) {
440  const auto end_entry = std::min(entry_count, start_entry + stride);
441  child_threads.push_back(std::async(std::launch::async,
442  fetch,
443  std::ref(column_value_segs[i]),
444  std::ref(null_bitmap_segs[i]),
445  start_entry,
446  end_entry));
447  }
448  for (auto& child : child_threads) {
449  row_count += child.get();
450  }
451  for (int i = 0; i < schema->num_fields(); ++i) {
452  reserveColumnBuilderSize(builders[i], row_count);
453  for (size_t j = 0; j < cpu_count; ++j) {
454  if (!column_value_segs[j][i]) {
455  continue;
456  }
457  append(builders[i], *column_value_segs[j][i], null_bitmap_segs[j][i]);
458  }
459  }
460  } else {
461  row_count = fetch(column_values, null_bitmaps, size_t(0), entry_count);
462  for (int i = 0; i < schema->num_fields(); ++i) {
463  reserveColumnBuilderSize(builders[i], row_count);
464  append(builders[i], *column_values[i], null_bitmaps[i]);
465  }
466  }
467 
468  for (size_t i = 0; i < col_count; ++i) {
469  result_columns.push_back(finishColumnBuilder(builders[i]));
470  }
471  return ARROW_RECORDBATCH_MAKE(schema, row_count, result_columns);
472 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
Definition: sqltypes.h:51
void reserveColumnBuilderSize(ColumnBuilder &column_builder, const size_t row_count) const
#define ARROW_RECORDBATCH_MAKE
ExecutorDeviceType device_type_
void append(ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid) const
std::shared_ptr< arrow::Array > finishColumnBuilder(ColumnBuilder &column_builder) const
Definition: sqltypes.h:55
std::shared_ptr< ResultSet > results_
void initializeColumnBuilder(ColumnBuilder &column_builder, const SQLTypeInfo &col_type, const std::shared_ptr< arrow::Field > &field) const
#define CHECK(condition)
Definition: Logger.h:187
Definition: sqltypes.h:47
int cpu_threads()
Definition: thread_count.h:23
+ Here is the call graph for this function:

◆ getArrowResult()

ArrowResult ArrowResultSetConverter::getArrowResult ( ) const
inline

Definition at line 181 of file ArrowResultSet.h.

181 { return getArrowResultImpl(); }
ArrowResult getArrowResultImpl() const

◆ getArrowResultImpl()

ArrowResult ArrowResultSetConverter::getArrowResultImpl ( ) const
private

Definition at line 217 of file ArrowResultSetConverter.cpp.

References CHECK, CPU, and arrow::get_and_copy_to_shm().

217  {
218  const auto serialized_arrow_output = getSerializedArrowOutput();
219  const auto& serialized_schema = serialized_arrow_output.schema;
220  const auto& serialized_records = serialized_arrow_output.records;
221 
222  const auto schema_key = arrow::get_and_copy_to_shm(serialized_schema);
223  CHECK(schema_key != IPC_PRIVATE);
224  std::vector<char> schema_handle_buffer(sizeof(key_t), 0);
225  memcpy(&schema_handle_buffer[0],
226  reinterpret_cast<const unsigned char*>(&schema_key),
227  sizeof(key_t));
228  if (device_type_ == ExecutorDeviceType::CPU) {
229  const auto record_key = arrow::get_and_copy_to_shm(serialized_records);
230  std::vector<char> record_handle_buffer(sizeof(key_t), 0);
231  memcpy(&record_handle_buffer[0],
232  reinterpret_cast<const unsigned char*>(&record_key),
233  sizeof(key_t));
234 
235  return {schema_handle_buffer,
236  serialized_schema->size(),
237  record_handle_buffer,
238  serialized_records->size(),
239  nullptr};
240  }
241 #ifdef HAVE_CUDA
242  if (serialized_records->size()) {
243  CHECK(data_mgr_);
244  const auto cuda_mgr = data_mgr_->getCudaMgr();
245  CHECK(cuda_mgr);
246  auto dev_ptr = reinterpret_cast<CUdeviceptr>(
247  cuda_mgr->allocateDeviceMem(serialized_records->size(), device_id_));
248  CUipcMemHandle record_handle;
249  cuIpcGetMemHandle(&record_handle, dev_ptr);
250  cuda_mgr->copyHostToDevice(
251  reinterpret_cast<int8_t*>(dev_ptr),
252  reinterpret_cast<const int8_t*>(serialized_records->data()),
253  serialized_records->size(),
254  device_id_);
255  std::vector<char> record_handle_buffer(sizeof(record_handle), 0);
256  memcpy(&record_handle_buffer[0],
257  reinterpret_cast<unsigned char*>(&record_handle),
258  sizeof(CUipcMemHandle));
259  return {schema_handle_buffer,
260  serialized_schema->size(),
261  record_handle_buffer,
262  serialized_records->size(),
263  reinterpret_cast<int8_t*>(dev_ptr)};
264  }
265 #endif
266  return {schema_handle_buffer, serialized_schema->size(), {}, 0, nullptr};
267 }
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
unsigned long long CUdeviceptr
Definition: nocuda.h:27
SerializedArrowOutput getSerializedArrowOutput() const
ExecutorDeviceType device_type_
key_t get_and_copy_to_shm(const std::shared_ptr< Buffer > &data)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:

◆ getArrowType()

std::shared_ptr< arrow::DataType > ArrowResultSetConverter::getArrowType ( const SQLTypeInfo &  mapd_type,
const std::shared_ptr< arrow::Array > &  dict_values 
) const
private

Definition at line 482 of file ArrowResultSetConverter.cpp.

References CHECK, anonymous_namespace{ArrowResultSetConverter.cpp}::get_dict_index_type_info(), anonymous_namespace{ArrowResultSetConverter.cpp}::get_physical_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_precision(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_scale(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type_name(), GPU, SQLTypeInfoCore< TYPE_FACET_PACK >::is_dict_encoded_string(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and to_string().

484  {
485  switch (get_physical_type(mapd_type)) {
486  case kBOOLEAN:
487  return boolean();
488  case kTINYINT:
489  return int8();
490  case kSMALLINT:
491  return int16();
492  case kINT:
493  return int32();
494  case kBIGINT:
495  return int64();
496  case kFLOAT:
497  return float32();
498  case kDOUBLE:
499  return float64();
500  case kCHAR:
501  case kVARCHAR:
502  case kTEXT:
503  if (mapd_type.is_dict_encoded_string()) {
504  CHECK(dict_values);
505  const auto index_type =
506  getArrowType(get_dict_index_type_info(mapd_type), nullptr);
507  return dictionary(index_type, dict_values);
508  }
509  return utf8();
510  case kDECIMAL:
511  case kNUMERIC:
512  return decimal(mapd_type.get_precision(), mapd_type.get_scale());
513  case kTIME:
514  return time32(TimeUnit::SECOND);
515  case kDATE:
516  // TODO(wamsi) : Remove date64() once date32() support is added in cuDF. date32()
517  // Currently support for date32() is missing in cuDF.Hence, if client requests for
518  // date on GPU, return date64() for the time being, till support is added.
519  return device_type_ == ExecutorDeviceType::GPU ? date64() : date32();
520  case kTIMESTAMP:
521  switch (mapd_type.get_precision()) {
522  case 0:
523  return timestamp(TimeUnit::SECOND);
524  case 3:
525  return timestamp(TimeUnit::MILLI);
526  case 6:
527  return timestamp(TimeUnit::MICRO);
528  case 9:
529  return timestamp(TimeUnit::NANO);
530  default:
531  throw std::runtime_error(
532  "Unsupported timestamp precision for Arrow result sets: " +
533  std::to_string(mapd_type.get_precision()));
534  }
535  case kARRAY:
536  case kINTERVAL_DAY_TIME:
537  case kINTERVAL_YEAR_MONTH:
538  default:
539  throw std::runtime_error(mapd_type.get_type_name() +
540  " is not supported in Arrow result sets.");
541  }
542  return nullptr;
543 }
int get_precision() const
Definition: sqltypes.h:326
Definition: sqltypes.h:51
SQLTypeInfo get_dict_index_type_info(const SQLTypeInfo &ti)
HOST DEVICE int get_scale() const
Definition: sqltypes.h:328
std::string to_string(char const *&&v)
std::string get_type_name() const
Definition: sqltypes.h:426
std::shared_ptr< arrow::DataType > getArrowType(const SQLTypeInfo &mapd_type, const std::shared_ptr< arrow::Array > &dict_values) const
ExecutorDeviceType device_type_
bool is_dict_encoded_string() const
Definition: sqltypes.h:476
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
#define CHECK(condition)
Definition: Logger.h:187
Definition: sqltypes.h:47
+ Here is the call graph for this function:

◆ getSerializedArrowOutput()

ArrowResultSetConverter::SerializedArrowOutput ArrowResultSetConverter::getSerializedArrowOutput ( ) const
private

Definition at line 270 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK.

270  {
271  arrow::ipc::DictionaryMemo dict_memo;
272  std::shared_ptr<arrow::RecordBatch> arrow_copy = convertToArrow(dict_memo);
273  std::shared_ptr<arrow::Buffer> serialized_records, serialized_schema;
274 
275  ARROW_THROW_NOT_OK(arrow::ipc::SerializeSchema(
276  *arrow_copy->schema(), arrow::default_memory_pool(), &serialized_schema));
277 
278  ARROW_THROW_NOT_OK(arrow::ipc::SerializeRecordBatch(
279  *arrow_copy, arrow::default_memory_pool(), &serialized_records));
280  return {serialized_schema, serialized_records};
281 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28
std::shared_ptr< arrow::RecordBatch > convertToArrow(arrow::ipc::DictionaryMemo &memo) const

◆ initializeColumnBuilder()

void ArrowResultSetConverter::initializeColumnBuilder ( ColumnBuilder &  column_builder,
const SQLTypeInfo &  col_type,
const std::shared_ptr< arrow::Field > &  field 
) const
private

Definition at line 585 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, ArrowResultSetConverter::ColumnBuilder::col_type, field(), ArrowResultSetConverter::ColumnBuilder::field, anonymous_namespace{ArrowResultSetConverter.cpp}::get_dict_index_type(), anonymous_namespace{ArrowResultSetConverter.cpp}::get_physical_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_dict_encoded_string(), and ArrowResultSetConverter::ColumnBuilder::physical_type.

588  {
589  column_builder.field = field;
590  column_builder.col_type = col_type;
591  column_builder.physical_type = col_type.is_dict_encoded_string()
592  ? get_dict_index_type(col_type)
593  : get_physical_type(col_type);
594 
595  auto value_type = field->type();
596  if (value_type->id() == Type::DICTIONARY) {
597  value_type = static_cast<const DictionaryType&>(*value_type).index_type();
598  }
599  ARROW_THROW_NOT_OK(
600  arrow::MakeBuilder(default_memory_pool(), value_type, &column_builder.builder));
601 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
bool is_dict_encoded_string() const
Definition: sqltypes.h:476
+ Here is the call graph for this function:

◆ makeField()

std::shared_ptr< arrow::Field > ArrowResultSetConverter::makeField ( const std::string  name,
const SQLTypeInfo &  target_type,
const std::shared_ptr< arrow::Array > &  dictionary 
) const
private

Definition at line 474 of file ArrowResultSetConverter.cpp.

References field(), and SQLTypeInfoCore< TYPE_FACET_PACK >::get_notnull().

477  {
478  return arrow::field(
479  name, getArrowType(target_type, dictionary), !target_type.get_notnull());
480 }
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:330
std::shared_ptr< arrow::DataType > getArrowType(const SQLTypeInfo &mapd_type, const std::shared_ptr< arrow::Array > &dict_values) const
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
+ Here is the call graph for this function:

◆ reserveColumnBuilderSize()

void ArrowResultSetConverter::reserveColumnBuilderSize ( ColumnBuilder &  column_builder,
const size_t  row_count 
) const
inline private

Definition at line 603 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, and ArrowResultSetConverter::ColumnBuilder::builder.

604  {
605  ARROW_THROW_NOT_OK(column_builder.builder->Reserve(static_cast<int64_t>(row_count)));
606 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28

Friends And Related Function Documentation

◆ ArrowResultSet

friend class ArrowResultSet
friend

Definition at line 238 of file ArrowResultSet.h.

Member Data Documentation

◆ col_names_

std::vector<std::string> ArrowResultSetConverter::col_names_
private

Definition at line 235 of file ArrowResultSet.h.

◆ data_mgr_

std::shared_ptr<Data_Namespace::DataMgr> ArrowResultSetConverter::data_mgr_ = nullptr
private

Definition at line 232 of file ArrowResultSet.h.

◆ device_id_

int32_t ArrowResultSetConverter::device_id_ = 0
private

Definition at line 234 of file ArrowResultSet.h.

◆ device_type_

ExecutorDeviceType ArrowResultSetConverter::device_type_ = ExecutorDeviceType::GPU
private

Definition at line 233 of file ArrowResultSet.h.

◆ results_

std::shared_ptr<ResultSet> ArrowResultSetConverter::results_
private

Definition at line 231 of file ArrowResultSet.h.

◆ top_n_

int32_t ArrowResultSetConverter::top_n_
private

Definition at line 236 of file ArrowResultSet.h.


The documentation for this class was generated from the following files: