OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ArrowResultSetConverter Class Reference

#include <ArrowResultSet.h>

Classes

struct  ColumnBuilder
 
struct  SerializedArrowOutput
 

Public Member Functions

 ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::shared_ptr< Data_Namespace::DataMgr > data_mgr, const ExecutorDeviceType device_type, const int32_t device_id, const std::vector< std::string > &col_names, const int32_t first_n)
 
ArrowResult getArrowResult () const
 

Private Member Functions

 ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::vector< std::string > &col_names, const int32_t first_n)
 
std::shared_ptr
< arrow::RecordBatch > 
convertToArrow (arrow::ipc::DictionaryMemo &memo) const
 
std::shared_ptr
< arrow::RecordBatch > 
getArrowBatch (const std::shared_ptr< arrow::Schema > &schema) const
 
ArrowResult getArrowResultImpl () const
 
std::shared_ptr< arrow::Field > makeField (const std::string name, const SQLTypeInfo &target_type, const std::shared_ptr< arrow::Array > &dictionary) const
 
std::shared_ptr< arrow::DataType > getArrowType (const SQLTypeInfo &mapd_type, const std::shared_ptr< arrow::Array > &dict_values) const
 
SerializedArrowOutput getSerializedArrowOutput () const
 
void initializeColumnBuilder (ColumnBuilder &column_builder, const SQLTypeInfo &col_type, const std::shared_ptr< arrow::Field > &field) const
 
void reserveColumnBuilderSize (ColumnBuilder &column_builder, const size_t row_count) const
 
void append (ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid) const
 
template<typename BuilderType , typename C_TYPE >
void appendToColumnBuilder (ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid) const
 
std::shared_ptr< arrow::Array > finishColumnBuilder (ColumnBuilder &column_builder) const
 

Private Attributes

std::shared_ptr< ResultSet > results_
 
std::shared_ptr
< Data_Namespace::DataMgr > 
data_mgr_ = nullptr
 
ExecutorDeviceType device_type_ = ExecutorDeviceType::GPU
 
int32_t device_id_ = 0
 
std::vector< std::string > col_names_
 
int32_t top_n_
 

Friends

class ArrowResultSet
 

Detailed Description

Definition at line 166 of file ArrowResultSet.h.

Constructor & Destructor Documentation

ArrowResultSetConverter::ArrowResultSetConverter ( const std::shared_ptr< ResultSet > &  results,
const std::shared_ptr< Data_Namespace::DataMgr >  data_mgr,
const ExecutorDeviceType  device_type,
const int32_t  device_id,
const std::vector< std::string > &  col_names,
const int32_t  first_n 
)
inline

Definition at line 168 of file ArrowResultSet.h.

174  : results_(results)
175  , data_mgr_(data_mgr)
176  , device_type_(device_type)
177  , device_id_(device_id)
178  , col_names_(col_names)
179  , top_n_(first_n) {}
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
std::vector< std::string > col_names_
ExecutorDeviceType device_type_
std::shared_ptr< ResultSet > results_
ArrowResultSetConverter::ArrowResultSetConverter ( const std::shared_ptr< ResultSet > &  results,
const std::vector< std::string > &  col_names,
const int32_t  first_n 
)
inline private

Definition at line 184 of file ArrowResultSet.h.

187  : results_(results), col_names_(col_names), top_n_(first_n) {}
std::vector< std::string > col_names_
std::shared_ptr< ResultSet > results_

Member Function Documentation

void ArrowResultSetConverter::append ( ColumnBuilder &  column_builder,
const ValueArray &  values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
) const
private

Definition at line 650 of file ArrowResultSetConverter.cpp.

References GPU, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and ArrowResultSetConverter::ColumnBuilder::physical_type.

653  {
654  switch (column_builder.physical_type) {
655  case kBOOLEAN:
656  appendToColumnBuilder<BooleanBuilder, bool>(column_builder, values, is_valid);
657  break;
658  case kTINYINT:
659  appendToColumnBuilder<Int8Builder, int8_t>(column_builder, values, is_valid);
660  break;
661  case kSMALLINT:
662  appendToColumnBuilder<Int16Builder, int16_t>(column_builder, values, is_valid);
663  break;
664  case kINT:
665  appendToColumnBuilder<Int32Builder, int32_t>(column_builder, values, is_valid);
666  break;
667  case kBIGINT:
668  appendToColumnBuilder<Int64Builder, int64_t>(column_builder, values, is_valid);
669  break;
670  case kFLOAT:
671  appendToColumnBuilder<FloatBuilder, float>(column_builder, values, is_valid);
672  break;
673  case kDOUBLE:
674  appendToColumnBuilder<DoubleBuilder, double>(column_builder, values, is_valid);
675  break;
676  case kTIME:
677  appendToColumnBuilder<Time32Builder, int32_t>(column_builder, values, is_valid);
678  break;
679  case kTIMESTAMP:
680  appendToColumnBuilder<TimestampBuilder, int64_t>(column_builder, values, is_valid);
681  break;
682  case kDATE:
683  device_type_ == ExecutorDeviceType::GPU
684  ? appendToColumnBuilder<Date64Builder, int64_t>(
685  column_builder, values, is_valid)
686  : appendToColumnBuilder<Date32Builder, int32_t>(
687  column_builder, values, is_valid);
688  break;
689  case kCHAR:
690  case kVARCHAR:
691  case kTEXT:
692  default:
693  // TODO(miyu): support more scalar types.
694  throw std::runtime_error(column_builder.col_type.get_type_name() +
695  " is not supported in Arrow result sets.");
696  }
697 }
Definition: sqltypes.h:52
ExecutorDeviceType device_type_
Definition: sqltypes.h:55
Definition: sqltypes.h:56
Definition: sqltypes.h:44
Definition: sqltypes.h:48
template<typename BuilderType , typename C_TYPE >
void ArrowResultSetConverter::appendToColumnBuilder ( ColumnBuilder &  column_builder,
const ValueArray &  values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
) const
inline private

Definition at line 625 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, CHECK(), ArrowResultSetConverter::ColumnBuilder::field, DateConverters::get_epoch_days_from_seconds(), and kMilliSecsPerSec.

628  {
629  std::vector<C_TYPE> vals = boost::get<std::vector<C_TYPE>>(values);
630 
631  if (scale_epoch_values<BuilderType>()) {
632  auto scale_sec_to_millisec = [](auto seconds) { return seconds * kMilliSecsPerSec; };
633  auto scale_values = [&](auto epoch) {
634  return std::is_same<BuilderType, Date32Builder>::value
635  ? DateConverters::get_epoch_days_from_seconds(epoch)
636  : scale_sec_to_millisec(epoch);
637  };
638  std::transform(vals.begin(), vals.end(), vals.begin(), scale_values);
639  }
640 
641  auto typed_builder = static_cast<BuilderType*>(column_builder.builder.get());
642  if (column_builder.field->nullable()) {
643  CHECK(is_valid.get());
644  ARROW_THROW_NOT_OK(typed_builder->APPENDVALUES(vals, *is_valid));
645  } else {
646  ARROW_THROW_NOT_OK(typed_builder->APPENDVALUES(vals));
647  }
648 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28
static constexpr int64_t kMilliSecsPerSec
CHECK(cgen_state)
int64_t get_epoch_days_from_seconds(const int64_t seconds)

+ Here is the call graph for this function:

std::shared_ptr< arrow::RecordBatch > ArrowResultSetConverter::convertToArrow ( arrow::ipc::DictionaryMemo &  memo) const
private

Definition at line 288 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, and CHECK().

289  {
290  const auto col_count = results_->colCount();
291  std::vector<std::shared_ptr<arrow::Field>> fields;
292  CHECK(col_names_.empty() || col_names_.size() == col_count);
293  for (size_t i = 0; i < col_count; ++i) {
294  const auto ti = results_->getColType(i);
295  std::shared_ptr<arrow::Array> dict;
296  if (ti.is_dict_encoded_string()) {
297  const int dict_id = ti.get_comp_param();
298  if (memo.HasDictionaryId(dict_id)) {
299  ARROW_THROW_NOT_OK(memo.GetDictionary(dict_id, &dict));
300  } else {
301  auto str_list = results_->getStringDictionaryPayloadCopy(dict_id);
302 
303  arrow::StringBuilder builder;
304  // TODO(andrewseidl): replace with AppendValues() once Arrow 0.7.1 support is
305  // fully deprecated
306  for (const std::string& val : *str_list) {
307  ARROW_THROW_NOT_OK(builder.Append(val));
308  }
309  ARROW_THROW_NOT_OK(builder.Finish(&dict));
310  ARROW_THROW_NOT_OK(memo.AddDictionary(dict_id, dict));
311  }
312  }
313  fields.push_back(makeField(col_names_.empty() ? "" : col_names_[i], ti, dict));
314  }
315  return getArrowBatch(arrow::schema(fields));
316 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28
std::shared_ptr< arrow::Field > makeField(const std::string name, const SQLTypeInfo &target_type, const std::shared_ptr< arrow::Array > &dictionary) const
std::shared_ptr< arrow::RecordBatch > getArrowBatch(const std::shared_ptr< arrow::Schema > &schema) const
std::vector< std::string > col_names_
CHECK(cgen_state)
std::shared_ptr< ResultSet > results_

+ Here is the call graph for this function:

std::shared_ptr< arrow::Array > ArrowResultSetConverter::finishColumnBuilder ( ColumnBuilder &  column_builder) const
inline private

Definition at line 613 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, and ArrowResultSetConverter::ColumnBuilder::field.

614  {
615  std::shared_ptr<Array> values;
616  ARROW_THROW_NOT_OK(column_builder.builder->Finish(&values));
617  if (column_builder.field->type()->id() == Type::DICTIONARY) {
618  return std::make_shared<DictionaryArray>(column_builder.field->type(), values);
619  } else {
620  return values;
621  }
622 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28
std::shared_ptr< arrow::RecordBatch > ArrowResultSetConverter::getArrowBatch ( const std::shared_ptr< arrow::Schema > &  schema) const
private

Definition at line 318 of file ArrowResultSetConverter.cpp.

References File_Namespace::append(), ARROW_RECORDBATCH_MAKE, CHECK(), CHECK_EQ, cpu_threads(), GPU, kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

319  {
320  std::vector<std::shared_ptr<arrow::Array>> result_columns;
321 
322  const size_t entry_count = top_n_ < 0
323  ? results_->entryCount()
324  : std::min(size_t(top_n_), results_->entryCount());
325  if (!entry_count) {
326  return ARROW_RECORDBATCH_MAKE(schema, 0, result_columns);
327  }
328  const auto col_count = results_->colCount();
329  size_t row_count = 0;
330 
331  std::vector<ColumnBuilder> builders(col_count);
332 
333  // Create array builders
334  for (size_t i = 0; i < col_count; ++i) {
335  initializeColumnBuilder(builders[i], results_->getColType(i), schema->field(i));
336  }
337 
338  // TODO(miyu): speed up for columnar buffers
339  auto fetch = [&](std::vector<std::shared_ptr<ValueArray>>& value_seg,
340  std::vector<std::shared_ptr<std::vector<bool>>>& null_bitmap_seg,
341  const size_t start_entry,
342  const size_t end_entry) -> size_t {
343  CHECK_EQ(value_seg.size(), col_count);
344  CHECK_EQ(null_bitmap_seg.size(), col_count);
345  const auto entry_count = end_entry - start_entry;
346  size_t seg_row_count = 0;
347  for (size_t i = start_entry; i < end_entry; ++i) {
348  auto row = results_->getRowAtNoTranslations(i);
349  if (row.empty()) {
350  continue;
351  }
352  ++seg_row_count;
353  for (size_t j = 0; j < col_count; ++j) {
354  auto scalar_value = boost::get<ScalarTargetValue>(&row[j]);
355  // TODO(miyu): support more types other than scalar.
356  CHECK(scalar_value);
357  const auto& column = builders[j];
358  switch (column.physical_type) {
359  case kBOOLEAN:
360  create_or_append_value<bool, int64_t>(
361  *scalar_value, value_seg[j], entry_count);
362  create_or_append_validity<int64_t>(
363  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
364  break;
365  case kTINYINT:
366  create_or_append_value<int8_t, int64_t>(
367  *scalar_value, value_seg[j], entry_count);
368  create_or_append_validity<int64_t>(
369  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
370  break;
371  case kSMALLINT:
372  create_or_append_value<int16_t, int64_t>(
373  *scalar_value, value_seg[j], entry_count);
374  create_or_append_validity<int64_t>(
375  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
376  break;
377  case kINT:
378  create_or_append_value<int32_t, int64_t>(
379  *scalar_value, value_seg[j], entry_count);
380  create_or_append_validity<int64_t>(
381  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
382  break;
383  case kBIGINT:
384  create_or_append_value<int64_t, int64_t>(
385  *scalar_value, value_seg[j], entry_count);
386  create_or_append_validity<int64_t>(
387  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
388  break;
389  case kFLOAT:
390  create_or_append_value<float, float>(
391  *scalar_value, value_seg[j], entry_count);
392  create_or_append_validity<float>(
393  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
394  break;
395  case kDOUBLE:
396  create_or_append_value<double, double>(
397  *scalar_value, value_seg[j], entry_count);
398  create_or_append_validity<double>(
399  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
400  break;
401  case kTIME:
402  create_or_append_value<int32_t, int64_t>(
403  *scalar_value, value_seg[j], entry_count);
404  create_or_append_validity<int64_t>(
405  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
406  break;
407  case kDATE:
408  device_type_ == ExecutorDeviceType::GPU
409  ? create_or_append_value<int64_t, int64_t>(
410  *scalar_value, value_seg[j], entry_count)
411  : create_or_append_value<int32_t, int64_t>(
412  *scalar_value, value_seg[j], entry_count);
413  create_or_append_validity<int64_t>(
414  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
415  break;
416  case kTIMESTAMP:
417  create_or_append_value<int64_t, int64_t>(
418  *scalar_value, value_seg[j], entry_count);
419  create_or_append_validity<int64_t>(
420  *scalar_value, column.col_type, null_bitmap_seg[j], entry_count);
421  break;
422  default:
423  // TODO(miyu): support more scalar types.
424  throw std::runtime_error(column.col_type.get_type_name() +
425  " is not supported in Arrow result sets.");
426  }
427  }
428  }
429  return seg_row_count;
430  };
431 
432  std::vector<std::shared_ptr<ValueArray>> column_values(col_count, nullptr);
433  std::vector<std::shared_ptr<std::vector<bool>>> null_bitmaps(col_count, nullptr);
434  const bool multithreaded = entry_count > 10000 && !results_->isTruncated();
435  if (multithreaded) {
436  const size_t cpu_count = cpu_threads();
437  std::vector<std::future<size_t>> child_threads;
438  std::vector<std::vector<std::shared_ptr<ValueArray>>> column_value_segs(
439  cpu_count, std::vector<std::shared_ptr<ValueArray>>(col_count, nullptr));
440  std::vector<std::vector<std::shared_ptr<std::vector<bool>>>> null_bitmap_segs(
441  cpu_count, std::vector<std::shared_ptr<std::vector<bool>>>(col_count, nullptr));
442  const auto stride = (entry_count + cpu_count - 1) / cpu_count;
443  for (size_t i = 0, start_entry = 0; start_entry < entry_count;
444  ++i, start_entry += stride) {
445  const auto end_entry = std::min(entry_count, start_entry + stride);
446  child_threads.push_back(std::async(std::launch::async,
447  fetch,
448  std::ref(column_value_segs[i]),
449  std::ref(null_bitmap_segs[i]),
450  start_entry,
451  end_entry));
452  }
453  for (auto& child : child_threads) {
454  row_count += child.get();
455  }
456  for (int i = 0; i < schema->num_fields(); ++i) {
457  reserveColumnBuilderSize(builders[i], row_count);
458  for (size_t j = 0; j < cpu_count; ++j) {
459  if (!column_value_segs[j][i]) {
460  continue;
461  }
462  append(builders[i], *column_value_segs[j][i], null_bitmap_segs[j][i]);
463  }
464  }
465  } else {
466  row_count = fetch(column_values, null_bitmaps, size_t(0), entry_count);
467  for (int i = 0; i < schema->num_fields(); ++i) {
468  reserveColumnBuilderSize(builders[i], row_count);
469  append(builders[i], *column_values[i], null_bitmaps[i]);
470  }
471  }
472 
473  for (size_t i = 0; i < col_count; ++i) {
474  result_columns.push_back(finishColumnBuilder(builders[i]));
475  }
476  return ARROW_RECORDBATCH_MAKE(schema, row_count, result_columns);
477 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
Definition: sqltypes.h:52
void append(ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid) const
std::shared_ptr< arrow::Array > finishColumnBuilder(ColumnBuilder &column_builder) const
void reserveColumnBuilderSize(ColumnBuilder &column_builder, const size_t row_count) const
#define ARROW_RECORDBATCH_MAKE
CHECK(cgen_state)
ExecutorDeviceType device_type_
Definition: sqltypes.h:56
std::shared_ptr< ResultSet > results_
void initializeColumnBuilder(ColumnBuilder &column_builder, const SQLTypeInfo &col_type, const std::shared_ptr< arrow::Field > &field) const
Definition: sqltypes.h:48
int cpu_threads()
Definition: thread_count.h:25

+ Here is the call graph for this function:

ArrowResult ArrowResultSetConverter::getArrowResult ( ) const
inline

Definition at line 181 of file ArrowResultSet.h.

References getArrowResultImpl().

181 { return getArrowResultImpl(); }
ArrowResult getArrowResultImpl() const

+ Here is the call graph for this function:

ArrowResult ArrowResultSetConverter::getArrowResultImpl ( ) const
private

Definition at line 217 of file ArrowResultSetConverter.cpp.

References CHECK(), CPU, and arrow::get_and_copy_to_shm().

Referenced by getArrowResult().

217  {
218  const auto serialized_arrow_output = getSerializedArrowOutput();
219  const auto& serialized_schema = serialized_arrow_output.schema;
220  const auto& serialized_records = serialized_arrow_output.records;
221 
222  const auto schema_key = arrow::get_and_copy_to_shm(serialized_schema);
223  CHECK(schema_key != IPC_PRIVATE);
224  std::vector<char> schema_handle_buffer(sizeof(key_t), 0);
225  memcpy(&schema_handle_buffer[0],
226  reinterpret_cast<const unsigned char*>(&schema_key),
227  sizeof(key_t));
228  if (device_type_ == ExecutorDeviceType::CPU) {
229  const auto record_key = arrow::get_and_copy_to_shm(serialized_records);
230  std::vector<char> record_handle_buffer(sizeof(key_t), 0);
231  memcpy(&record_handle_buffer[0],
232  reinterpret_cast<const unsigned char*>(&record_key),
233  sizeof(key_t));
234 
235  return {schema_handle_buffer,
236  serialized_schema->size(),
237  record_handle_buffer,
238  serialized_records->size(),
239  nullptr};
240  }
241 #ifdef HAVE_CUDA
242  if (serialized_records->size()) {
243  CHECK(data_mgr_);
244  const auto cuda_mgr = data_mgr_->getCudaMgr();
245  CHECK(cuda_mgr);
246  auto dev_ptr = reinterpret_cast<CUdeviceptr>(
247  cuda_mgr->allocateDeviceMem(serialized_records->size(), device_id_));
248  CUipcMemHandle record_handle;
249  cuIpcGetMemHandle(&record_handle, dev_ptr);
250  cuda_mgr->copyHostToDevice(
251  reinterpret_cast<int8_t*>(dev_ptr),
252  reinterpret_cast<const int8_t*>(serialized_records->data()),
253  serialized_records->size(),
254  device_id_);
255  std::vector<char> record_handle_buffer(sizeof(record_handle), 0);
256  memcpy(&record_handle_buffer[0],
257  reinterpret_cast<unsigned char*>(&record_handle),
258  sizeof(CUipcMemHandle));
259  return {schema_handle_buffer,
260  serialized_schema->size(),
261  record_handle_buffer,
262  serialized_records->size(),
263  reinterpret_cast<int8_t*>(dev_ptr)};
264  }
265 #endif
266  return {schema_handle_buffer, serialized_schema->size(), {}, 0, nullptr};
267 }
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
unsigned long long CUdeviceptr
Definition: nocuda.h:27
CHECK(cgen_state)
ExecutorDeviceType device_type_
key_t get_and_copy_to_shm(const std::shared_ptr< Buffer > &data)
SerializedArrowOutput getSerializedArrowOutput() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< arrow::DataType > ArrowResultSetConverter::getArrowType ( const SQLTypeInfo &  mapd_type,
const std::shared_ptr< arrow::Array > &  dict_values 
) const
private

Definition at line 487 of file ArrowResultSetConverter.cpp.

References CHECK(), anonymous_namespace{ArrowResultSetConverter.cpp}::get_dict_index_type_info(), anonymous_namespace{ArrowResultSetConverter.cpp}::get_physical_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_precision(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_scale(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type_name(), GPU, SQLTypeInfoCore< TYPE_FACET_PACK >::is_dict_encoded_string(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and to_string().

489  {
490  switch (get_physical_type(mapd_type)) {
491  case kBOOLEAN:
492  return boolean();
493  case kTINYINT:
494  return int8();
495  case kSMALLINT:
496  return int16();
497  case kINT:
498  return int32();
499  case kBIGINT:
500  return int64();
501  case kFLOAT:
502  return float32();
503  case kDOUBLE:
504  return float64();
505  case kCHAR:
506  case kVARCHAR:
507  case kTEXT:
508  if (mapd_type.is_dict_encoded_string()) {
509  CHECK(dict_values);
510  const auto index_type =
511  getArrowType(get_dict_index_type_info(mapd_type), nullptr);
512  return dictionary(index_type, dict_values);
513  }
514  return utf8();
515  case kDECIMAL:
516  case kNUMERIC:
517  return decimal(mapd_type.get_precision(), mapd_type.get_scale());
518  case kTIME:
519  return time32(TimeUnit::SECOND);
520  case kDATE:
521  // TODO(wamsi) : Remove date64() once date32() support is added in cuDF. date32()
522  // Currently support for date32() is missing in cuDF.Hence, if client requests for
523  // date on GPU, return date64() for the time being, till support is added.
524  return device_type_ == ExecutorDeviceType::GPU ? date64() : date32();
525  case kTIMESTAMP:
526  switch (mapd_type.get_precision()) {
527  case 0:
528  return timestamp(TimeUnit::SECOND);
529  case 3:
530  return timestamp(TimeUnit::MILLI);
531  case 6:
532  return timestamp(TimeUnit::MICRO);
533  case 9:
534  return timestamp(TimeUnit::NANO);
535  default:
536  throw std::runtime_error(
537  "Unsupported timestamp precision for Arrow result sets: " +
538  std::to_string(mapd_type.get_precision()));
539  }
540  case kARRAY:
541  case kINTERVAL_DAY_TIME:
542  case kINTERVAL_YEAR_MONTH:
543  default:
544  throw std::runtime_error(mapd_type.get_type_name() +
545  " is not supported in Arrow result sets.");
546  }
547  return nullptr;
548 }
Definition: sqltypes.h:52
SQLTypeInfo get_dict_index_type_info(const SQLTypeInfo &ti)
HOST DEVICE int get_scale() const
Definition: sqltypes.h:331
std::string to_string(char const *&&v)
std::string get_type_name() const
Definition: sqltypes.h:429
CHECK(cgen_state)
ExecutorDeviceType device_type_
std::shared_ptr< arrow::DataType > getArrowType(const SQLTypeInfo &mapd_type, const std::shared_ptr< arrow::Array > &dict_values) const
Definition: sqltypes.h:55
Definition: sqltypes.h:56
Definition: sqltypes.h:44
int get_precision() const
Definition: sqltypes.h:329
Definition: sqltypes.h:48
bool is_dict_encoded_string() const
Definition: sqltypes.h:503

+ Here is the call graph for this function:

ArrowResultSetConverter::SerializedArrowOutput ArrowResultSetConverter::getSerializedArrowOutput ( ) const
private

Definition at line 270 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK.

270  {
271  arrow::ipc::DictionaryMemo dict_memo;
272  std::shared_ptr<arrow::RecordBatch> arrow_copy = convertToArrow(dict_memo);
273  std::shared_ptr<arrow::Buffer> serialized_records, serialized_schema;
274 
275  ARROW_THROW_NOT_OK(arrow::ipc::SerializeSchema(
276  *arrow_copy->schema(), arrow::default_memory_pool(), &serialized_schema));
277 
278  if (arrow_copy->num_rows()) {
279  ARROW_THROW_NOT_OK(arrow_copy->Validate());
280  ARROW_THROW_NOT_OK(arrow::ipc::SerializeRecordBatch(
281  *arrow_copy, arrow::default_memory_pool(), &serialized_records));
282  } else {
283  ARROW_THROW_NOT_OK(arrow::AllocateBuffer(0, &serialized_records));
284  }
285  return {serialized_schema, serialized_records};
286 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28
std::shared_ptr< arrow::RecordBatch > convertToArrow(arrow::ipc::DictionaryMemo &memo) const
void ArrowResultSetConverter::initializeColumnBuilder ( ColumnBuilder &  column_builder,
const SQLTypeInfo &  col_type,
const std::shared_ptr< arrow::Field > &  field 
) const
private

Definition at line 590 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, ArrowResultSetConverter::ColumnBuilder::col_type, field(), ArrowResultSetConverter::ColumnBuilder::field, anonymous_namespace{ArrowResultSetConverter.cpp}::get_dict_index_type(), anonymous_namespace{ArrowResultSetConverter.cpp}::get_physical_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_dict_encoded_string(), and ArrowResultSetConverter::ColumnBuilder::physical_type.

593  {
594  column_builder.field = field;
595  column_builder.col_type = col_type;
596  column_builder.physical_type = col_type.is_dict_encoded_string()
597  ? get_dict_index_type(col_type)
598  : get_physical_type(col_type);
599 
600  auto value_type = field->type();
601  if (value_type->id() == Type::DICTIONARY) {
602  value_type = static_cast<const DictionaryType&>(*value_type).index_type();
603  }
604  ARROW_THROW_NOT_OK(
605  arrow::MakeBuilder(default_memory_pool(), value_type, &column_builder.builder));
606 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
bool is_dict_encoded_string() const
Definition: sqltypes.h:503

+ Here is the call graph for this function:

std::shared_ptr< arrow::Field > ArrowResultSetConverter::makeField ( const std::string  name,
const SQLTypeInfo &  target_type,
const std::shared_ptr< arrow::Array > &  dictionary 
) const
private

Definition at line 479 of file ArrowResultSetConverter.cpp.

References field(), and SQLTypeInfoCore< TYPE_FACET_PACK >::get_notnull().

482  {
483  return arrow::field(
484  name, getArrowType(target_type, dictionary), !target_type.get_notnull());
485 }
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
std::shared_ptr< arrow::DataType > getArrowType(const SQLTypeInfo &mapd_type, const std::shared_ptr< arrow::Array > &dict_values) const
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:333

+ Here is the call graph for this function:

void ArrowResultSetConverter::reserveColumnBuilderSize ( ColumnBuilder &  column_builder,
const size_t  row_count 
) const
inline private

Definition at line 608 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, and ArrowResultSetConverter::ColumnBuilder::builder.

609  {
610  ARROW_THROW_NOT_OK(column_builder.builder->Reserve(static_cast<int64_t>(row_count)));
611 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:28

Friends And Related Function Documentation

friend class ArrowResultSet
friend

Definition at line 238 of file ArrowResultSet.h.

Member Data Documentation

std::vector<std::string> ArrowResultSetConverter::col_names_
private

Definition at line 235 of file ArrowResultSet.h.

std::shared_ptr<Data_Namespace::DataMgr> ArrowResultSetConverter::data_mgr_ = nullptr
private

Definition at line 232 of file ArrowResultSet.h.

int32_t ArrowResultSetConverter::device_id_ = 0
private

Definition at line 234 of file ArrowResultSet.h.

ExecutorDeviceType ArrowResultSetConverter::device_type_ = ExecutorDeviceType::GPU
private

Definition at line 233 of file ArrowResultSet.h.

std::shared_ptr<ResultSet> ArrowResultSetConverter::results_
private

Definition at line 231 of file ArrowResultSet.h.

int32_t ArrowResultSetConverter::top_n_
private

Definition at line 236 of file ArrowResultSet.h.


The documentation for this class was generated from the following files: