#include <ArrowResultSet.h>

Classes
struct	ColumnBuilder

struct	SerializedArrowOutput

Public Member Functions
	ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::shared_ptr< Data_Namespace::DataMgr > data_mgr, const ExecutorDeviceType device_type, const int32_t device_id, const std::vector< std::string > &col_names, const int32_t first_n, const ArrowTransport transport_method)

	ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::shared_ptr< Data_Namespace::DataMgr > data_mgr, const ExecutorDeviceType device_type, const int32_t device_id, const std::vector< std::string > &col_names, const int32_t first_n, const ArrowTransport transport_method, const size_t min_result_size_for_bulk_dictionary_fetch, const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch)

ArrowResult	getArrowResult () const

	ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::vector< std::string > &col_names, const int32_t first_n)

	ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::vector< std::string > &col_names, const int32_t first_n, const size_t min_result_size_for_bulk_dictionary_fetch, const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch)

std::shared_ptr < arrow::RecordBatch >	convertToArrow () const

Static Public Attributes
static constexpr size_t	default_min_result_size_for_bulk_dictionary_fetch {10000UL}

static constexpr double	default_max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch {0.1}

Private Member Functions
std::shared_ptr < arrow::RecordBatch >	getArrowBatch (const std::shared_ptr< arrow::Schema > &schema) const

std::shared_ptr< arrow::Field >	makeField (const std::string name, const SQLTypeInfo &target_type) const

SerializedArrowOutput	getSerializedArrowOutput (arrow::ipc::DictionaryFieldMapper *mapper) const

void	initializeColumnBuilder (ColumnBuilder &column_builder, const SQLTypeInfo &col_type, const size_t result_col_idx, const std::shared_ptr< arrow::Field > &field) const

void	append (ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid) const

std::shared_ptr< arrow::Array >	finishColumnBuilder (ColumnBuilder &column_builder) const

Private Attributes
std::shared_ptr< ResultSet >	results_

std::shared_ptr < Data_Namespace::DataMgr >	data_mgr_ = nullptr

ExecutorDeviceType	device_type_ = ExecutorDeviceType::GPU

int32_t	device_id_ = 0

std::vector< std::string >	col_names_

int32_t	top_n_

ArrowTransport	transport_method_

const size_t	min_result_size_for_bulk_dictionary_fetch_

const double	max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_

Friends
class	ArrowResultSet

Detailed Description

Definition at line 228 of file ArrowResultSet.h.

Constructor & Destructor Documentation

ArrowResultSetConverter::ArrowResultSetConverter	(	const std::shared_ptr< ResultSet > &	results,
		const std::shared_ptr< Data_Namespace::DataMgr >	data_mgr,
		const ExecutorDeviceType	device_type,
		const int32_t	device_id,
		const std::vector< std::string > &	col_names,
		const int32_t	first_n,
		const ArrowTransport	transport_method
	)

inline

Definition at line 234 of file ArrowResultSet.h.

       : results_(results)
       , data_mgr_(data_mgr)
       , device_type_(device_type)
       , device_id_(device_id)
       , col_names_(col_names)
       , top_n_(first_n)
       , transport_method_(transport_method)
       , min_result_size_for_bulk_dictionary_fetch_(
             ArrowResultSetConverter::default_min_result_size_for_bulk_dictionary_fetch)
       , max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_(
             ArrowResultSetConverter::
                 default_max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch) {}

ArrowResultSetConverter::ArrowResultSetConverter	(	const std::shared_ptr< ResultSet > &	results,
		const std::shared_ptr< Data_Namespace::DataMgr >	data_mgr,
		const ExecutorDeviceType	device_type,
		const int32_t	device_id,
		const std::vector< std::string > &	col_names,
		const int32_t	first_n,
		const ArrowTransport	transport_method,
		const size_t	min_result_size_for_bulk_dictionary_fetch,
		const double	max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch
	)

inline

Definition at line 254 of file ArrowResultSet.h.

       // Member-wise initialisation from the arguments; every data member listed
       // under "Private Attributes" is set explicitly by this overload.
       : results_(results)
       , data_mgr_(data_mgr)
       , device_type_(device_type)
       , device_id_(device_id)
       , col_names_(col_names)
       // first_n is kept as top_n_; getArrowBatch() treats a negative value as
       // "no limit" and otherwise caps the number of entries it visits.
       , top_n_(first_n)
       , transport_method_(transport_method)
       // Caller-supplied tuning values for bulk dictionary fetching (the other
       // overloads fall back to the default_* static constants instead).
       , min_result_size_for_bulk_dictionary_fetch_(
             min_result_size_for_bulk_dictionary_fetch)
       , max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_(
             max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch) {}

ArrowResultSetConverter::ArrowResultSetConverter	(	const std::shared_ptr< ResultSet > &	results,
		const std::vector< std::string > &	col_names,
		const int32_t	first_n
	)

inline

Definition at line 293 of file ArrowResultSet.h.

       : results_(results)
       , col_names_(col_names)
       , top_n_(first_n)
       , min_result_size_for_bulk_dictionary_fetch_(
             ArrowResultSetConverter::default_min_result_size_for_bulk_dictionary_fetch)
       , max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_(
             ArrowResultSetConverter::
                 default_max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch) {}

ArrowResultSetConverter::ArrowResultSetConverter	(	const std::shared_ptr< ResultSet > &	results,
		const std::vector< std::string > &	col_names,
		const int32_t	first_n,
		const size_t	min_result_size_for_bulk_dictionary_fetch,
		const double	max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch
	)

inline

Definition at line 305 of file ArrowResultSet.h.

       : results_(results)
       , col_names_(col_names)
       , top_n_(first_n)
       , min_result_size_for_bulk_dictionary_fetch_(
             min_result_size_for_bulk_dictionary_fetch)
       , max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_(
             max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch) {}

Member Function Documentation

void ArrowResultSetConverter::append	(	ColumnBuilder &	column_builder,
		const ValueArray &	values,
		const std::shared_ptr< std::vector< bool >> &	is_valid
	)		const

private

Definition at line 1618 of file ArrowResultSetConverter.cpp.

References CHECK_EQ, ArrowResultSetConverter::ColumnBuilder::col_type, device_type_, GPU, SQLTypeInfo::is_dict_encoded_string(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and ArrowResultSetConverter::ColumnBuilder::physical_type.

Referenced by getArrowBatch().

                                                           {
   if (column_builder.col_type.is_dict_encoded_string()) {
     CHECK_EQ(column_builder.physical_type,
              kINT);  // assume all dicts use none-encoded type for now
     appendToColumnBuilder<arrow::StringDictionary32Builder, int32_t>(
         column_builder, values, is_valid);
     return;
   }
   switch (column_builder.physical_type) {
     case kBOOLEAN:
       appendToColumnBuilder<arrow::BooleanBuilder, bool>(
           column_builder, values, is_valid);
       break;
     case kTINYINT:
       appendToColumnBuilder<arrow::Int8Builder, int8_t>(column_builder, values, is_valid);
       break;
     case kSMALLINT:
       appendToColumnBuilder<arrow::Int16Builder, int16_t>(
           column_builder, values, is_valid);
       break;
     case kINT:
       appendToColumnBuilder<arrow::Int32Builder, int32_t>(
           column_builder, values, is_valid);
       break;
     case kBIGINT:
       appendToColumnBuilder<arrow::Int64Builder, int64_t>(
           column_builder, values, is_valid);
       break;
     case kDECIMAL:
       appendToColumnBuilder<arrow::Decimal128Builder, int64_t>(
           column_builder, values, is_valid);
       break;
     case kFLOAT:
       appendToColumnBuilder<arrow::FloatBuilder, float>(column_builder, values, is_valid);
       break;
     case kDOUBLE:
       appendToColumnBuilder<arrow::DoubleBuilder, double>(
           column_builder, values, is_valid);
       break;
     case kTIME:
       appendToColumnBuilder<arrow::Time32Builder, int32_t>(
           column_builder, values, is_valid);
       break;
     case kTIMESTAMP:
       appendToColumnBuilder<arrow::TimestampBuilder, int64_t>(
           column_builder, values, is_valid);
       break;
     case kDATE:
       device_type_ == ExecutorDeviceType::GPU
           ? appendToColumnBuilder<arrow::Date64Builder, int64_t>(
                 column_builder, values, is_valid)
           : appendToColumnBuilder<arrow::Date32Builder, int32_t>(
                 column_builder, values, is_valid);
       break;
     case kARRAY:
       if (column_builder.col_type.get_subtype() == kBOOLEAN) {
         appendToListColumnBuilder<arrow::BooleanBuilder, int8_t>(
             column_builder, values, is_valid);
         break;
       } else if (column_builder.col_type.get_subtype() == kTINYINT) {
         appendToListColumnBuilder<arrow::Int8Builder, int8_t>(
             column_builder, values, is_valid);
         break;
       } else if (column_builder.col_type.get_subtype() == kSMALLINT) {
         appendToListColumnBuilder<arrow::Int16Builder, int16_t>(
             column_builder, values, is_valid);
         break;
       } else if (column_builder.col_type.get_subtype() == kINT) {
         appendToListColumnBuilder<arrow::Int32Builder, int32_t>(
             column_builder, values, is_valid);
         break;
       } else if (column_builder.col_type.get_subtype() == kBIGINT) {
         appendToListColumnBuilder<arrow::Int64Builder, int64_t>(
             column_builder, values, is_valid);
         break;
       } else if (column_builder.col_type.get_subtype() == kFLOAT) {
         appendToListColumnBuilder<arrow::FloatBuilder, float>(
             column_builder, values, is_valid);
         break;
       } else if (column_builder.col_type.get_subtype() == kDOUBLE) {
         appendToListColumnBuilder<arrow::DoubleBuilder, double>(
             column_builder, values, is_valid);
         break;
       } else if (column_builder.col_type.is_dict_encoded_type()) {
         appendToListColumnBuilder<arrow::StringDictionaryBuilder, int64_t>(
             column_builder, values, is_valid);
         break;
       } else {
         throw std::runtime_error(column_builder.col_type.get_type_name() +
                                  " is not supported in Arrow result sets.");
       }
     case kCHAR:
     case kVARCHAR:
     case kTEXT:
       appendToColumnBuilder<arrow::StringBuilder, std::string>(
           column_builder, values, is_valid);
       break;
     default:
       // TODO(miyu): support more scalar types.
       throw std::runtime_error(column_builder.col_type.get_type_name() +
                                " is not supported in Arrow result sets.");
   }
 }

Here is the call graph for this function:

Here is the caller graph for this function:

std::shared_ptr< arrow::RecordBatch > ArrowResultSetConverter::convertToArrow ( ) const

Definition at line 715 of file ArrowResultSetConverter.cpp.

References CHECK, col_names_, DEBUG_TIMER, f(), getArrowBatch(), makeField(), results_, and VLOG.

Referenced by getArrowResult(), and getSerializedArrowOutput().

                                                                               {
   auto timer = DEBUG_TIMER(__func__);
   const auto col_count = results_->colCount();
   std::vector<std::shared_ptr<arrow::Field>> fields;
   CHECK(col_names_.empty() || col_names_.size() == col_count);
   for (size_t i = 0; i < col_count; ++i) {
     const auto ti = results_->getColType(i);
     fields.push_back(makeField(col_names_.empty() ? "" : col_names_[i], ti));
   }
 #if ARROW_CONVERTER_DEBUG
   VLOG(1) << "Arrow fields: ";
   for (const auto& f : fields) {
     VLOG(1) << "\t" << f->ToString(true);
   }
 #endif
   return getArrowBatch(arrow::schema(fields));
 }

Here is the call graph for this function:

Here is the caller graph for this function:

std::shared_ptr< arrow::Array > ArrowResultSetConverter::finishColumnBuilder ( ColumnBuilder & column_builder ) const

inline private

Definition at line 1386 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, and ArrowResultSetConverter::ColumnBuilder::builder.

Referenced by getArrowBatch().

                                          {
   std::shared_ptr<arrow::Array> values;
   ARROW_THROW_NOT_OK(column_builder.builder->Finish(&values));
   return values;
 }

Here is the caller graph for this function:

std::shared_ptr< arrow::RecordBatch > ArrowResultSetConverter::getArrowBatch ( const std::shared_ptr< arrow::Schema > & schema ) const

private

Definition at line 733 of file ArrowResultSetConverter.cpp.

References append(), ARROW_RECORDBATCH_MAKE, threading_serial::async(), CHECK, CHECK_EQ, cpu_threads(), anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_validity(), DEBUG_TIMER, device_type_, field(), finishColumnBuilder(), GPU, initializeColumnBuilder(), kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, heavyai::Projection, run_benchmark_import::result, results_, heavyai::TableFunction, and top_n_.

Referenced by convertToArrow().

                                                     {
   std::vector<std::shared_ptr<arrow::Array>> result_columns;
 
   // First, check if the result set is empty.
   // If so, we return an arrow result set that only
   // contains the schema (no record batch will be serialized).
   if (results_->isEmpty()) {
     return ARROW_RECORDBATCH_MAKE(schema, 0, result_columns);
   }
 
   const size_t entry_count = top_n_ < 0
                                  ? results_->entryCount()
                                  : std::min(size_t(top_n_), results_->entryCount());
 
   const auto col_count = results_->colCount();
   size_t row_count = 0;
 
   result_columns.resize(col_count);
   std::vector<ColumnBuilder> builders(col_count);
 
   // Create array builders
   for (size_t i = 0; i < col_count; ++i) {
     initializeColumnBuilder(builders[i], results_->getColType(i), i, schema->field(i));
   }
 
   // TODO(miyu): speed up for columnar buffers
   auto fetch = [&](std::vector<std::shared_ptr<ValueArray>>& value_seg,
                    std::vector<std::shared_ptr<std::vector<bool>>>& null_bitmap_seg,
                    const std::vector<bool>& non_lazy_cols,
                    const size_t start_entry,
                    const size_t end_entry) -> size_t {
     CHECK_EQ(value_seg.size(), col_count);
     CHECK_EQ(null_bitmap_seg.size(), col_count);
     const auto local_entry_count = end_entry - start_entry;
     size_t seg_row_count = 0;
     for (size_t i = start_entry; i < end_entry; ++i) {
       auto row = results_->getRowAtNoTranslations(i, non_lazy_cols);
       if (row.empty()) {
         continue;
       }
       ++seg_row_count;
       for (size_t j = 0; j < col_count; ++j) {
         if (!non_lazy_cols.empty() && non_lazy_cols[j]) {
           continue;
         }
 
         if (auto scalar_value = boost::get<ScalarTargetValue>(&row[j])) {
           // TODO(miyu): support more types other than scalar.
           CHECK(scalar_value);
           const auto& column = builders[j];
           switch (column.physical_type) {
             case kBOOLEAN:
               create_or_append_value<bool, int64_t>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<int64_t>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kTINYINT:
               create_or_append_value<int8_t, int64_t>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<int64_t>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kSMALLINT:
               create_or_append_value<int16_t, int64_t>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<int64_t>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kINT:
               create_or_append_value<int32_t, int64_t>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<int64_t>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kBIGINT:
               create_or_append_value<int64_t, int64_t>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<int64_t>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kDECIMAL:
               create_or_append_value<int64_t, int64_t>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<int64_t>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kFLOAT:
               create_or_append_value<float, float>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<float>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kDOUBLE:
               create_or_append_value<double, double>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<double>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kTIME:
               create_or_append_value<int32_t, int64_t>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<int64_t>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kDATE:
               device_type_ == ExecutorDeviceType::GPU
                   ? create_or_append_value<int64_t, int64_t>(
                         *scalar_value, value_seg[j], local_entry_count)
                   : create_or_append_value<int32_t, int64_t>(
                         *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<int64_t>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kTIMESTAMP:
               create_or_append_value<int64_t, int64_t>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<int64_t>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kTEXT:
               create_or_append_value<std::string, NullableString>(
                   *scalar_value, value_seg[j], local_entry_count);
               create_or_append_validity<NullableString>(
                   *scalar_value, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             default:
               // TODO(miyu): support more scalar types.
               throw std::runtime_error(column.col_type.get_type_name() +
                                        " is not supported in Arrow result sets.");
           }
         } else if (auto array = boost::get<ArrayTargetValue>(&row[j])) {
           // array := Boost::optional<std::vector<ScalarTargetValue>>
           const auto& column = builders[j];
           switch (column.col_type.get_subtype()) {
             case kBOOLEAN:
               create_or_append_value<int8_t, int64_t>(
                   *array, value_seg[j], local_entry_count);
               create_or_append_validity(
                   *array, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kTINYINT:
               create_or_append_value<int8_t, int64_t>(
                   *array, value_seg[j], local_entry_count);
               create_or_append_validity(
                   *array, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kSMALLINT:
               create_or_append_value<int16_t, int64_t>(
                   *array, value_seg[j], local_entry_count);
               create_or_append_validity(
                   *array, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kINT:
               create_or_append_value<int32_t, int64_t>(
                   *array, value_seg[j], local_entry_count);
               create_or_append_validity(
                   *array, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kBIGINT:
               create_or_append_value<int64_t, int64_t>(
                   *array, value_seg[j], local_entry_count);
               create_or_append_validity(
                   *array, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kFLOAT:
               create_or_append_value<float, float>(
                   *array, value_seg[j], local_entry_count);
               create_or_append_validity(
                   *array, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kDOUBLE:
               create_or_append_value<double, double>(
                   *array, value_seg[j], local_entry_count);
               create_or_append_validity(
                   *array, column.col_type, null_bitmap_seg[j], local_entry_count);
               break;
             case kTEXT:
               if (column.col_type.is_dict_encoded_type()) {
                 create_or_append_value<int64_t, int64_t>(
                     *array, value_seg[j], local_entry_count);
                 create_or_append_validity(
                     *array, column.col_type, null_bitmap_seg[j], local_entry_count);
                 break;
               }
             default:
               throw std::runtime_error(column.col_type.get_type_name() +
                                        " is not supported in Arrow result sets.");
           }
         }
       }
     }
     return seg_row_count;
   };
 
   auto convert_columns = [&](std::vector<std::shared_ptr<arrow::Array>>& result,
                              const std::vector<bool>& non_lazy_cols,
                              const size_t start_col,
                              const size_t end_col) {
     for (size_t col = start_col; col < end_col; ++col) {
       if (!non_lazy_cols.empty() && !non_lazy_cols[col]) {
         continue;
       }
 
       const auto& column = builders[col];
       switch (column.physical_type) {
         case kTINYINT:
           convert_column<int8_t>(results_, col, entry_count, result[col]);
           break;
         case kSMALLINT:
           convert_column<int16_t>(results_, col, entry_count, result[col]);
           break;
         case kINT:
           convert_column<int32_t>(results_, col, entry_count, result[col]);
           break;
         case kBIGINT:
           convert_column<int64_t>(results_, col, entry_count, result[col]);
           break;
         case kFLOAT:
           convert_column<float>(results_, col, entry_count, result[col]);
           break;
         case kDOUBLE:
           convert_column<double>(results_, col, entry_count, result[col]);
           break;
         default:
           throw std::runtime_error(column.col_type.get_type_name() +
                                    " is not supported in Arrow column converter.");
       }
     }
   };
 
   std::vector<std::shared_ptr<ValueArray>> column_values(col_count, nullptr);
   std::vector<std::shared_ptr<std::vector<bool>>> null_bitmaps(col_count, nullptr);
   const bool multithreaded = entry_count > 10000 && !results_->isTruncated();
   // Don't believe we ever output directly from a table function, but this
   // might be possible with a future query plan optimization
   bool use_columnar_converter = results_->isDirectColumnarConversionPossible() &&
                                 (results_->getQueryMemDesc().getQueryDescriptionType() ==
                                      QueryDescriptionType::Projection ||
                                  results_->getQueryMemDesc().getQueryDescriptionType() ==
                                      QueryDescriptionType::TableFunction) &&
                                 entry_count == results_->entryCount();
   std::vector<bool> non_lazy_cols;
   if (use_columnar_converter) {
     auto timer = DEBUG_TIMER("columnar converter");
     std::vector<size_t> non_lazy_col_pos;
     size_t non_lazy_col_count = 0;
     const auto& lazy_fetch_info = results_->getLazyFetchInfo();
 
     non_lazy_cols.reserve(col_count);
     non_lazy_col_pos.reserve(col_count);
     for (size_t i = 0; i < col_count; ++i) {
       bool is_lazy =
           lazy_fetch_info.empty() ? false : lazy_fetch_info[i].is_lazily_fetched;
       // Currently column converter cannot handle some data types.
       // Treat them as lazy.
       switch (builders[i].physical_type) {
         case kBOOLEAN:
         case kTIME:
         case kDATE:
         case kTIMESTAMP:
           is_lazy = true;
           break;
         default:
           break;
       }
       if (builders[i].field->type()->id() == arrow::Type::DICTIONARY) {
         is_lazy = true;
       }
       non_lazy_cols.emplace_back(!is_lazy);
       if (!is_lazy) {
         ++non_lazy_col_count;
         non_lazy_col_pos.emplace_back(i);
       }
     }
 
     if (non_lazy_col_count == col_count) {
       non_lazy_cols.clear();
       non_lazy_col_pos.clear();
     } else {
       non_lazy_col_pos.emplace_back(col_count);
     }
 
     std::vector<std::future<void>> child_threads;
     size_t num_threads =
         std::min(multithreaded ? (size_t)cpu_threads() : (size_t)1, non_lazy_col_count);
 
     size_t start_col = 0;
     size_t end_col = 0;
     for (size_t i = 0; i < num_threads; ++i) {
       start_col = end_col;
       end_col = (i + 1) * non_lazy_col_count / num_threads;
       size_t phys_start_col =
           non_lazy_col_pos.empty() ? start_col : non_lazy_col_pos[start_col];
       size_t phys_end_col =
           non_lazy_col_pos.empty() ? end_col : non_lazy_col_pos[end_col];
       child_threads.push_back(std::async(std::launch::async,
                                          convert_columns,
                                          std::ref(result_columns),
                                          non_lazy_cols,
                                          phys_start_col,
                                          phys_end_col));
     }
     for (auto& child : child_threads) {
       child.get();
     }
     row_count = entry_count;
   }
   if (!use_columnar_converter || !non_lazy_cols.empty()) {
     auto timer = DEBUG_TIMER("row converter");
     row_count = 0;
     if (multithreaded) {
       const size_t cpu_count = cpu_threads();
       std::vector<std::future<size_t>> child_threads;
       std::vector<std::vector<std::shared_ptr<ValueArray>>> column_value_segs(
           cpu_count, std::vector<std::shared_ptr<ValueArray>>(col_count, nullptr));
       std::vector<std::vector<std::shared_ptr<std::vector<bool>>>> null_bitmap_segs(
           cpu_count, std::vector<std::shared_ptr<std::vector<bool>>>(col_count, nullptr));
       const auto stride = (entry_count + cpu_count - 1) / cpu_count;
       for (size_t i = 0, start_entry = 0; start_entry < entry_count;
            ++i, start_entry += stride) {
         const auto end_entry = std::min(entry_count, start_entry + stride);
         child_threads.push_back(std::async(std::launch::async,
                                            fetch,
                                            std::ref(column_value_segs[i]),
                                            std::ref(null_bitmap_segs[i]),
                                            non_lazy_cols,
                                            start_entry,
                                            end_entry));
       }
       for (auto& child : child_threads) {
         row_count += child.get();
       }
       {
         auto timer = DEBUG_TIMER("append rows to arrow");
         for (int i = 0; i < schema->num_fields(); ++i) {
           if (!non_lazy_cols.empty() && non_lazy_cols[i]) {
             continue;
           }
 
           for (size_t j = 0; j < cpu_count; ++j) {
             if (!column_value_segs[j][i]) {
               continue;
             }
             append(builders[i], *column_value_segs[j][i], null_bitmap_segs[j][i]);
           }
         }
       }
     } else {
       row_count =
           fetch(column_values, null_bitmaps, non_lazy_cols, size_t(0), entry_count);
       {
         auto timer = DEBUG_TIMER("append rows to arrow single thread");
         for (int i = 0; i < schema->num_fields(); ++i) {
           if (!non_lazy_cols.empty() && non_lazy_cols[i]) {
             continue;
           }
 
           append(builders[i], *column_values[i], null_bitmaps[i]);
         }
       }
     }
 
     {
       auto timer = DEBUG_TIMER("finish builders");
       for (size_t i = 0; i < col_count; ++i) {
         if (!non_lazy_cols.empty() && non_lazy_cols[i]) {
           continue;
         }
 
         result_columns[i] = finishColumnBuilder(builders[i]);
       }
     }
   }
 
   return ARROW_RECORDBATCH_MAKE(schema, row_count, result_columns);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

ArrowResult ArrowResultSetConverter::getArrowResult ( ) const

Serialize an Arrow result to IPC memory. Users are responsible for freeing all CPU IPC buffers using deallocateArrowResultBuffer. GPU buffers will become owned by the caller upon deserialization, and will be automatically freed when they go out of scope.

Definition at line 446 of file ArrowResultSetConverter.cpp.

References ARROW_ASSIGN_OR_THROW, ARROW_LOG, ARROW_THROW_NOT_OK, CHECK, CHECK_GE, convertToArrow(), CPU, DEBUG_TIMER, device_id_, device_type_, arrow::get_and_copy_to_shm(), anonymous_namespace{ArrowResultSetConverter.cpp}::get_shm_buffer(), GPU, SHARED_MEMORY, transport_method_, UNREACHABLE, and WIRE.

                                                           {
   auto timer = DEBUG_TIMER(__func__);
   std::shared_ptr<arrow::RecordBatch> record_batch = convertToArrow();
 
   struct BuildResultParams {
     int64_t schemaSize() const {
       return serialized_schema ? serialized_schema->size() : 0;
     };
     int64_t dictSize() const { return serialized_dict ? serialized_dict->size() : 0; };
     int64_t totalSize() const { return schemaSize() + records_size + dictSize(); }
     bool hasRecordBatch() const { return records_size > 0; }
     bool hasDict() const { return dictSize() > 0; }
 
     int64_t records_size{0};
     std::shared_ptr<arrow::Buffer> serialized_schema{nullptr};
     std::shared_ptr<arrow::Buffer> serialized_dict{nullptr};
   } result_params;
 
   if (device_type_ == ExecutorDeviceType::CPU ||
       transport_method_ == ArrowTransport::WIRE) {
     const auto getWireResult = [&]() -> ArrowResult {
       auto timer = DEBUG_TIMER("serialize batch to wire");
       const auto total_size = result_params.totalSize();
       std::vector<char> record_handle_data(total_size);
       auto serialized_records =
           arrow::MutableBuffer::Wrap(record_handle_data.data(), total_size);
 
       ARROW_ASSIGN_OR_THROW(auto writer, arrow::Buffer::GetWriter(serialized_records));
 
       ARROW_THROW_NOT_OK(writer->Write(
           reinterpret_cast<const uint8_t*>(result_params.serialized_schema->data()),
           result_params.schemaSize()));
 
       if (result_params.hasDict()) {
         ARROW_THROW_NOT_OK(writer->Write(
             reinterpret_cast<const uint8_t*>(result_params.serialized_dict->data()),
             result_params.dictSize()));
       }
 
       arrow::io::FixedSizeBufferWriter stream(SliceMutableBuffer(
           serialized_records, result_params.schemaSize() + result_params.dictSize()));
 
       if (result_params.hasRecordBatch()) {
         ARROW_THROW_NOT_OK(arrow::ipc::SerializeRecordBatch(
             *record_batch, arrow::ipc::IpcWriteOptions::Defaults(), &stream));
       }
 
       return {std::vector<char>(0),
               0,
               std::vector<char>(0),
               serialized_records->size(),
               std::string{""},
               std::move(record_handle_data)};
     };
 
     const auto getShmResult = [&]() -> ArrowResult {
       auto timer = DEBUG_TIMER("serialize batch to shared memory");
       std::shared_ptr<arrow::Buffer> serialized_records;
       std::vector<char> schema_handle_buffer;
       std::vector<char> record_handle_buffer(sizeof(key_t), 0);
       key_t records_shm_key = IPC_PRIVATE;
       const int64_t total_size = result_params.totalSize();
 
       std::tie(records_shm_key, serialized_records) = get_shm_buffer(total_size);
 
       memcpy(serialized_records->mutable_data(),
              result_params.serialized_schema->data(),
              (size_t)result_params.schemaSize());
 
       if (result_params.hasDict()) {
         memcpy(serialized_records->mutable_data() + result_params.schemaSize(),
                result_params.serialized_dict->data(),
                (size_t)result_params.dictSize());
       }
 
       arrow::io::FixedSizeBufferWriter stream(SliceMutableBuffer(
           serialized_records, result_params.schemaSize() + result_params.dictSize()));
 
       if (result_params.hasRecordBatch()) {
         ARROW_THROW_NOT_OK(arrow::ipc::SerializeRecordBatch(
             *record_batch, arrow::ipc::IpcWriteOptions::Defaults(), &stream));
       }
 
       memcpy(&record_handle_buffer[0],
              reinterpret_cast<const unsigned char*>(&records_shm_key),
              sizeof(key_t));
 
       return {schema_handle_buffer,
               0,
               record_handle_buffer,
               serialized_records->size(),
               std::string{""}};
     };
 
     arrow::ipc::DictionaryFieldMapper mapper(*record_batch->schema());
     auto options = arrow::ipc::IpcWriteOptions::Defaults();
     auto dict_stream = arrow::io::BufferOutputStream::Create(1024).ValueOrDie();
 
     // If our record batch is going to be empty, we omit it entirely,
     // only serializing the schema.
     if (!record_batch->num_rows()) {
       ARROW_ASSIGN_OR_THROW(result_params.serialized_schema,
                             arrow::ipc::SerializeSchema(*record_batch->schema(),
                                                         arrow::default_memory_pool()));
 
       switch (transport_method_) {
         case ArrowTransport::WIRE:
           return getWireResult();
         case ArrowTransport::SHARED_MEMORY:
           return getShmResult();
         default:
           UNREACHABLE();
       }
     }
 
     ARROW_ASSIGN_OR_THROW(auto dictionaries, CollectDictionaries(*record_batch, mapper));
 
     ARROW_LOG("CPU") << "found " << dictionaries.size() << " dictionaries";
 
     for (auto& pair : dictionaries) {
       arrow::ipc::IpcPayload payload;
       int64_t dictionary_id = pair.first;
       const auto& dictionary = pair.second;
 
       ARROW_THROW_NOT_OK(
           GetDictionaryPayload(dictionary_id, dictionary, options, &payload));
       int32_t metadata_length = 0;
       ARROW_THROW_NOT_OK(
           WriteIpcPayload(payload, options, dict_stream.get(), &metadata_length));
     }
     result_params.serialized_dict = dict_stream->Finish().ValueOrDie();
 
     ARROW_ASSIGN_OR_THROW(result_params.serialized_schema,
                           arrow::ipc::SerializeSchema(*record_batch->schema(),
                                                       arrow::default_memory_pool()));
 
     ARROW_THROW_NOT_OK(
         arrow::ipc::GetRecordBatchSize(*record_batch, &result_params.records_size));
 
     switch (transport_method_) {
       case ArrowTransport::WIRE:
         return getWireResult();
       case ArrowTransport::SHARED_MEMORY:
         return getShmResult();
       default:
         UNREACHABLE();
     }
   }
 #ifdef HAVE_CUDA
   CHECK(device_type_ == ExecutorDeviceType::GPU);
 
   // Copy the schema to the schema handle
   auto out_stream_result = arrow::io::BufferOutputStream::Create(1024);
   ARROW_THROW_NOT_OK(out_stream_result.status());
   auto out_stream = std::move(out_stream_result).ValueOrDie();
 
   arrow::ipc::DictionaryFieldMapper mapper(*record_batch->schema());
   arrow::ipc::DictionaryMemo current_memo;
   arrow::ipc::DictionaryMemo serialized_memo;
 
   arrow::ipc::IpcPayload schema_payload;
   ARROW_THROW_NOT_OK(arrow::ipc::GetSchemaPayload(*record_batch->schema(),
                                                   arrow::ipc::IpcWriteOptions::Defaults(),
                                                   mapper,
                                                   &schema_payload));
   int32_t schema_payload_length = 0;
   ARROW_THROW_NOT_OK(arrow::ipc::WriteIpcPayload(schema_payload,
                                                  arrow::ipc::IpcWriteOptions::Defaults(),
                                                  out_stream.get(),
                                                  &schema_payload_length));
   ARROW_ASSIGN_OR_THROW(auto dictionaries, CollectDictionaries(*record_batch, mapper));
   ARROW_LOG("GPU") << "Dictionary "
                    << "found dicts: " << dictionaries.size();
 
   ARROW_THROW_NOT_OK(
       arrow::ipc::internal::CollectDictionaries(*record_batch, &current_memo));
 
   // now try a dictionary
   std::shared_ptr<arrow::Schema> dummy_schema;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dict_batches;
 
   for (const auto& pair : dictionaries) {
     arrow::ipc::IpcPayload payload;
     const auto& dict_id = pair.first;
     CHECK_GE(dict_id, 0);
     ARROW_LOG("GPU") << "Dictionary "
                      << "dict_id: " << dict_id;
     const auto& dict = pair.second;
     CHECK(dict);
 
     if (!dummy_schema) {
       auto dummy_field = std::make_shared<arrow::Field>("", dict->type());
       dummy_schema = std::make_shared<arrow::Schema>(
           std::vector<std::shared_ptr<arrow::Field>>{dummy_field});
     }
     dict_batches.emplace_back(
         arrow::RecordBatch::Make(dummy_schema, dict->length(), {dict}));
   }
 
   if (!dict_batches.empty()) {
     ARROW_THROW_NOT_OK(arrow::ipc::WriteRecordBatchStream(
         dict_batches, arrow::ipc::IpcWriteOptions::Defaults(), out_stream.get()));
   }
 
   auto complete_ipc_stream = out_stream->Finish();
   ARROW_THROW_NOT_OK(complete_ipc_stream.status());
   auto serialized_records = std::move(complete_ipc_stream).ValueOrDie();
 
   const auto record_key = arrow::get_and_copy_to_shm(serialized_records);
   std::vector<char> schema_record_key_buffer(sizeof(key_t), 0);
   memcpy(&schema_record_key_buffer[0],
          reinterpret_cast<const unsigned char*>(&record_key),
          sizeof(key_t));
 
   arrow::cuda::CudaDeviceManager* manager;
   ARROW_ASSIGN_OR_THROW(manager, arrow::cuda::CudaDeviceManager::Instance());
   std::shared_ptr<arrow::cuda::CudaContext> context;
   ARROW_ASSIGN_OR_THROW(context, manager->GetContext(device_id_));
 
   std::shared_ptr<arrow::cuda::CudaBuffer> device_serialized;
   ARROW_ASSIGN_OR_THROW(device_serialized,
                         SerializeRecordBatch(*record_batch, context.get()));
 
   std::shared_ptr<arrow::cuda::CudaIpcMemHandle> cuda_handle;
   ARROW_ASSIGN_OR_THROW(cuda_handle, device_serialized->ExportForIpc());
 
   std::shared_ptr<arrow::Buffer> serialized_cuda_handle;
   ARROW_ASSIGN_OR_THROW(serialized_cuda_handle,
                         cuda_handle->Serialize(arrow::default_memory_pool()));
 
   std::vector<char> record_handle_buffer(serialized_cuda_handle->size(), 0);
   memcpy(&record_handle_buffer[0],
          serialized_cuda_handle->data(),
          serialized_cuda_handle->size());
 
   return {schema_record_key_buffer,
           serialized_records->size(),
           record_handle_buffer,
           serialized_cuda_handle->size(),
           serialized_cuda_handle->ToString()};
 #else
   UNREACHABLE();
   return {std::vector<char>{}, 0, std::vector<char>{}, 0, ""};
 #endif
 }

Here is the call graph for this function:

ArrowResultSetConverter::SerializedArrowOutput ArrowResultSetConverter::getSerializedArrowOutput ( arrow::ipc::DictionaryFieldMapper * mapper ) const

private

Definition at line 693 of file ArrowResultSetConverter.cpp.

References ARROW_ASSIGN_OR_THROW, ARROW_THROW_NOT_OK, convertToArrow(), and DEBUG_TIMER.

                                                  {
   auto timer = DEBUG_TIMER(__func__);
   std::shared_ptr<arrow::RecordBatch> arrow_copy = convertToArrow();
   std::shared_ptr<arrow::Buffer> serialized_records, serialized_schema;
 
   ARROW_ASSIGN_OR_THROW(
       serialized_schema,
       arrow::ipc::SerializeSchema(*arrow_copy->schema(), arrow::default_memory_pool()));
 
   if (arrow_copy->num_rows()) {
     auto timer = DEBUG_TIMER("serialize records");
     ARROW_THROW_NOT_OK(arrow_copy->Validate());
     ARROW_ASSIGN_OR_THROW(serialized_records,
                           arrow::ipc::SerializeRecordBatch(
                               *arrow_copy, arrow::ipc::IpcWriteOptions::Defaults()));
   } else {
     ARROW_ASSIGN_OR_THROW(serialized_records, arrow::AllocateBuffer(0));
   }
   return {serialized_schema, serialized_records};
 }

Here is the call graph for this function:

void ArrowResultSetConverter::initializeColumnBuilder	(	ColumnBuilder &	column_builder,
		const SQLTypeInfo &	col_type,
		const size_t	result_col_idx,
		const std::shared_ptr< arrow::Field > &	field
	)		const

private

Definition at line 1252 of file ArrowResultSetConverter.cpp.

References ALL_STRINGS_REMAPPED, ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, CHECK, CHECK_EQ, CHECK_GT, ArrowResultSetConverter::ColumnBuilder::col_type, DEBUG_TIMER, field(), ArrowResultSetConverter::ColumnBuilder::field, anonymous_namespace{ArrowResultSetConverter.cpp}::get_dict_index_type(), foreign_storage::get_physical_type(), SQLTypeInfo::getStringDictKey(), SQLTypeInfo::is_array(), SQLTypeInfo::is_dict_encoded_string(), SQLTypeInfo::is_dict_encoded_type(), max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_, min_result_size_for_bulk_dictionary_fetch_, ONLY_TRANSIENT_STRINGS_REMAPPED, ArrowResultSetConverter::ColumnBuilder::physical_type, results_, ArrowResultSetConverter::ColumnBuilder::string_array, ArrowResultSetConverter::ColumnBuilder::string_remap_mode, ArrowResultSetConverter::ColumnBuilder::string_remapping, StringDictionaryProxy::transientIndexToId(), and VLOG.

Referenced by getArrowBatch().

                                                       {
   // Prepares column_builder for one result column: records the Arrow field and
   // SQL type, picks the physical type, and creates the Arrow builder.
   // For dictionary-encoded types it also materializes the Arrow dictionary values
   // and fills column_builder.string_remapping, which maps string ids from the
   // result set to positions in that Arrow dictionary.
   // Arrow failures are raised through ARROW_THROW_NOT_OK.
   column_builder.field = field;
   column_builder.col_type = col_type;
   // Dictionary-encoded strings are stored as dictionary indices, so their physical
   // type comes from get_dict_index_type() rather than get_physical_type().
   column_builder.physical_type = col_type.is_dict_encoded_string()
                                      ? get_dict_index_type(col_type)
                                      : get_physical_type(col_type);
 
   auto value_type = field->type();
   if (col_type.is_dict_encoded_type()) {
     auto timer = DEBUG_TIMER("Translate string dictionary to Arrow dictionary");
     // Non-array columns use a dictionary builder directly; array columns get their
     // builder further down, once the dictionary values are known.
     if (!col_type.is_array()) {
       column_builder.builder.reset(new arrow::StringDictionary32Builder());
     }
     // add values to the builder
     const auto& dict_key = col_type.getStringDictKey();
 
     // ResultSet::rowCount(), unlike ResultSet::entryCount(), will return
     // the actual number of rows in the result set, taking into account
     // things like any limit and offset set
     const size_t result_set_rows = results_->rowCount();
     // result_set_rows guaranteed > 0 by parent
     CHECK_GT(result_set_rows, 0UL);
 
     const auto sdp = results_->getStringDictionaryProxy(dict_key);
     const size_t dictionary_proxy_entries = sdp->entryCount();
     const double dictionary_to_result_size_ratio =
         static_cast<double>(dictionary_proxy_entries) / result_set_rows;
 
     // We are conservative with when we do a bulk dictionary fetch,
     // even though it is generally more efficient than dictionary unique value "plucking",
     // for the following reasons:
     // 1) The number of actual distinct dictionary values can be much lower than the
     // number of result rows, but without getting the expression range (and that would
     // only work in some cases), we don't know by how much
     // 2) Regardless of the effect of #1, the size of the dictionary generated via
     // the "pluck" method will always be at worst equal in size, and very likely
     // significantly smaller, than the dictionary created by the bulk dictionary
     // fetch method, and smaller Arrow dictionaries are always a win when it comes to
     // sending the Arrow results over the wire, and for lowering the processing load
     // for clients (which often is a web browser with a lot less compute and memory
     // resources than our server.)
 
     // Bulk fetch requires both a result set strictly larger than the configured
     // minimum and a dictionary that is small relative to the result set.
     const bool do_dictionary_bulk_fetch =
         result_set_rows > min_result_size_for_bulk_dictionary_fetch_ &&
         dictionary_to_result_size_ratio <=
             max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_;
 
     // Collects the dictionary values for either strategy below.
     arrow::StringBuilder str_array_builder;
 
     if (do_dictionary_bulk_fetch) {
       VLOG(1) << "Arrow dictionary creation: bulk copying all dictionary "
               << " entries for column at offset " << results_col_slot_idx << ". "
               << "Column has " << dictionary_proxy_entries << " string entries"
               << " for a result set with " << result_set_rows << " rows.";
       column_builder.string_remap_mode =
           ArrowStringRemapMode::ONLY_TRANSIENT_STRINGS_REMAPPED;
       const auto str_list = results_->getStringDictionaryPayloadCopy(dict_key);
       ARROW_THROW_NOT_OK(str_array_builder.AppendValues(str_list));
 
       // When we fetch the bulk dictionary, we need to also fetch
       // the transient entries only contained in the proxy.
       // These values are always negative (starting at -2), and so need
       // to be remapped to point to the corresponding entries in the Arrow
       // dictionary (they are placed at the end after the materialized
       // string entries from StringDictionary)
 
       // Transient strings are appended after the bulk payload, so their Arrow ids
       // start at str_list.size() and increase by one per transient entry.
       int32_t crt_transient_id = static_cast<int32_t>(str_list.size());
       auto const& transient_vecmap = sdp->getTransientVector();
       for (unsigned index = 0; index < transient_vecmap.size(); ++index) {
         ARROW_THROW_NOT_OK(str_array_builder.Append(*transient_vecmap[index]));
         auto const old_id = StringDictionaryProxy::transientIndexToId(index);
         // The insert must succeed: each transient id may be mapped only once.
         CHECK(column_builder.string_remapping
                   .insert(std::make_pair(old_id, crt_transient_id++))
                   .second);
       }
     } else {
       // Pluck unique dictionary values from ResultSet column
       VLOG(1) << "Arrow dictionary creation: serializing unique result set dictionary "
               << " entries for column at offset " << results_col_slot_idx << ". "
               << "Column has " << dictionary_proxy_entries << " string entries"
               << " for a result set with " << result_set_rows << " rows.";
       column_builder.string_remap_mode = ArrowStringRemapMode::ALL_STRINGS_REMAPPED;
 
       // ResultSet::getUniqueStringsForDictEncodedTargetCol returns a pair of two vectors,
       // the first of int32_t values containing the unique string ids found for
       // results_col_slot_idx in the result set, the second containing the associated
       // unique strings. Note that the unique string for a unique string id are both
       // placed at the same offset in their respective vectors
 
       auto unique_ids_and_strings =
           results_->getUniqueStringsForDictEncodedTargetCol(results_col_slot_idx);
       const auto& unique_ids = unique_ids_and_strings.first;
       const auto& unique_strings = unique_ids_and_strings.second;
       ARROW_THROW_NOT_OK(str_array_builder.AppendValues(unique_strings));
       const int32_t num_unique_strings = unique_strings.size();
       CHECK_EQ(num_unique_strings, unique_ids.size());
       // We need to remap ALL string id values given the Arrow dictionary
       // will have "holes", i.e. it is a sparse representation of the underlying
       // StringDictionary
       for (int32_t unique_string_idx = 0; unique_string_idx < num_unique_strings;
            ++unique_string_idx) {
         // Each unique id maps to the offset of its string in the Arrow dictionary.
         CHECK(
             column_builder.string_remapping
                 .insert(std::make_pair(unique_ids[unique_string_idx], unique_string_idx))
                 .second);
       }
       // Note we don't need to get transients from proxy as they are already handled in
       // ResultSet::getUniqueStringsForDictEncodedTargetCol
     }
 
     std::shared_ptr<arrow::StringArray> string_array;
     ARROW_THROW_NOT_OK(str_array_builder.Finish(&string_array));
 
     if (col_type.is_array()) {
       // Array columns keep the dictionary values on the ColumnBuilder and use a
       // builder created from the field's Arrow type.
       column_builder.string_array = std::move(string_array);
       ARROW_THROW_NOT_OK(arrow::MakeBuilder(
           arrow::default_memory_pool(), value_type, &column_builder.builder));
     } else {
       // Scalar columns seed the dictionary builder created above with the values.
       auto dict_builder =
           dynamic_cast<arrow::StringDictionary32Builder*>(column_builder.builder.get());
       CHECK(dict_builder);
 
       ARROW_THROW_NOT_OK(dict_builder->InsertMemoValues(*string_array));
     }
   } else {
     // Non-dictionary columns only need a builder matching the field's Arrow type.
     ARROW_THROW_NOT_OK(arrow::MakeBuilder(
         arrow::default_memory_pool(), value_type, &column_builder.builder));
   }
 }

Here is the call graph for this function:

Here is the caller graph for this function:

std::shared_ptr< arrow::Field > ArrowResultSetConverter::makeField	(	const std::string	name,
		const SQLTypeInfo &	target_type
	)		const

private

Definition at line 1205 of file ArrowResultSetConverter.cpp.

References device_type_, field(), anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type(), and SQLTypeInfo::get_notnull().

Referenced by convertToArrow().

                                           {
   return arrow::field(
       name, get_arrow_type(target_type, device_type_), !target_type.get_notnull());
 }

Here is the call graph for this function:

Here is the caller graph for this function:

Friends And Related Function Documentation

friend class ArrowResultSet

friend

Definition at line 356 of file ArrowResultSet.h.

Member Data Documentation

std::vector<std::string> ArrowResultSetConverter::col_names_

private

Definition at line 351 of file ArrowResultSet.h.

Referenced by convertToArrow().

std::shared_ptr<Data_Namespace::DataMgr> ArrowResultSetConverter::data_mgr_ = nullptr

private

Definition at line 348 of file ArrowResultSet.h.

constexpr double ArrowResultSetConverter::default_max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch {0.1}

static

Definition at line 232 of file ArrowResultSet.h.

constexpr size_t ArrowResultSetConverter::default_min_result_size_for_bulk_dictionary_fetch {10000UL}

static

Definition at line 230 of file ArrowResultSet.h.

Referenced by ArrowResultSet::resultSetArrowLoopback().

int32_t ArrowResultSetConverter::device_id_ = 0

private

Definition at line 350 of file ArrowResultSet.h.

Referenced by getArrowResult().

ExecutorDeviceType ArrowResultSetConverter::device_type_ = ExecutorDeviceType::GPU

private

Definition at line 349 of file ArrowResultSet.h.

Referenced by append(), getArrowBatch(), getArrowResult(), and makeField().

const double ArrowResultSetConverter::max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_

private

Definition at line 355 of file ArrowResultSet.h.

Referenced by initializeColumnBuilder().

const size_t ArrowResultSetConverter::min_result_size_for_bulk_dictionary_fetch_

private

Definition at line 354 of file ArrowResultSet.h.

Referenced by initializeColumnBuilder().

std::shared_ptr<ResultSet> ArrowResultSetConverter::results_

private

Definition at line 347 of file ArrowResultSet.h.

Referenced by convertToArrow(), getArrowBatch(), and initializeColumnBuilder().

int32_t ArrowResultSetConverter::top_n_

private

Definition at line 352 of file ArrowResultSet.h.

Referenced by getArrowBatch().

ArrowTransport ArrowResultSetConverter::transport_method_

private

Definition at line 353 of file ArrowResultSet.h.

Referenced by getArrowResult().

The documentation for this class was generated from the following files:

/home/jenkins-slave/workspace/core-os-doxygen/QueryEngine/ArrowResultSet.h
/home/jenkins-slave/workspace/core-os-doxygen/QueryEngine/ArrowResultSetConverter.cpp

Classes

Public Member Functions

Static Public Attributes

Private Member Functions

Private Attributes

Friends

Detailed Description

Constructor & Destructor Documentation

Member Function Documentation

Friends And Related Function Documentation

Member Data Documentation