OmniSciDB  7bf56492aa
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ArrowResultSet Class Reference

#include <ArrowResultSet.h>

Public Member Functions

 ArrowResultSet (const std::shared_ptr< ResultSet > &rows, const std::vector< TargetMetaInfo > &targets_meta)
 
 ArrowResultSet (const std::shared_ptr< ResultSet > &rows)
 
std::vector< TargetValue > getNextRow (const bool translate_strings, const bool decimal_to_double) const
 
size_t colCount () const
 
SQLTypeInfo getColType (const size_t col_idx) const
 
bool definitelyHasNoRows () const
 
size_t rowCount () const
 

Static Public Member Functions

static void deallocateArrowResultBuffer (const ArrowResult &result, const ExecutorDeviceType device_type, const size_t device_id, std::shared_ptr< Data_Namespace::DataMgr > &data_mgr)
 

Private Member Functions

void resultSetArrowLoopback ()
 
template<typename Type , typename ArrayType >
void appendValue (std::vector< TargetValue > &row, const arrow::Array &column, const Type null_val, const size_t idx) const
 

Private Attributes

std::shared_ptr< ResultSet > rows_
 
std::vector< TargetMetaInfo > targets_meta_
 
std::shared_ptr< arrow::RecordBatch > record_batch_
 
arrow::ipc::DictionaryMemo dictionary_memo_
 
std::vector< std::shared_ptr< arrow::Array > > columns_
 
size_t crt_row_idx_
 
std::vector< TargetMetaInfo > column_metainfo_
 

Detailed Description

Definition at line 94 of file ArrowResultSet.h.

Constructor & Destructor Documentation

ArrowResultSet::ArrowResultSet ( const std::shared_ptr< ResultSet > &  rows,
const std::vector< TargetMetaInfo > &  targets_meta 
)

Definition at line 72 of file ArrowResultSet.cpp.

References column_metainfo_, columns_, field(), record_batch_, resultSetArrowLoopback(), and anonymous_namespace{ArrowResultSet.cpp}::type_from_arrow_field().

74  : rows_(rows), targets_meta_(targets_meta), crt_row_idx_(0) {
76  auto schema = record_batch_->schema();
77  for (int i = 0; i < schema->num_fields(); ++i) {
78  std::shared_ptr<arrow::Field> field = schema->field(i);
79  SQLTypeInfo type_info = type_from_arrow_field(*schema->field(i));
80  column_metainfo_.emplace_back(field->name(), type_info);
81  columns_.emplace_back(record_batch_->column(i));
82  }
83 }
std::shared_ptr< ResultSet > rows_
SQLTypeInfo type_from_arrow_field(const arrow::Field &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
std::vector< TargetMetaInfo > column_metainfo_
void resultSetArrowLoopback()
std::shared_ptr< arrow::RecordBatch > record_batch_
std::vector< TargetMetaInfo > targets_meta_
std::vector< std::shared_ptr< arrow::Array > > columns_

+ Here is the call graph for this function:

ArrowResultSet::ArrowResultSet ( const std::shared_ptr< ResultSet > &  rows)
inline

Definition at line 98 of file ArrowResultSet.h.

98 : ArrowResultSet(rows, {}) {}
ArrowResultSet(const std::shared_ptr< ResultSet > &rows, const std::vector< TargetMetaInfo > &targets_meta)

Member Function Documentation

template<typename Type , typename ArrayType >
void ArrowResultSet::appendValue ( std::vector< TargetValue > &  row,
const arrow::Array &  column,
const Type  null_val,
const size_t  idx 
) const
private

Definition at line 86 of file ArrowResultSet.cpp.

89  {
90  const auto& col = static_cast<const ArrayType&>(column);
91  row.emplace_back(col.IsNull(idx) ? null_val : static_cast<Type>(col.Value(idx)));
92 }
size_t ArrowResultSet::colCount ( ) const

Definition at line 191 of file ArrowResultSet.cpp.

References column_metainfo_.

191  {
192  return column_metainfo_.size();
193 }
std::vector< TargetMetaInfo > column_metainfo_
void ArrowResultSet::deallocateArrowResultBuffer ( const ArrowResult &  result,
const ExecutorDeviceType  device_type,
const size_t  device_id,
std::shared_ptr< Data_Namespace::DataMgr > &  data_mgr 
)
static

Definition at line 587 of file ArrowResultSetConverter.cpp.

References CHECK_EQ, CPU, ArrowResult::df_handle, ArrowResult::df_size, ArrowResult::sm_handle, ArrowResult::sm_size, and to_string().

Referenced by MapDHandler::deallocate_df().

591  {
592  // CPU buffers skip the sm handle, serializing the entire RecordBatch to df.
593  // Remove shared memory on sysmem
594  if (!result.sm_handle.empty()) {
595  CHECK_EQ(sizeof(key_t), result.sm_handle.size());
596  const key_t& schema_key = *(key_t*)(&result.sm_handle[0]);
597  auto shm_id = shmget(schema_key, result.sm_size, 0666);
598  if (shm_id < 0) {
599  throw std::runtime_error(
600  "failed to get an valid shm ID w/ given shm key of the schema");
601  }
602  if (-1 == shmctl(shm_id, IPC_RMID, 0)) {
603  throw std::runtime_error("failed to deallocate Arrow schema on errorno(" +
604  std::to_string(errno) + ")");
605  }
606  }
607 
608  if (device_type == ExecutorDeviceType::CPU) {
609  CHECK_EQ(sizeof(key_t), result.df_handle.size());
610  const key_t& df_key = *(key_t*)(&result.df_handle[0]);
611  auto shm_id = shmget(df_key, result.df_size, 0666);
612  if (shm_id < 0) {
613  throw std::runtime_error(
614  "failed to get an valid shm ID w/ given shm key of the data");
615  }
616  if (-1 == shmctl(shm_id, IPC_RMID, 0)) {
617  throw std::runtime_error("failed to deallocate Arrow data frame");
618  }
619  }
620  // CUDA buffers become owned by the caller, and will automatically be freed
621  // TODO: What if the client never takes ownership of the result? we may want to
622  // establish a check to see if the GPU buffer still exists, and then free it.
623 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< char > sm_handle
std::string to_string(char const *&&v)
std::vector< char > df_handle
int64_t sm_size
int64_t df_size

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool ArrowResultSet::definitelyHasNoRows ( ) const

Definition at line 200 of file ArrowResultSet.cpp.

References rowCount().

200  {
201  return !rowCount();
202 }
size_t rowCount() const

+ Here is the call graph for this function:

SQLTypeInfo ArrowResultSet::getColType ( const size_t  col_idx) const

Definition at line 195 of file ArrowResultSet.cpp.

References CHECK_LT, and column_metainfo_.

195  {
196  CHECK_LT(col_idx, column_metainfo_.size());
197  return column_metainfo_[col_idx].get_type_info();
198 }
#define CHECK_LT(x, y)
Definition: Logger.h:207
std::vector< TargetMetaInfo > column_metainfo_
std::vector< TargetValue > ArrowResultSet::getNextRow ( const bool  translate_strings,
const bool  decimal_to_double 
) const

Definition at line 180 of file ArrowResultSet.cpp.

References CHECK_LT, crt_row_idx_, and rowCount().

181  {
182  if (crt_row_idx_ == rowCount()) {
183  return {};
184  }
186  auto row = getRowAt(crt_row_idx_);
187  ++crt_row_idx_;
188  return row;
189 }
size_t rowCount() const
#define CHECK_LT(x, y)
Definition: Logger.h:207

+ Here is the call graph for this function:

void ArrowResultSet::resultSetArrowLoopback ( )
private

Definition at line 208 of file ArrowResultSet.cpp.

References ARROW_THROW_NOT_OK, CHECK(), CHECK_EQ, dictionary_memo_, record_batch_, rows_, targets_meta_, and to_string().

Referenced by ArrowResultSet().

208  {
209  std::vector<std::string> col_names;
210 
211  if (!targets_meta_.empty()) {
212  for (auto& meta : targets_meta_) {
213  col_names.push_back(meta.get_resname());
214  }
215  } else {
216  for (unsigned int i = 0; i < rows_->colCount(); i++) {
217  col_names.push_back("col_" + std::to_string(i));
218  }
219  }
220  const auto converter = ArrowResultSetConverter(rows_, col_names, -1);
221 
222  arrow::ipc::DictionaryMemo schema_memo;
223  const auto serialized_arrow_output = converter.getSerializedArrowOutput(&schema_memo);
224 
225  arrow::io::BufferReader schema_reader(serialized_arrow_output.schema);
226 
227  std::shared_ptr<arrow::Schema> schema;
228  ARROW_THROW_NOT_OK(arrow::ipc::ReadSchema(&schema_reader, &dictionary_memo_, &schema));
229  CHECK_EQ(schema_memo.num_fields(), dictionary_memo_.num_fields());
230 
231  // add the dictionaries from the serialized output to the newly created memo
232  const auto& serialized_id_to_dict = schema_memo.id_to_dictionary();
233  for (const auto itr : serialized_id_to_dict) {
234  const auto& id = itr.first;
235  const auto& dict = itr.second;
236  CHECK(!dictionary_memo_.HasDictionary(id));
237  ARROW_THROW_NOT_OK(dictionary_memo_.AddDictionary(id, dict));
238  }
239 
240  arrow::io::BufferReader records_reader(serialized_arrow_output.records);
241 
242  ARROW_THROW_NOT_OK(arrow::ipc::ReadRecordBatch(
243  schema, &dictionary_memo_, &records_reader, &record_batch_));
244 
245  CHECK_EQ(schema->num_fields(), record_batch_->num_columns());
246 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:37
std::shared_ptr< ResultSet > rows_
arrow::ipc::DictionaryMemo dictionary_memo_
std::string to_string(char const *&&v)
CHECK(cgen_state)
std::shared_ptr< arrow::RecordBatch > record_batch_
std::vector< TargetMetaInfo > targets_meta_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t ArrowResultSet::rowCount ( ) const

Definition at line 204 of file ArrowResultSet.cpp.

References record_batch_.

Referenced by definitelyHasNoRows(), and getNextRow().

204  {
205  return record_batch_->num_rows();
206 }
std::shared_ptr< arrow::RecordBatch > record_batch_

+ Here is the caller graph for this function:

Member Data Documentation

std::vector<TargetMetaInfo> ArrowResultSet::column_metainfo_
private

Definition at line 152 of file ArrowResultSet.h.

Referenced by ArrowResultSet(), colCount(), and getColType().

std::vector<std::shared_ptr<arrow::Array> > ArrowResultSet::columns_
private

Definition at line 150 of file ArrowResultSet.h.

Referenced by ArrowResultSet().

size_t ArrowResultSet::crt_row_idx_
mutable private

Definition at line 151 of file ArrowResultSet.h.

Referenced by getNextRow().

arrow::ipc::DictionaryMemo ArrowResultSet::dictionary_memo_
private

Definition at line 146 of file ArrowResultSet.h.

Referenced by resultSetArrowLoopback().

std::shared_ptr<arrow::RecordBatch> ArrowResultSet::record_batch_
private

Definition at line 145 of file ArrowResultSet.h.

Referenced by ArrowResultSet(), resultSetArrowLoopback(), and rowCount().

std::shared_ptr<ResultSet> ArrowResultSet::rows_
private

Definition at line 143 of file ArrowResultSet.h.

Referenced by resultSetArrowLoopback().

std::vector<TargetMetaInfo> ArrowResultSet::targets_meta_
private

Definition at line 144 of file ArrowResultSet.h.

Referenced by resultSetArrowLoopback().


The documentation for this class was generated from the following files: