OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetImportBatchResult Class Reference
+ Inheritance diagram for foreign_storage::ParquetImportBatchResult:
+ Collaboration diagram for foreign_storage::ParquetImportBatchResult:

Public Member Functions

 ParquetImportBatchResult ()=default
 
 ParquetImportBatchResult (const ForeignTable *foreign_table, const int db_id, const ForeignTableSchema *schema)
 
 ParquetImportBatchResult (ParquetImportBatchResult &&other)=default
 
std::optional< Fragmenter_Namespace::InsertData > getInsertData () const override
 
import_export::ImportStatus getImportStatus () const override
 
std::pair< std::map< int, Chunk_NS::Chunk >, std::map< int, StringDictionary * > > getChunksAndDictionaries () const
 
void populateInsertData (const std::map< int, Chunk_NS::Chunk > &chunks)
 
void populateImportStatus (const size_t num_rows_completed, const size_t num_rows_rejected)
 
- Public Member Functions inherited from import_export::ImportBatchResult
virtual ~ImportBatchResult ()=default
 

Private Attributes

std::optional< Fragmenter_Namespace::InsertData > insert_data_
 
std::map< int, std::unique_ptr< AbstractBuffer > > import_buffers_
 
const ForeignTable * foreign_table_
 
int db_id_
 
const ForeignTableSchema * schema_
 
import_export::ImportStatus import_status_
 

Detailed Description

Definition at line 96 of file ParquetImporter.cpp.

Constructor & Destructor Documentation

foreign_storage::ParquetImportBatchResult::ParquetImportBatchResult ( )
default
foreign_storage::ParquetImportBatchResult::ParquetImportBatchResult ( const ForeignTable * foreign_table,
const int  db_id,
const ForeignTableSchema * schema 
)

Definition at line 190 of file ParquetImporter.cpp.

References foreign_storage::ForeignTableSchema::getLogicalAndPhysicalColumns(), import_buffers_, and schema_.

193  : foreign_table_(foreign_table), db_id_(db_id), schema_(schema) {
194  for (const auto column_descriptor : schema_->getLogicalAndPhysicalColumns()) {
195  if (column_descriptor->columnType.is_array()) {
196  import_buffers_[column_descriptor->columnId] =
197  std::make_unique<TypedParquetStorageBuffer<ArrayDatum>>();
198  } else if ((column_descriptor->columnType.is_string() &&
199  !column_descriptor->columnType.is_dict_encoded_string()) ||
200  column_descriptor->columnType.is_geometry()) {
201  import_buffers_[column_descriptor->columnId] =
202  std::make_unique<TypedParquetStorageBuffer<std::string>>();
203  } else {
204  import_buffers_[column_descriptor->columnId] =
205  std::make_unique<ForeignStorageBuffer>();
206  }
207  }
208 }
const std::list< const ColumnDescriptor * > & getLogicalAndPhysicalColumns() const
std::map< int, std::unique_ptr< AbstractBuffer > > import_buffers_

+ Here is the call graph for this function:

foreign_storage::ParquetImportBatchResult::ParquetImportBatchResult ( ParquetImportBatchResult &&  other)
default

Member Function Documentation

std::pair< std::map< int, Chunk_NS::Chunk >, std::map< int, StringDictionary * > > foreign_storage::ParquetImportBatchResult::getChunksAndDictionaries ( ) const

Definition at line 160 of file ParquetImporter.cpp.

References CHECK, db_id_, Catalog_Namespace::SysCatalog::getCatalog(), foreign_storage::ForeignTableSchema::getLogicalAndPhysicalColumns(), import_buffers_, Catalog_Namespace::SysCatalog::instance(), schema_, and Chunk_NS::Chunk::setBuffer().

160  {
161  std::map<int, Chunk_NS::Chunk> chunks;
162  std::map<int, StringDictionary*> string_dictionaries;
163  const auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id_);
164 
165  for (const auto column_descriptor : schema_->getLogicalAndPhysicalColumns()) {
166  const bool is_dictionary_encoded_string_column =
167  column_descriptor->columnType.is_dict_encoded_string() ||
168  (column_descriptor->columnType.is_array() &&
169  column_descriptor->columnType.get_elem_type().is_dict_encoded_string());
170 
171  if (is_dictionary_encoded_string_column) {
172  auto dict_descriptor = catalog->getMetadataForDict(
173  column_descriptor->columnType.get_comp_param(), true);
174  CHECK(dict_descriptor);
175  auto string_dictionary = dict_descriptor->stringDict.get();
176  string_dictionaries[column_descriptor->columnId] = string_dictionary;
177  }
178 
179  Chunk_NS::Chunk chunk{column_descriptor};
180  chunk.setBuffer(import_buffers_.at(column_descriptor->columnId).get());
181  if (column_descriptor->columnType.is_varlen_indeed()) {
182  chunk.setIndexBuffer(nullptr); // index buffers are unused
183  }
184  chunk.initEncoder();
185  chunks[column_descriptor->columnId] = chunk;
186  }
187  return {chunks, string_dictionaries};
188 }
void setBuffer(AbstractBuffer *b)
Definition: Chunk.h:150
static SysCatalog & instance()
Definition: SysCatalog.h:343
const std::list< const ColumnDescriptor * > & getLogicalAndPhysicalColumns() const
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
std::map< int, std::unique_ptr< AbstractBuffer > > import_buffers_
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

import_export::ImportStatus foreign_storage::ParquetImportBatchResult::getImportStatus ( ) const
overridevirtual

Implements import_export::ImportBatchResult.

Definition at line 215 of file ParquetImporter.cpp.

References import_status_.

215  {
216  return import_status_;
217 }
import_export::ImportStatus import_status_
std::optional< Fragmenter_Namespace::InsertData > foreign_storage::ParquetImportBatchResult::getInsertData ( ) const
overridevirtual

Implements import_export::ImportBatchResult.

Definition at line 210 of file ParquetImporter.cpp.

References insert_data_.

211  {
212  return insert_data_;
213 }
std::optional< Fragmenter_Namespace::InsertData > insert_data_
void foreign_storage::ParquetImportBatchResult::populateImportStatus ( const size_t  num_rows_completed,
const size_t  num_rows_rejected 
)

Definition at line 124 of file ParquetImporter.cpp.

References import_status_, import_export::ImportStatus::rows_completed, and import_export::ImportStatus::rows_rejected.

125  {
126  import_status_.rows_completed = num_rows_completed;
127  import_status_.rows_rejected = num_rows_rejected;
128 }
import_export::ImportStatus import_status_
void foreign_storage::ParquetImportBatchResult::populateInsertData ( const std::map< int, Chunk_NS::Chunk > &  chunks)

Definition at line 130 of file ParquetImporter.cpp.

References DataBlockPtr::arraysPtr, CHECK, db_id_, foreign_table_, insert_data_, DataBlockPtr::numbersPtr, DataBlockPtr::stringsPtr, and TableDescriptor::tableId.

131  {
132  insert_data_ = Fragmenter_Namespace::InsertData{};
133  size_t num_rows = chunks.begin()->second.getBuffer()->getEncoder()->getNumElems();
134  for (const auto& [column_id, chunk] : chunks) {
135  auto column_descriptor = chunk.getColumnDesc();
136  CHECK(chunk.getBuffer()->getEncoder()->getNumElems() == num_rows);
137  insert_data_->columnIds.emplace_back(column_id);
138  auto buffer = chunk.getBuffer();
139  DataBlockPtr block_ptr;
140  if (column_descriptor->columnType.is_array()) {
141  auto array_buffer = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(buffer);
142  block_ptr.arraysPtr = array_buffer->getBufferPtr();
143  } else if ((column_descriptor->columnType.is_string() &&
144  !column_descriptor->columnType.is_dict_encoded_string()) ||
145  column_descriptor->columnType.is_geometry()) {
146  auto string_buffer = dynamic_cast<TypedParquetStorageBuffer<std::string>*>(buffer);
147  block_ptr.stringsPtr = string_buffer->getBufferPtr();
148  } else {
149  block_ptr.numbersPtr = buffer->getMemoryPtr();
150  }
151  insert_data_->data.emplace_back(block_ptr);
152  }
153  insert_data_->databaseId = db_id_;
154  insert_data_->tableId = foreign_table_->tableId;
155  insert_data_->is_default.assign(insert_data_->columnIds.size(), false);
156  insert_data_->numRows = num_rows;
157 }
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:224
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:225
std::optional< Fragmenter_Namespace::InsertData > insert_data_
#define CHECK(condition)
Definition: Logger.h:291
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:68
int8_t * numbersPtr
Definition: sqltypes.h:223

Member Data Documentation

int foreign_storage::ParquetImportBatchResult::db_id_
private

Definition at line 119 of file ParquetImporter.cpp.

Referenced by getChunksAndDictionaries(), and populateInsertData().

const ForeignTable* foreign_storage::ParquetImportBatchResult::foreign_table_
private

Definition at line 118 of file ParquetImporter.cpp.

Referenced by populateInsertData().

std::map<int, std::unique_ptr<AbstractBuffer> > foreign_storage::ParquetImportBatchResult::import_buffers_
private

Definition at line 116 of file ParquetImporter.cpp.

Referenced by getChunksAndDictionaries(), and ParquetImportBatchResult().

import_export::ImportStatus foreign_storage::ParquetImportBatchResult::import_status_
private

Definition at line 121 of file ParquetImporter.cpp.

Referenced by getImportStatus(), and populateImportStatus().

std::optional<Fragmenter_Namespace::InsertData> foreign_storage::ParquetImportBatchResult::insert_data_
private

Definition at line 115 of file ParquetImporter.cpp.

Referenced by getInsertData(), and populateInsertData().

const ForeignTableSchema* foreign_storage::ParquetImportBatchResult::schema_
private

Definition at line 120 of file ParquetImporter.cpp.

Referenced by getChunksAndDictionaries(), and ParquetImportBatchResult().


The documentation for this class was generated from the following file: