OmniSciDB  ba1bac9284
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ArrowCsvForeignStorage Class Reference
+ Inheritance diagram for ArrowCsvForeignStorage:
+ Collaboration diagram for ArrowCsvForeignStorage:

Public Member Functions

 ArrowCsvForeignStorage ()
 
void prepareTable (const int db_id, const std::string &type, TableDescriptor &td, std::list< ColumnDescriptor > &cols) override
 
void registerTable (Catalog_Namespace::Catalog *catalog, std::pair< int, int > table_key, const std::string &type, const TableDescriptor &td, const std::list< ColumnDescriptor > &cols, Data_Namespace::AbstractBufferMgr *mgr) override
 
std::string getType () const override
 
- Public Member Functions inherited from ArrowForeignStorageBase
void append (const std::vector< ForeignStorageColumnBuffer > &column_buffers) override
 
void read (const ChunkKey &chunk_key, const SQLTypeInfo &sql_type, int8_t *dest, const size_t numBytes) override
 
int8_t * tryZeroCopy (const ChunkKey &chunk_key, const SQLTypeInfo &sql_type, const size_t numBytes) override
 
void parseArrowTable (Catalog_Namespace::Catalog *catalog, std::pair< int, int > table_key, const std::string &type, const TableDescriptor &td, const std::list< ColumnDescriptor > &cols, Data_Namespace::AbstractBufferMgr *mgr, const arrow::Table &table)
 
std::shared_ptr
< arrow::ChunkedArray > 
createDictionaryEncodedColumn (StringDictionary *dict, const ColumnDescriptor &c, std::shared_ptr< arrow::ChunkedArray > arr_col_chunked_array)
 
std::shared_ptr
< arrow::ChunkedArray > 
convertArrowDictionary (StringDictionary *dict, const ColumnDescriptor &c, std::shared_ptr< arrow::ChunkedArray > arr_col_chunked_array)
 
template<typename T , typename ChunkType >
std::shared_ptr
< arrow::ChunkedArray > 
createDecimalColumn (const ColumnDescriptor &c, std::shared_ptr< arrow::ChunkedArray > arr_col_chunked_array)
 
void generateNullValues (const std::vector< Frag > &fragments, std::shared_ptr< arrow::ChunkedArray > arr_col_chunked_array, const SQLTypeInfo &columnType)
 
template<typename T >
void setNullValues (const std::vector< Frag > &fragments, std::shared_ptr< arrow::ChunkedArray > arr_col_chunked_array)
 
template<typename T >
void setNulls (int8_t *data, int count)
 
void generateSentinelValues (int8_t *data, const SQLTypeInfo &columnType, size_t count)
 
void getSizeAndOffset (const Frag &frag, const std::shared_ptr< arrow::Array > &chunk, size_t i, int &size, int &offset)
 
int64_t makeFragment (const Frag &frag, ArrowFragment &arrowFrag, const std::vector< std::shared_ptr< arrow::Array >> &chunks, bool is_varlen, bool is_empty)
 
- Public Member Functions inherited from PersistentForeignStorageInterface
virtual ~PersistentForeignStorageInterface ()
 

Additional Inherited Members

- Public Attributes inherited from ArrowForeignStorageBase
std::map< std::array< int, 3 >
, std::vector< ArrowFragment > > 
m_columns
 

Detailed Description

Definition at line 864 of file ArrowForeignStorage.cpp.

Constructor & Destructor Documentation

ArrowCsvForeignStorage::ArrowCsvForeignStorage ( )
inline

Definition at line 866 of file ArrowForeignStorage.cpp.

866 {}

Member Function Documentation

std::string ArrowCsvForeignStorage::getType ( ) const
overridevirtual

Implements PersistentForeignStorageInterface.

Definition at line 1018 of file ArrowForeignStorage.cpp.

References logger::INFO, and LOG.

1018  {
1019  LOG(INFO) << "CSV backed temporary tables has been activated. Create table `with "
1020  "(storage_type='CSV:path/to/file.csv');`\n";
1021  return "CSV";
1022 }
#define LOG(tag)
Definition: Logger.h:200
void ArrowCsvForeignStorage::prepareTable ( const int  db_id,
const std::string &  type,
TableDescriptor &  td,
std::list< ColumnDescriptor > &  cols 
)
overridevirtual

Reimplemented from PersistentForeignStorageInterface.

Definition at line 882 of file ArrowForeignStorage.cpp.

References TableDescriptor::hasDeletedCol.

885  {
886  td.hasDeletedCol = false;
887 }
void ArrowCsvForeignStorage::registerTable ( Catalog_Namespace::Catalog *  catalog,
std::pair< int, int >  table_key,
const std::string &  type,
const TableDescriptor &  td,
const std::list< ColumnDescriptor > &  cols,
Data_Namespace::AbstractBufferMgr *  mgr 
)
overridevirtual

Implements PersistentForeignStorageInterface.

Definition at line 952 of file ArrowForeignStorage.cpp.

References ARROW_THROW_NOT_OK, CHECK, DataframeTableDescriptor::delimiter, measure< TimeT >::execution(), getArrowImportType(), DataframeTableDescriptor::hasHeader, ArrowForeignStorageBase::parseArrowTable(), DataframeTableDescriptor::skipRows, test_readcsv::table, and VLOG.

957  {
958  const DataframeTableDescriptor* df_td =
959  dynamic_cast<const DataframeTableDescriptor*>(&td);
960  bool isDataframe = df_td ? true : false;
961  std::unique_ptr<DataframeTableDescriptor> df_td_owned;
962  if (!isDataframe) {
963  df_td_owned = std::make_unique<DataframeTableDescriptor>(td);
964  CHECK(df_td_owned);
965  df_td = df_td_owned.get();
966  }
967  auto memory_pool = arrow::default_memory_pool();
968  auto arrow_parse_options = arrow::csv::ParseOptions::Defaults();
969  arrow_parse_options.quoting = false;
970  arrow_parse_options.escaping = false;
971  arrow_parse_options.newlines_in_values = false;
972  arrow_parse_options.delimiter = *df_td->delimiter.c_str();
973  auto arrow_read_options = arrow::csv::ReadOptions::Defaults();
974  arrow_read_options.use_threads = true;
975 
976  arrow_read_options.block_size = 20 * 1024 * 1024;
977  arrow_read_options.autogenerate_column_names = false;
978  arrow_read_options.skip_rows =
979  df_td->hasHeader ? (df_td->skipRows + 1) : df_td->skipRows;
980 
981  auto arrow_convert_options = arrow::csv::ConvertOptions::Defaults();
982  arrow_convert_options.check_utf8 = false;
983  arrow_convert_options.include_columns = arrow_read_options.column_names;
984  arrow_convert_options.strings_can_be_null = true;
985 
986  for (auto& c : cols) {
987  if (c.isSystemCol) {
988  continue; // must be processed by base interface implementation
989  }
990  arrow_convert_options.column_types.emplace(c.columnName,
991  getArrowImportType(c.columnType));
992  arrow_read_options.column_names.push_back(c.columnName);
993  }
994 
995  std::shared_ptr<arrow::io::ReadableFile> inp;
996  auto file_result = arrow::io::ReadableFile::Open(info.c_str());
997  ARROW_THROW_NOT_OK(file_result.status());
998  inp = file_result.ValueOrDie();
999 
1000  auto table_reader_result = arrow::csv::TableReader::Make(
1001  memory_pool, inp, arrow_read_options, arrow_parse_options, arrow_convert_options);
1002  ARROW_THROW_NOT_OK(table_reader_result.status());
1003  auto table_reader = table_reader_result.ValueOrDie();
1004 
1005  std::shared_ptr<arrow::Table> arrowTable;
1006  auto time = measure<>::execution([&]() {
1007  auto arrow_table_result = table_reader->Read();
1008  ARROW_THROW_NOT_OK(arrow_table_result.status());
1009  arrowTable = arrow_table_result.ValueOrDie();
1010  });
1011 
1012  VLOG(1) << "Read Arrow CSV file " << info << " in " << time << "ms";
1013 
1014  arrow::Table& table = *arrowTable.get();
1015  parseArrowTable(catalog, table_key, info, td, cols, mgr, table);
1016 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
void parseArrowTable(Catalog_Namespace::Catalog *catalog, std::pair< int, int > table_key, const std::string &type, const TableDescriptor &td, const std::list< ColumnDescriptor > &cols, Data_Namespace::AbstractBufferMgr *mgr, const arrow::Table &table)
static std::shared_ptr< arrow::DataType > getArrowImportType(const SQLTypeInfo type)
#define CHECK(condition)
Definition: Logger.h:206
#define VLOG(n)
Definition: Logger.h:300
specifies the content in-memory of a row in the table metadata table

+ Here is the call graph for this function:


The documentation for this class was generated from the following file: