OmniSciDB
bf83d84833
|
#include <CsvReader.h>
Public Member Functions | |
CompressedFileReader (const std::string &file_path, const import_export::CopyParams &copy_params) | |
CompressedFileReader (const std::string &file_path, const import_export::CopyParams &copy_params, const rapidjson::Value &value) | |
size_t | read (void *buffer, size_t max_size) override |
size_t | readRegion (void *buffer, size_t offset, size_t size) override |
bool | isScanFinished () override |
bool | isRemainingSizeKnown () override |
size_t | getRemainingSize () override |
void | serialize (rapidjson::Value &value, rapidjson::Document::AllocatorType &allocator) const override |
![]() | |
CsvReader (const std::string &file_path, const import_export::CopyParams &copy_params) | |
virtual | ~CsvReader ()=default |
Private Member Functions | |
void | resetArchive () |
void | checkForMoreRows (size_t file_offset, const ForeignServer *server_options, const UserMapping *user_mapping) override |
void | nextEntry () |
void | skipHeader () |
void | skipBytes (size_t n_bytes) |
size_t | readInternal (void *buffer, size_t read_size, size_t buffer_size) |
Private Attributes | |
ArchiveWrapper | archive_ |
bool | initial_scan_ |
bool | scan_finished_ |
size_t | current_offset_ |
int | current_index_ |
std::vector< size_t > | cumulative_sizes_ |
std::vector< std::string > | sourcenames_ |
std::vector< int > | archive_entry_index_ |
Additional Inherited Members | |
![]() | |
import_export::CopyParams | copy_params_ |
std::string | file_path_ |
Definition at line 218 of file CsvReader.h.
foreign_storage::CompressedFileReader::CompressedFileReader | ( | const std::string & | file_path, |
const import_export::CopyParams & | copy_params | ||
) |
Definition at line 215 of file CsvReader.cpp.
References nextEntry().
foreign_storage::CompressedFileReader::CompressedFileReader | ( | const std::string & | file_path, |
const import_export::CopyParams & | copy_params, | ||
const rapidjson::Value & | value | ||
) |
Definition at line 227 of file CsvReader.cpp.
References archive_entry_index_, cumulative_sizes_, foreign_storage::json_utils::get_value_from_object(), initial_scan_, scan_finished_, and sourcenames_.
|
overrideprivatevirtual |
Rescan the target files. Throws an exception if the rescan fails (i.e. files are not in a valid appended state or not supported).
file_offset | - where to resume the scan from (end of the last row), as not all of the bytes may have been consumed by the upstream component |
server_options | - only needed for S3 backed CSV |
user_mapping | - only needed for S3 backed CSV |
Reimplemented from foreign_storage::CsvReader.
Definition at line 369 of file CsvReader.cpp.
References archive_, archive_entry_index_, CHECK, cumulative_sizes_, current_index_, current_offset_, foreign_storage::ArchiveWrapper::currentEntryDataAvailable(), foreign_storage::ArchiveWrapper::currentEntryFinished(), foreign_storage::ArchiveWrapper::entryName(), foreign_storage::CsvReader::file_path_, initial_scan_, foreign_storage::ArchiveWrapper::nextEntry(), nextEntry(), foreign_storage::ArchiveWrapper::resetArchive(), scan_finished_, skipBytes(), skipHeader(), foreign_storage::ArchiveWrapper::skipToEntry(), and sourcenames_.
|
inlineoverridevirtual |
Implements foreign_storage::CsvReader.
Definition at line 232 of file CsvReader.h.
|
inlineoverridevirtual |
Implements foreign_storage::CsvReader.
Definition at line 231 of file CsvReader.h.
|
inlineoverridevirtual |
Implements foreign_storage::CsvReader.
Definition at line 229 of file CsvReader.h.
References scan_finished_.
Referenced by readRegion().
|
private |
Go to next archive entry/header with valid data
Definition at line 299 of file CsvReader.cpp.
References archive_, archive_entry_index_, CHECK, cumulative_sizes_, current_index_, current_offset_, foreign_storage::ArchiveWrapper::currentEntryFinished(), foreign_storage::ArchiveWrapper::entryName(), foreign_storage::ArchiveWrapper::getCurrentEntryIndex(), initial_scan_, foreign_storage::ArchiveWrapper::nextEntry(), scan_finished_, skipHeader(), foreign_storage::ArchiveWrapper::skipToEntry(), and sourcenames_.
Referenced by checkForMoreRows(), CompressedFileReader(), and readInternal().
|
overridevirtual |
Read up to max_size bytes from archive into buffer, starting from the end of the last read.
buffer | - buffer to load into |
max_size | - maximum number of bytes to read into the buffer |
Implements foreign_storage::CsvReader.
Definition at line 267 of file CsvReader.cpp.
References readInternal().
|
private |
Definition at line 241 of file CsvReader.cpp.
References foreign_storage::anonymous_namespace{CsvReader.cpp}::adjust_eof(), archive_, foreign_storage::ArchiveWrapper::consumeDataFromCurrentEntry(), foreign_storage::CsvReader::copy_params_, current_offset_, foreign_storage::ArchiveWrapper::currentEntryDataAvailable(), foreign_storage::ArchiveWrapper::currentEntryFinished(), run_benchmark_import::dest, import_export::CopyParams::line_delim, and nextEntry().
Referenced by read(), and readRegion().
|
overridevirtual |
Read up to max_size bytes from archive, starting at the given offset. isScanFinished() must return true to use readRegion.
buffer | - buffer to load into |
offset | - starting point into the archive to read |
size | - maximum number of bytes to read into the buffer |
Implements foreign_storage::CsvReader.
Definition at line 273 of file CsvReader.cpp.
References archive_, archive_entry_index_, CHECK, cumulative_sizes_, current_index_, current_offset_, foreign_storage::ArchiveWrapper::getCurrentEntryIndex(), isScanFinished(), foreign_storage::anonymous_namespace{CsvReader.cpp}::offset_to_index(), readInternal(), skipBytes(), skipHeader(), and foreign_storage::ArchiveWrapper::skipToEntry().
|
private |
Reopen file and reset back to the beginning
|
overridevirtual |
Serialize internal state to the given JSON object. This JSON will later be used to restore the reader state through a constructor. Must be called when isScanFinished() is true.
value | - JSON object to store needed state to; this function can store any needed data or none |
allocator | - allocator to use for JSON construction |
Implements foreign_storage::CsvReader.
Definition at line 462 of file CsvReader.cpp.
References foreign_storage::json_utils::add_value_to_object(), archive_entry_index_, CHECK, cumulative_sizes_, initial_scan_, scan_finished_, and sourcenames_.
|
private |
Skip forward N bytes in current entry without reading the data
n_bytes | - number of bytes to skip |
Skip forward N bytes without reading the data in current entry
n_bytes | - number of bytes to skip |
Definition at line 351 of file CsvReader.cpp.
References archive_, foreign_storage::ArchiveWrapper::consumeDataFromCurrentEntry(), current_offset_, and foreign_storage::ArchiveWrapper::currentEntryDataAvailable().
Referenced by checkForMoreRows(), and readRegion().
|
private |
Skip Header of CSV file
Definition at line 335 of file CsvReader.cpp.
References archive_, foreign_storage::ArchiveWrapper::consumeDataFromCurrentEntry(), foreign_storage::CsvReader::copy_params_, foreign_storage::ArchiveWrapper::currentEntryFinished(), import_export::CopyParams::has_header, import_export::CopyParams::line_delim, import_export::NO_HEADER, and foreign_storage::ArchiveWrapper::peekNextChar().
Referenced by checkForMoreRows(), nextEntry(), and readRegion().
|
private |
Definition at line 267 of file CsvReader.h.
Referenced by checkForMoreRows(), nextEntry(), readInternal(), readRegion(), skipBytes(), and skipHeader().
|
private |
Definition at line 287 of file CsvReader.h.
Referenced by checkForMoreRows(), CompressedFileReader(), nextEntry(), readRegion(), and serialize().
|
private |
Definition at line 282 of file CsvReader.h.
Referenced by checkForMoreRows(), CompressedFileReader(), nextEntry(), readRegion(), and serialize().
|
private |
Definition at line 279 of file CsvReader.h.
Referenced by checkForMoreRows(), nextEntry(), and readRegion().
|
private |
Definition at line 275 of file CsvReader.h.
Referenced by checkForMoreRows(), nextEntry(), readInternal(), readRegion(), and skipBytes().
|
private |
Definition at line 270 of file CsvReader.h.
Referenced by checkForMoreRows(), CompressedFileReader(), nextEntry(), and serialize().
|
private |
Definition at line 272 of file CsvReader.h.
Referenced by checkForMoreRows(), CompressedFileReader(), isScanFinished(), nextEntry(), and serialize().
|
private |
Definition at line 284 of file CsvReader.h.
Referenced by checkForMoreRows(), CompressedFileReader(), nextEntry(), and serialize().