OmniSciDB  95562058bd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
foreign_storage::MultiFileReader Class Reference

#include <CsvReader.h>

+ Inheritance diagram for foreign_storage::MultiFileReader:
+ Collaboration diagram for foreign_storage::MultiFileReader:

Public Member Functions

 MultiFileReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
 MultiFileReader (const std::string &file_path, const import_export::CopyParams &copy_params, const rapidjson::Value &value)
 
size_t getRemainingSize () override
 
bool isRemainingSizeKnown () override
 
size_t read (void *buffer, size_t max_size) override
 
size_t readRegion (void *buffer, size_t offset, size_t size) override
 
bool isScanFinished () override
 
void serialize (rapidjson::Value &value, rapidjson::Document::AllocatorType &allocator) const override
 
- Public Member Functions inherited from foreign_storage::CsvReader
 CsvReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
virtual ~CsvReader ()=default
 
virtual void checkForMoreRows (size_t file_offset, const ForeignServer *server_options=nullptr, const UserMapping *user_mapping=nullptr)
 

Protected Attributes

std::vector< std::unique_ptr
< CsvReader > > 
files_
 
std::vector< std::string > file_locations_
 
std::vector< size_t > cumulative_sizes_
 
size_t current_index_
 
size_t current_offset_
 
- Protected Attributes inherited from foreign_storage::CsvReader
import_export::CopyParams copy_params_
 
std::string file_path_
 

Detailed Description

Definition at line 291 of file CsvReader.h.

Constructor & Destructor Documentation

foreign_storage::MultiFileReader::MultiFileReader ( const std::string &  file_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 476 of file CsvReader.cpp.

478  : CsvReader(file_path, copy_params), current_index_(0), current_offset_(0) {}
CsvReader(const std::string &file_path, const import_export::CopyParams &copy_params)
Definition: CsvReader.h:36
foreign_storage::MultiFileReader::MultiFileReader ( const std::string &  file_path,
const import_export::CopyParams &  copy_params,
const rapidjson::Value &  value 
)

Definition at line 480 of file CsvReader.cpp.

References CHECK, cumulative_sizes_, current_index_, current_offset_, file_locations_, and foreign_storage::json_utils::get_value_from_object().

483  : CsvReader(file_path, copy_params), current_index_(0), current_offset_(0) {
484  json_utils::get_value_from_object(value, file_locations_, "file_locations");
485  json_utils::get_value_from_object(value, cumulative_sizes_, "cumulative_sizes");
486  json_utils::get_value_from_object(value, current_offset_, "current_offset");
487  json_utils::get_value_from_object(value, current_index_, "current_index");
488 
489  // Validate files_metadata here, but objects will be recreated by child class
490  CHECK(value.HasMember("files_metadata"));
491  CHECK(value["files_metadata"].IsArray());
492  CHECK(file_locations_.size() == value["files_metadata"].GetArray().Size());
493 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: FsiJsonUtils.h:126
CsvReader(const std::string &file_path, const import_export::CopyParams &copy_params)
Definition: CsvReader.h:36
#define CHECK(condition)
Definition: Logger.h:197
std::vector< std::string > file_locations_
Definition: CsvReader.h:314
std::vector< size_t > cumulative_sizes_
Definition: CsvReader.h:317

+ Here is the call graph for this function:

Member Function Documentation

size_t foreign_storage::MultiFileReader::getRemainingSize ( )
overridevirtual
Returns
size of the CSV remaining to be read

Implements foreign_storage::CsvReader.

Definition at line 513 of file CsvReader.cpp.

References current_index_, and files_.

513  {
514  size_t total_size = 0;
515  for (size_t index = current_index_; index < files_.size(); index++) {
516  total_size += files_[index]->getRemainingSize();
517  }
518  return total_size;
519 }
std::vector< std::unique_ptr< CsvReader > > files_
Definition: CsvReader.h:313
bool foreign_storage::MultiFileReader::isRemainingSizeKnown ( )
overridevirtual
Returns
true if the remaining size is known

Implements foreign_storage::CsvReader.

Definition at line 521 of file CsvReader.cpp.

References current_index_, and files_.

521  {
522  bool size_known = true;
523  for (size_t index = current_index_; index < files_.size(); index++) {
524  size_known = size_known && files_[index]->isRemainingSizeKnown();
525  }
526  return size_known;
527 };
std::vector< std::unique_ptr< CsvReader > > files_
Definition: CsvReader.h:313
bool foreign_storage::MultiFileReader::isScanFinished ( )
inlineoverridevirtual
Returns
true if the entire CSV has been read

Implements foreign_storage::CsvReader.

Definition at line 307 of file CsvReader.h.

References current_index_, and files_.

Referenced by foreign_storage::MultiS3Reader::checkForMoreRows(), foreign_storage::LocalMultiFileReader::checkForMoreRows(), read(), and readRegion().

307 { return (current_index_ >= files_.size()); }
std::vector< std::unique_ptr< CsvReader > > files_
Definition: CsvReader.h:313

+ Here is the caller graph for this function:

size_t foreign_storage::MultiFileReader::read ( void *  buffer,
size_t  max_size 
)
overridevirtual

Read up to max_size bytes from archive into buffer, starting from the end of the last read.

Parameters
buffer- buffer to load into
max_size- maximum number of bytes to read into the buffer
Returns
number of bytes actually read

Implements foreign_storage::CsvReader.

Definition at line 644 of file CsvReader.cpp.

References foreign_storage::anonymous_namespace{CsvReader.cpp}::adjust_eof(), foreign_storage::CsvReader::copy_params_, cumulative_sizes_, current_index_, current_offset_, files_, isScanFinished(), and import_export::CopyParams::line_delim.

644  {
645  if (isScanFinished()) {
646  return 0;
647  }
648  // Leave one extra char in case we need to insert a delimiter
649  size_t bytes_read = files_[current_index_].get()->read(buffer, max_size - 1);
650  if (files_[current_index_].get()->isScanFinished()) {
651  adjust_eof(bytes_read, max_size, static_cast<char*>(buffer), copy_params_.line_delim);
652  }
653  current_offset_ += bytes_read;
654  if (current_index_ < files_.size() && files_[current_index_].get()->isScanFinished()) {
656  current_index_++;
657  }
658  return bytes_read;
659 }
import_export::CopyParams copy_params_
Definition: CsvReader.h:102
void adjust_eof(size_t &read_size, const size_t buffer_size, char *buffer, const char line_delim)
Definition: CsvReader.cpp:33
std::vector< std::unique_ptr< CsvReader > > files_
Definition: CsvReader.h:313
bool isScanFinished() override
Definition: CsvReader.h:307
std::vector< size_t > cumulative_sizes_
Definition: CsvReader.h:317

+ Here is the call graph for this function:

size_t foreign_storage::MultiFileReader::readRegion ( void *  buffer,
size_t  offset,
size_t  size 
)
overridevirtual

Read up to size bytes from archive, starting at the given offset. isScanFinished() must return true to use readRegion.

Parameters
buffer- buffer to load into
offset- starting point into the archive to read
size- maximum number of bytes to read into the buffer
Returns
number of bytes actually read

Implements foreign_storage::CsvReader.

Definition at line 661 of file CsvReader.cpp.

References CHECK, foreign_storage::CsvReader::copy_params_, cumulative_sizes_, files_, isScanFinished(), import_export::CopyParams::line_delim, and foreign_storage::anonymous_namespace{CsvReader.cpp}::offset_to_index().

661  {
663  // Get file index
664  auto index = offset_to_index(cumulative_sizes_, offset);
665  // Get offset into this file
666  size_t base = 0;
667  if (index > 0) {
668  base = cumulative_sizes_[index - 1];
669  }
670 
671  size_t read_size = size;
672  if (offset + size == cumulative_sizes_[index]) {
673  // Skip the last byte as it may have been an inserted delimiter
674  read_size--;
675  }
676  size_t bytes_read = files_[index].get()->readRegion(buffer, offset - base, read_size);
677 
678  if (offset + size == cumulative_sizes_[index]) {
679  // Re-insert delimiter
680  static_cast<char*>(buffer)[size - 1] = copy_params_.line_delim;
681  bytes_read++;
682  }
683 
684  return bytes_read;
685 }
import_export::CopyParams copy_params_
Definition: CsvReader.h:102
size_t offset_to_index(const std::vector< size_t > &cumulative_sizes, size_t byte_offset)
Definition: CsvReader.cpp:53
std::vector< std::unique_ptr< CsvReader > > files_
Definition: CsvReader.h:313
#define CHECK(condition)
Definition: Logger.h:197
bool isScanFinished() override
Definition: CsvReader.h:307
std::vector< size_t > cumulative_sizes_
Definition: CsvReader.h:317

+ Here is the call graph for this function:

void foreign_storage::MultiFileReader::serialize ( rapidjson::Value &  value,
rapidjson::Document::AllocatorType &  allocator 
) const
overridevirtual

Serialize internal state to the given json object. This json will later be used to restore the reader state through a constructor. Must be called when isScanFinished() is true.

Parameters
value- json object to store needed state to; this function can store any needed data or none
allocator- allocator to use for json construction

Implements foreign_storage::CsvReader.

Reimplemented in foreign_storage::MultiS3Reader.

Definition at line 495 of file CsvReader.cpp.

References foreign_storage::json_utils::add_value_to_object(), cumulative_sizes_, current_index_, current_offset_, file_locations_, and files_.

Referenced by foreign_storage::MultiS3Reader::serialize().

496  {
497  json_utils::add_value_to_object(value, file_locations_, "file_locations", allocator);
498  json_utils::add_value_to_object(
499  value, cumulative_sizes_, "cumulative_sizes", allocator);
500  json_utils::add_value_to_object(value, current_offset_, "current_offset", allocator);
501  json_utils::add_value_to_object(value, current_index_, "current_index", allocator);
502 
503  // Serialize metadata from all files
504  rapidjson::Value files_metadata(rapidjson::kArrayType);
505  for (size_t index = 0; index < files_.size(); index++) {
506  rapidjson::Value file_metadata(rapidjson::kObjectType);
507  files_[index]->serialize(file_metadata, allocator);
508  files_metadata.PushBack(file_metadata, allocator);
509  }
510  value.AddMember("files_metadata", files_metadata, allocator);
511 };
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: FsiJsonUtils.h:111
std::vector< std::unique_ptr< CsvReader > > files_
Definition: CsvReader.h:313
std::vector< std::string > file_locations_
Definition: CsvReader.h:314
std::vector< size_t > cumulative_sizes_
Definition: CsvReader.h:317

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

std::vector<size_t> foreign_storage::MultiFileReader::cumulative_sizes_
protected
size_t foreign_storage::MultiFileReader::current_offset_
protected

The documentation for this class was generated from the following files: