OmniSciDB  bf83d84833
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
foreign_storage::LocalMultiFileReader Class Reference

#include <CsvReader.h>

+ Inheritance diagram for foreign_storage::LocalMultiFileReader:
+ Collaboration diagram for foreign_storage::LocalMultiFileReader:

Public Member Functions

 LocalMultiFileReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
 LocalMultiFileReader (const std::string &file_path, const import_export::CopyParams &copy_params, const rapidjson::Value &value)
 
void checkForMoreRows (size_t file_offset, const ForeignServer *server_options, const UserMapping *user_mapping) override
 
- Public Member Functions inherited from foreign_storage::MultiFileReader
 MultiFileReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
 MultiFileReader (const std::string &file_path, const import_export::CopyParams &copy_params, const rapidjson::Value &value)
 
size_t getRemainingSize () override
 
bool isRemainingSizeKnown () override
 
size_t read (void *buffer, size_t max_size) override
 
size_t readRegion (void *buffer, size_t offset, size_t size) override
 
bool isScanFinished () override
 
void serialize (rapidjson::Value &value, rapidjson::Document::AllocatorType &allocator) const override
 
- Public Member Functions inherited from foreign_storage::CsvReader
 CsvReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
virtual ~CsvReader ()=default
 

Private Member Functions

void insertFile (std::string location)
 

Additional Inherited Members

- Protected Attributes inherited from foreign_storage::MultiFileReader
std::vector< std::unique_ptr< CsvReader > > files_
 
std::vector< std::string > file_locations_
 
std::vector< size_t > cumulative_sizes_
 
size_t current_index_
 
size_t current_offset_
 
- Protected Attributes inherited from foreign_storage::CsvReader
import_export::CopyParams copy_params_
 
std::string file_path_
 

Detailed Description

Definition at line 325 of file CsvReader.h.

Constructor & Destructor Documentation

foreign_storage::LocalMultiFileReader::LocalMultiFileReader ( const std::string &  file_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 529 of file CsvReader.cpp.

References insertFile().

531  : MultiFileReader(file_path, copy_params) {
532  std::set<std::string> file_locations;
533  if (boost::filesystem::is_directory(file_path)) {
534  // Find all files in this directory
535  for (boost::filesystem::recursive_directory_iterator
536  it(file_path, boost::filesystem::symlink_option::recurse),
537  eit;
538  it != eit;
539  ++it) {
540  if (!boost::filesystem::is_directory(it->path())) {
541  file_locations.insert(it->path().string());
542  }
543  }
544  } else {
545  file_locations.insert(file_path);
546  }
547  for (const auto& location : file_locations) {
548  insertFile(location);
549  }
550 }
MultiFileReader(const std::string &file_path, const import_export::CopyParams &copy_params)
Definition: CsvReader.cpp:476
void insertFile(std::string location)
Definition: CsvReader.cpp:592

+ Here is the call graph for this function:

foreign_storage::LocalMultiFileReader::LocalMultiFileReader ( const std::string &  file_path,
const import_export::CopyParams &  copy_params,
const rapidjson::Value &  value 
)

Definition at line 572 of file CsvReader.cpp.

References foreign_storage::CsvReader::copy_params_, foreign_storage::MultiFileReader::file_locations_, foreign_storage::MultiFileReader::files_, and foreign_storage::anonymous_namespace{CsvReader.cpp}::is_compressed_file().

575  : MultiFileReader(file_path, copy_params, value) {
576  // Constructs file from files_metadata
577  for (size_t index = 0; index < file_locations_.size(); index++) {
578  if (is_compressed_file(file_locations_[index])) {
579  files_.emplace_back(std::make_unique<CompressedFileReader>(
580  file_locations_[index],
581  copy_params_,
582  value["files_metadata"].GetArray()[index]));
583  } else {
584  files_.emplace_back(
585  std::make_unique<SingleFileReader>(file_locations_[index],
586  copy_params_,
587  value["files_metadata"].GetArray()[index]));
588  }
589  }
590 }
import_export::CopyParams copy_params_
Definition: CsvReader.h:102
MultiFileReader(const std::string &file_path, const import_export::CopyParams &copy_params)
Definition: CsvReader.cpp:476
bool is_compressed_file(const std::string &location)
Definition: CsvReader.cpp:553
std::vector< std::unique_ptr< CsvReader > > files_
Definition: CsvReader.h:313
std::vector< std::string > file_locations_
Definition: CsvReader.h:314

+ Here is the call graph for this function:

Member Function Documentation

void foreign_storage::LocalMultiFileReader::checkForMoreRows ( size_t  file_offset,
const ForeignServer *  server_options,
const UserMapping *  user_mapping 
)
overridevirtual

Rescan the target files. Throws an exception if the rescan fails (i.e. the files are not in a valid appended state, or the operation is not supported).

Parameters
file_offset - where to resume the scan from (end of the last row), as not all of the bytes may have been consumed by the upstream component
server_options - only needed for S3 backed CSV
user_mapping - only needed for S3 backed CSV

Reimplemented from foreign_storage::CsvReader.

Definition at line 606 of file CsvReader.cpp.

References CHECK, foreign_storage::MultiFileReader::cumulative_sizes_, foreign_storage::MultiFileReader::current_index_, foreign_storage::MultiFileReader::current_offset_, foreign_storage::MultiFileReader::file_locations_, foreign_storage::CsvReader::file_path_, foreign_storage::MultiFileReader::files_, insertFile(), foreign_storage::MultiFileReader::isScanFinished(), and foreign_storage::throw_removed_file_error().

608  {
609  // Look for new files
610  std::set<std::string> new_locations;
612  CHECK(file_offset == current_offset_);
613  if (boost::filesystem::is_directory(file_path_)) {
614  // Find all files in this directory
615  std::set<std::string> all_file_paths;
616  for (boost::filesystem::recursive_directory_iterator
617  it(file_path_, boost::filesystem::symlink_option::recurse),
618  eit;
619  it != eit;
620  ++it) {
621  bool new_file =
622  std::find(file_locations_.begin(), file_locations_.end(), it->path()) ==
623  file_locations_.end();
624  if (!boost::filesystem::is_directory(it->path()) && new_file) {
625  new_locations.insert(it->path().string());
626  }
627  all_file_paths.emplace(it->path().string());
628  }
629 
630  for (const auto& file_path : file_locations_) {
631  if (all_file_paths.find(file_path) == all_file_paths.end()) {
632  throw_removed_file_error(file_path);
633  }
634  }
635  }
636  if (new_locations.size() > 0) {
637  for (const auto& location : new_locations) {
638  insertFile(location);
639  }
640  } else if (files_.size() == 1) {
641  // Single file, check if it has new data
642  files_[0].get()->checkForMoreRows(file_offset);
643  if (!files_[0].get()->isScanFinished()) {
644  current_index_ = 0;
645  cumulative_sizes_ = {};
646  }
647  }
648 }
void insertFile(std::string location)
Definition: CsvReader.cpp:592
void throw_removed_file_error(const std::string &file_path)
std::vector< std::unique_ptr< CsvReader > > files_
Definition: CsvReader.h:313
#define CHECK(condition)
Definition: Logger.h:197
bool isScanFinished() override
Definition: CsvReader.h:307
std::vector< std::string > file_locations_
Definition: CsvReader.h:314
std::vector< size_t > cumulative_sizes_
Definition: CsvReader.h:317

+ Here is the call graph for this function:

void foreign_storage::LocalMultiFileReader::insertFile ( std::string  location)
private

Definition at line 592 of file CsvReader.cpp.

References foreign_storage::CsvReader::copy_params_, foreign_storage::MultiFileReader::file_locations_, foreign_storage::MultiFileReader::files_, and foreign_storage::anonymous_namespace{CsvReader.cpp}::is_compressed_file().

Referenced by checkForMoreRows(), and LocalMultiFileReader().

592  {
593  if (is_compressed_file(location)) {
594  files_.emplace_back(std::make_unique<CompressedFileReader>(location, copy_params_));
595  } else {
596  files_.emplace_back(std::make_unique<SingleFileReader>(location, copy_params_));
597  }
598  if (files_.back()->isScanFinished()) {
599  // skip any initially empty files
600  files_.pop_back();
601  } else {
602  file_locations_.push_back(location);
603  }
604 }
import_export::CopyParams copy_params_
Definition: CsvReader.h:102
bool is_compressed_file(const std::string &location)
Definition: CsvReader.cpp:553
std::vector< std::unique_ptr< CsvReader > > files_
Definition: CsvReader.h:313
std::vector< std::string > file_locations_
Definition: CsvReader.h:314

+ Here is the call graph for this function:

+ Here is the caller graph for this function:


The documentation for this class was generated from the following files: