OmniSciDB  95562058bd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
foreign_storage::SingleFileReader Class Reference

#include <CsvReader.h>

+ Inheritance diagram for foreign_storage::SingleFileReader:
+ Collaboration diagram for foreign_storage::SingleFileReader:

Public Member Functions

 SingleFileReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
 SingleFileReader (const std::string &file_path, const import_export::CopyParams &copy_params, const rapidjson::Value &value)
 
 ~SingleFileReader () override
 
 SingleFileReader (const SingleFileReader &)=delete
 
SingleFileReader & operator= (const SingleFileReader &)=delete
 
size_t read (void *buffer, size_t max_size) override
 
size_t readRegion (void *buffer, size_t offset, size_t size) override
 
bool isScanFinished () override
 
size_t getRemainingSize () override
 
bool isRemainingSizeKnown () override
 
void checkForMoreRows (size_t file_offset, const ForeignServer *server_options, const UserMapping *user_mapping) override
 
void serialize (rapidjson::Value &value, rapidjson::Document::AllocatorType &allocator) const override
 
- Public Member Functions inherited from foreign_storage::CsvReader
 CsvReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
virtual ~CsvReader ()=default
 

Private Attributes

std::FILE * file_
 
size_t data_size_
 
bool scan_finished_
 
size_t header_offset_
 
size_t total_bytes_read_
 

Additional Inherited Members

- Protected Attributes inherited from foreign_storage::CsvReader
import_export::CopyParams copy_params_
 
std::string file_path_
 

Detailed Description

Definition at line 107 of file CsvReader.h.

Constructor & Destructor Documentation

foreign_storage::SingleFileReader::SingleFileReader ( const std::string &  file_path,
const import_export::CopyParams &  copy_params
)

Definition at line 69 of file CsvReader.cpp.

References CHECK, data_size_, file_, foreign_storage::anonymous_namespace{CsvReader.cpp}::get_data_size(), import_export::CopyParams::has_header, header_offset_, parse_ast::line, import_export::CopyParams::line_delim, and import_export::NO_HEADER.

71  : CsvReader(file_path, copy_params)
72  , scan_finished_(false)
73  , header_offset_(0)
74  , total_bytes_read_(0) {
75  file_ = fopen(file_path.c_str(), "rb");
76  if (!file_) {
77  throw std::runtime_error{"An error occurred when attempting to open file \"" +
78  file_path + "\". " + strerror(errno)};
79  }
80 
81  // Skip header and record offset
83  std::ifstream file{file_path};
84  CHECK(file.good());
85  std::string line;
86  std::getline(file, line, copy_params.line_delim);
87  file.close();
88  header_offset_ = line.size() + 1;
89  }
90  fseek(file_, 0, SEEK_END);
91 
93 
94  if (fseek(file_, static_cast<long int>(header_offset_), SEEK_SET) != 0) {
95  throw std::runtime_error{"An error occurred when attempting to open file \"" +
96  file_path + "\". " + strerror(errno)};
97  };
98 }
tuple line
Definition: parse_ast.py:10
size_t get_data_size(size_t file_size, size_t header_size)
Definition: CsvReader.cpp:62
ImportHeaderRow has_header
Definition: CopyParams.h:48
CsvReader(const std::string &file_path, const import_export::CopyParams &copy_params)
Definition: CsvReader.h:36
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

foreign_storage::SingleFileReader::SingleFileReader ( const std::string &  file_path,
const import_export::CopyParams &  copy_params,
const rapidjson::Value &  value 
)

Definition at line 100 of file CsvReader.cpp.

References data_size_, file_, foreign_storage::json_utils::get_value_from_object(), header_offset_, and total_bytes_read_.

103  : CsvReader(file_path, copy_params)
104  , scan_finished_(true)
105  , header_offset_(0)
106  , total_bytes_read_(0) {
107  file_ = fopen(file_path.c_str(), "rb");
108  if (!file_) {
109  throw std::runtime_error{"An error occurred when attempting to open file \"" +
110  file_path + "\". " + strerror(errno)};
111  }
112  json_utils::get_value_from_object(value, header_offset_, "header_offset");
113  json_utils::get_value_from_object(value, total_bytes_read_, "total_bytes_read");
114  json_utils::get_value_from_object(value, data_size_, "data_size");
115 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: FsiJsonUtils.h:126
CsvReader(const std::string &file_path, const import_export::CopyParams &copy_params)
Definition: CsvReader.h:36

+ Here is the call graph for this function:

foreign_storage::SingleFileReader::~SingleFileReader ( )
inline override

Definition at line 114 of file CsvReader.h.

References file_.

114 { fclose(file_); }
foreign_storage::SingleFileReader::SingleFileReader ( const SingleFileReader &  )
delete

Member Function Documentation

void foreign_storage::SingleFileReader::checkForMoreRows ( size_t  file_offset,
const ForeignServer *  server_options,
const UserMapping *  user_mapping 
)
override virtual

Rescan the target files. Throws an exception if the rescan fails (i.e. files are not in a valid appended state or not supported).

Parameters
file_offset- where to resume the scan from (end of the last row), as not all of the bytes may have been consumed by the upstream component
server_options- only needed for S3 backed CSV
user_mapping- only needed for S3 backed CSV

Reimplemented from foreign_storage::CsvReader.

Definition at line 126 of file CsvReader.cpp.

References CHECK, data_size_, file_, foreign_storage::CsvReader::file_path_, foreign_storage::anonymous_namespace{CsvReader.cpp}::get_data_size(), header_offset_, isScanFinished(), scan_finished_, foreign_storage::throw_removed_row_error(), to_string(), and total_bytes_read_.

128  {
130  // Re-open file and check if there is any new data in it
131  fclose(file_);
132  file_ = fopen(file_path_.c_str(), "rb");
133  if (!file_) {
134  throw std::runtime_error{"An error occurred when attempting to open file \"" +
135  file_path_ + "\". " + strerror(errno)};
136  }
137  fseek(file_, 0, SEEK_END);
138  size_t new_file_size = ftell(file_);
139  size_t new_data_size = get_data_size(new_file_size, header_offset_);
140  if (new_data_size < data_size_) {
142  }
143  if (fseek(file_, static_cast<long int>(file_offset + header_offset_), SEEK_SET) != 0) {
144  throw std::runtime_error{"An error occurred when attempting to read offset " +
145  std::to_string(file_offset + header_offset_) +
146  " in file: \"" + file_path_ + "\". " + strerror(errno)};
147  }
148  if (new_data_size > data_size_) {
149  scan_finished_ = false;
150  total_bytes_read_ = file_offset;
151  data_size_ = new_data_size;
152  }
153 }
size_t get_data_size(size_t file_size, size_t header_size)
Definition: CsvReader.cpp:62
std::string to_string(char const *&&v)
void throw_removed_row_error(const std::string &file_path)
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

size_t foreign_storage::SingleFileReader::getRemainingSize ( )
inline override virtual
Returns
size of the CSV remaining to be read

Implements foreign_storage::CsvReader.

Definition at line 142 of file CsvReader.h.

References data_size_, and total_bytes_read_.

bool foreign_storage::SingleFileReader::isRemainingSizeKnown ( )
inline override virtual
Returns
true if the remaining size is known

Implements foreign_storage::CsvReader.

Definition at line 144 of file CsvReader.h.

144 { return true; };
bool foreign_storage::SingleFileReader::isScanFinished ( )
inline override virtual
Returns
true if the entire CSV has been read

Implements foreign_storage::CsvReader.

Definition at line 140 of file CsvReader.h.

References scan_finished_.

Referenced by checkForMoreRows(), and readRegion().

+ Here is the caller graph for this function:

SingleFileReader& foreign_storage::SingleFileReader::operator= ( const SingleFileReader &  )
delete
size_t foreign_storage::SingleFileReader::read ( void *  buffer,
size_t  max_size 
)
inline override virtual

Read up to max_size bytes from archive into buffer, starting from the end of the last read.

Parameters
buffer- buffer to load into
max_size- maximum number of bytes to read into the buffer
Returns
number of bytes actually read

Implements foreign_storage::CsvReader.

Definition at line 120 of file CsvReader.h.

References file_, scan_finished_, and total_bytes_read_.

120  {
121  size_t bytes_read = fread(buffer, 1, max_size, file_);
122  if (!scan_finished_) {
123  scan_finished_ = feof(file_);
124  }
125 
126  total_bytes_read_ += bytes_read;
127  return bytes_read;
128  }
size_t foreign_storage::SingleFileReader::readRegion ( void *  buffer,
size_t  offset,
size_t  size 
)
inline override virtual

Read up to size bytes from archive, starting at the given offset. isScanFinished() must return true to use readRegion.

Parameters
buffer- buffer to load into
offset- starting point into the archive to read
size- maximum number of bytes to read into the buffer
Returns
number of bytes actually read

Implements foreign_storage::CsvReader.

Definition at line 130 of file CsvReader.h.

References CHECK, file_, foreign_storage::CsvReader::file_path_, header_offset_, isScanFinished(), and to_string().

130  {
132  if (fseek(file_, static_cast<long int>(offset + header_offset_), SEEK_SET) != 0) {
133  throw std::runtime_error{"An error occurred when attempting to read offset " +
134  std::to_string(offset) + " in file: \"" + file_path_ +
135  "\". " + strerror(errno)};
136  }
137  return fread(buffer, 1, size, file_);
138  }
std::string to_string(char const *&&v)
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

void foreign_storage::SingleFileReader::serialize ( rapidjson::Value &  value,
rapidjson::Document::AllocatorType &  allocator 
) const
override virtual

Serialize internal state to the given JSON object. This JSON will later be used to restore the reader state through a constructor. Must be called when isScanFinished() is true.

Parameters
value- JSON object to store needed state to; this function can store any needed data or none
allocator- allocator to use for JSON construction

Implements foreign_storage::CsvReader.

Definition at line 117 of file CsvReader.cpp.

References foreign_storage::json_utils::add_value_to_object(), CHECK, data_size_, header_offset_, scan_finished_, and total_bytes_read_.

118  {
120  json_utils::add_value_to_object(value, header_offset_, "header_offset", allocator);
122  value, total_bytes_read_, "total_bytes_read", allocator);
123  json_utils::add_value_to_object(value, data_size_, "data_size", allocator);
124 };
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: FsiJsonUtils.h:111
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

Member Data Documentation

size_t foreign_storage::SingleFileReader::data_size_
private

Definition at line 155 of file CsvReader.h.

Referenced by checkForMoreRows(), getRemainingSize(), serialize(), and SingleFileReader().

std::FILE* foreign_storage::SingleFileReader::file_
private

Definition at line 153 of file CsvReader.h.

Referenced by checkForMoreRows(), read(), readRegion(), SingleFileReader(), and ~SingleFileReader().

size_t foreign_storage::SingleFileReader::header_offset_
private

Definition at line 160 of file CsvReader.h.

Referenced by checkForMoreRows(), readRegion(), serialize(), and SingleFileReader().

bool foreign_storage::SingleFileReader::scan_finished_
private

Definition at line 157 of file CsvReader.h.

Referenced by checkForMoreRows(), isScanFinished(), read(), and serialize().

size_t foreign_storage::SingleFileReader::total_bytes_read_
private

Definition at line 162 of file CsvReader.h.

Referenced by checkForMoreRows(), getRemainingSize(), read(), serialize(), and SingleFileReader().


The documentation for this class was generated from the following files: