OmniSciDB  2e3a973ef4
foreign_storage::anonymous_namespace{CsvReader.cpp} Namespace Reference

Functions

void adjust_eof (size_t &read_size, const size_t buffer_size, char *buffer, const char line_delim)
 
size_t offset_to_index (const std::vector< size_t > &cumulative_sizes, size_t byte_offset)
 
size_t get_data_size (size_t file_size, size_t header_size)
 
bool is_compressed_file (const std::string &location)
 

Function Documentation

◆ adjust_eof()

void foreign_storage::anonymous_namespace{CsvReader.cpp}::adjust_eof ( size_t &  read_size,
const size_t  buffer_size,
char *  buffer,
const char  line_delim 
)

Adds an end-of-line character (specified by the line_delim parameter) to the provided buffer if this is the last read buffer and the buffer does not already end with an end-of-line character. This allows for appropriate parsing by the csv_file_buffer_parser utility functions, which expect the end of each row to be indicated by an end-of-line character in the buffer. Also removes an extra end-of-line character that may have been inserted at the end of the file and that would no longer be present once the file is appended to.

Definition at line 33 of file CsvReader.cpp.

References CHECK.

Referenced by foreign_storage::MultiFileReader::read(), and foreign_storage::CompressedFileReader::readInternal().

36  {
37  if (read_size == 0 || buffer[read_size - 1] != line_delim) {
38  CHECK(buffer_size > read_size);
39  static_cast<char*>(buffer)[read_size] = line_delim;
40  read_size++;
41  } else if (read_size > 1 && buffer[read_size - 2] == line_delim) {
42  // Extra newline may have been due to the file encoding
43  // and may disappear during an append
44  read_size--;
45  }
46 }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ get_data_size()

size_t foreign_storage::anonymous_namespace{CsvReader.cpp}::get_data_size ( size_t  file_size,
size_t  header_size 
)

Definition at line 62 of file CsvReader.cpp.

Referenced by foreign_storage::SingleFileReader::checkForMoreRows(), and foreign_storage::SingleFileReader::SingleFileReader().

62  {
63  // Add 1 byte for possible need to insert a newline
64  return file_size - header_size + 1;
65 }
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31
+ Here is the caller graph for this function:

◆ is_compressed_file()

bool foreign_storage::anonymous_namespace{CsvReader.cpp}::is_compressed_file ( const std::string &  location)

Definition at line 550 of file CsvReader.cpp.

Referenced by foreign_storage::LocalMultiFileReader::insertFile(), and foreign_storage::LocalMultiFileReader::LocalMultiFileReader().

550  {
551  const std::vector<std::string> compressed_exts = {
552  ".zip", ".gz", ".tar", ".rar", ".bz2", ".7z", ".tgz"};
553  const std::vector<std::string> uncompressed_exts = {"", ".csv", ".tsv", ".txt"};
554  if (std::find(compressed_exts.begin(),
555  compressed_exts.end(),
556  boost::filesystem::extension(location)) != compressed_exts.end()) {
557  return true;
558  } else if (std::find(uncompressed_exts.begin(),
559  uncompressed_exts.end(),
560  boost::filesystem::extension(location)) !=
561  uncompressed_exts.end()) {
562  return false;
563  } else {
564  throw std::runtime_error{"Invalid extention for file \"" + location + "\"."};
565  }
566 }
+ Here is the caller graph for this function:

◆ offset_to_index()

size_t foreign_storage::anonymous_namespace{CsvReader.cpp}::offset_to_index ( const std::vector< size_t > &  cumulative_sizes,
size_t  byte_offset 
)
Parameters
cumulative_sizes: Size of each file + all previous files
byte_offset: Byte offset into the fileset from the initial scan
Returns
the file index for a given byte offset

Definition at line 53 of file CsvReader.cpp.

Referenced by foreign_storage::CompressedFileReader::readRegion(), and foreign_storage::MultiFileReader::readRegion().

53  {
54  auto iterator =
55  std::upper_bound(cumulative_sizes.begin(), cumulative_sizes.end(), byte_offset);
56  if (iterator == cumulative_sizes.end()) {
57  throw std::runtime_error{"Invalid offset into cumulative_sizes"};
58  }
59  return iterator - cumulative_sizes.begin();
60 }
+ Here is the caller graph for this function: