OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp} Namespace Reference

Functions

size_t find_last_end_of_line (const char *buffer, size_t buffer_size, size_t start, size_t end, char line_delim)
 
bool line_starts_with_regex (const char *buffer, size_t start, size_t end, const boost::regex &line_start_regex)
 
std::optional< std::string > get_line_start_regex (const ForeignTable *foreign_table)
 
std::string get_line_regex (const ForeignTable *foreign_table)
 
std::string get_next_row (const char *curr, const char *buffer_end, char line_delim, const std::optional< boost::regex > &line_start_regex)
 
size_t get_row_count (const char *buffer, size_t start, size_t end, char line_delim, const std::optional< boost::regex > &line_start_regex)
 
bool regex_match_columns (const std::string &row_str, const boost::regex &line_regex, size_t logical_column_count, std::vector< std::string > &parsed_columns_str, std::vector< std::string_view > &parsed_columns_sv, const std::string &file_path)
 

Function Documentation

size_t foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::find_last_end_of_line ( const char *  buffer,
size_t  buffer_size,
size_t  start,
size_t  end,
char  line_delim 
)

Definition at line 29 of file RegexFileBufferParser.cpp.

References i and to_string().

Referenced by foreign_storage::RegexFileBufferParser::findRowEndPosition().

33  {
34  int64_t i = end;
35  while (i >= static_cast<int64_t>(start)) {
36  if (buffer[i] == line_delim) {
37  return i;
38  } else {
39  i--;
40  }
41  }
42  throw InsufficientBufferSizeException{
43  "Unable to find an end of line character after reading " +
44  std::to_string(buffer_size) + " characters."};
45 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_line_regex ( const ForeignTable *  foreign_table)

Definition at line 66 of file RegexFileBufferParser.cpp.

References CHECK, foreign_storage::RegexFileBufferParser::LINE_REGEX_KEY, and foreign_storage::OptionsContainer::options.

66  {
67  if (foreign_table) {
68  auto it = foreign_table->options.find(RegexFileBufferParser::LINE_REGEX_KEY);
69  CHECK(it != foreign_table->options.end());
70  return it->second;
71  }
72  return {};
73 }
#define CHECK(condition)
Definition: Logger.h:209
std::optional<std::string> foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_line_start_regex ( const ForeignTable *  foreign_table)

Definition at line 56 of file RegexFileBufferParser.cpp.

References foreign_storage::RegexFileBufferParser::LINE_START_REGEX_KEY, and foreign_storage::OptionsContainer::options.

Referenced by foreign_storage::RegexFileBufferParser::validateFiles().

56  {
57  if (foreign_table) {
58  auto it = foreign_table->options.find(RegexFileBufferParser::LINE_START_REGEX_KEY);
59  if (it != foreign_table->options.end()) {
60  return it->second;
61  }
62  }
63  return {};
64 }

+ Here is the caller graph for this function:

std::string foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_next_row ( const char *  curr,
const char *  buffer_end,
char  line_delim,
const std::optional< boost::regex > &  line_start_regex 
)

Definition at line 75 of file RegexFileBufferParser.cpp.

References CHECK and line_starts_with_regex().

Referenced by get_row_count() and foreign_storage::RegexFileBufferParser::parseBuffer().

78  {
79  auto row_end = curr;
80  bool row_found{false};
81  while (!row_found && row_end <= buffer_end) {
82  if (*row_end == line_delim) {
83  if (row_end == buffer_end) {
84  row_found = true;
85  } else if (line_start_regex.has_value()) {
86  // When a LINE_START_REGEX option is present, concatenate the following lines
87  // until a line that starts with the specified regex is found.
88  CHECK(line_starts_with_regex(curr, 0, row_end - curr, line_start_regex.value()));
89  auto row_str = get_next_row(row_end + 1, buffer_end, line_delim, {});
90  while (!line_starts_with_regex(
91  row_str.c_str(), 0, row_str.length() - 1, line_start_regex.value())) {
92  row_end += row_str.length() + 1;
93  if (row_end == buffer_end) {
94  break;
95  }
96  row_str = get_next_row(row_end + 1, buffer_end, line_delim, {});
97  }
98  row_found = true;
99  } else {
100  row_found = true;
101  }
102  }
103  row_end++;
104  }
105  CHECK(row_found);
106  return std::string{curr, static_cast<size_t>(row_end - curr - 1)};
107 }
std::string get_next_row(const char *curr, const char *buffer_end, char line_delim, const std::optional< boost::regex > &line_start_regex)
#define CHECK(condition)
Definition: Logger.h:209
bool line_starts_with_regex(const char *buffer, size_t start, size_t end, const boost::regex &line_start_regex)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_row_count ( const char *  buffer,
size_t  start,
size_t  end,
char  line_delim,
const std::optional< boost::regex > &  line_start_regex 
)

Definition at line 109 of file RegexFileBufferParser.cpp.

References get_next_row().

Referenced by foreign_storage::RegexFileBufferParser::findRowEndPosition().

113  {
114  size_t row_count{0};
115  auto buffer_end = buffer + end;
116  auto curr = buffer + start;
117  while (curr <= buffer_end) {
118  auto row_str = get_next_row(curr, buffer_end, line_delim, line_start_regex);
119  curr += row_str.length() + 1;
120  row_count++;
121  }
122  return row_count;
123 }
std::string get_next_row(const char *curr, const char *buffer_end, char line_delim, const std::optional< boost::regex > &line_start_regex)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::line_starts_with_regex ( const char *  buffer,
size_t  start,
size_t  end,
const boost::regex &  line_start_regex 
)

Definition at line 47 of file RegexFileBufferParser.cpp.

Referenced by foreign_storage::RegexFileBufferParser::findRowEndPosition(), get_next_row(), and foreign_storage::RegexFileBufferParser::validateFiles().

50  {
51  return boost::regex_search(std::string{buffer + start, end - start + 1},
52  line_start_regex,
53  boost::regex_constants::match_continuous);
54 }

+ Here is the caller graph for this function:

bool foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::regex_match_columns ( const std::string &  row_str,
const boost::regex &  line_regex,
size_t  logical_column_count,
std::vector< std::string > &  parsed_columns_str,
std::vector< std::string_view > &  parsed_columns_sv,
const std::string &  file_path 
)

Definition at line 125 of file RegexFileBufferParser.cpp.

References CHECK_GT, i, and foreign_storage::throw_number_of_columns_mismatch_error().

Referenced by foreign_storage::RegexFileBufferParser::parseBuffer().

130  {
131  parsed_columns_str.clear();
132  parsed_columns_sv.clear();
133  boost::smatch match;
134  bool set_all_nulls{false};
135  if (boost::regex_match(row_str, match, line_regex)) {
136  auto matched_column_count = match.size() - 1;
137  if (logical_column_count != matched_column_count) {
138  throw_number_of_columns_mismatch_error(
139  logical_column_count, matched_column_count, file_path);
140  }
141  CHECK_GT(match.size(), static_cast<size_t>(1));
142  for (size_t i = 1; i < match.size(); i++) {
143  parsed_columns_str.emplace_back(match[i].str());
144  parsed_columns_sv.emplace_back(parsed_columns_str.back());
145  }
146  } else {
147  parsed_columns_sv =
148  std::vector<std::string_view>(logical_column_count, std::string_view{});
149  set_all_nulls = true;
150  }
151  return set_all_nulls;
152 }
#define CHECK_GT(x, y)
Definition: Logger.h:221
void throw_number_of_columns_mismatch_error(size_t num_table_cols, size_t num_file_cols, const std::string &file_path)

+ Here is the call graph for this function:

+ Here is the caller graph for this function: