OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp} Namespace Reference

Functions

size_t find_last_end_of_line (const char *buffer, size_t buffer_size, size_t start, size_t end, char line_delim)
 
bool line_starts_with_regex (const char *buffer, size_t start, size_t end, const boost::regex &line_start_regex)
 
std::optional< std::string > get_line_start_regex (const ForeignTable *foreign_table)
 
std::string get_line_regex (const ForeignTable *foreign_table)
 
std::string get_next_row (const char *curr, const char *buffer_end, char line_delim, const std::optional< boost::regex > &line_start_regex)
 
size_t get_row_count (const char *buffer, size_t start, size_t end, char line_delim, const std::optional< boost::regex > &line_start_regex)
 
bool regex_match_columns (const std::string &row_str, const boost::regex &line_regex, size_t logical_column_count, std::vector< std::string > &parsed_columns_str, std::vector< std::string_view > &parsed_columns_sv, const std::string &file_path)
 
std::optional< bool > validate_and_get_bool_value (const ForeignTable *foreign_table, const std::string &option_name)
 

Function Documentation

size_t foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::find_last_end_of_line ( const char *  buffer,
size_t  buffer_size,
size_t  start,
size_t  end,
char  line_delim 
)

Definition at line 27 of file RegexFileBufferParser.cpp.

References to_string().

Referenced by foreign_storage::RegexFileBufferParser::findRowEndPosition().

31  {
32  int64_t i = end;
33  while (i >= static_cast<int64_t>(start)) {
34  if (buffer[i] == line_delim) {
35  return i;
36  } else {
37  i--;
38  }
39  }
40  throw InsufficientBufferSizeException{
41  "Unable to find an end of line character after reading " +
42  std::to_string(buffer_size) + " characters."};
43 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_line_regex ( const ForeignTable *  foreign_table)

Definition at line 64 of file RegexFileBufferParser.cpp.

References CHECK, foreign_storage::RegexFileBufferParser::LINE_REGEX_KEY, and foreign_storage::OptionsContainer::options.

64  std::string get_line_regex(const ForeignTable *foreign_table) {
65  if (foreign_table) {
66  auto it = foreign_table->options.find(RegexFileBufferParser::LINE_REGEX_KEY);
67  CHECK(it != foreign_table->options.end());
68  return it->second;
69  }
70  return {};
71 }
#define CHECK(condition)
Definition: Logger.h:223
std::optional<std::string> foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_line_start_regex ( const ForeignTable *  foreign_table)

Definition at line 54 of file RegexFileBufferParser.cpp.

References foreign_storage::RegexFileBufferParser::LINE_START_REGEX_KEY, and foreign_storage::OptionsContainer::options.

Referenced by foreign_storage::RegexFileBufferParser::validateFiles().

54  std::optional<std::string> get_line_start_regex(const ForeignTable *foreign_table) {
55  if (foreign_table) {
56  auto it = foreign_table->options.find(RegexFileBufferParser::LINE_START_REGEX_KEY);
57  if (it != foreign_table->options.end()) {
58  return it->second;
59  }
60  }
61  return {};
62 }

+ Here is the caller graph for this function:

std::string foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_next_row ( const char *  curr,
const char *  buffer_end,
char  line_delim,
const std::optional< boost::regex > &  line_start_regex 
)

Definition at line 73 of file RegexFileBufferParser.cpp.

References CHECK, and line_starts_with_regex().

Referenced by get_row_count(), and foreign_storage::RegexFileBufferParser::parseBuffer().

76  {
77  auto row_end = curr;
78  bool row_found{false};
79  while (!row_found && row_end <= buffer_end) {
80  if (*row_end == line_delim) {
81  if (row_end == buffer_end) {
82  row_found = true;
83  } else if (line_start_regex.has_value()) {
84  // When a LINE_START_REGEX option is present, concatenate the following lines
85  // until a line that starts with the specified regex is found.
86  CHECK(line_starts_with_regex(curr, 0, row_end - curr, line_start_regex.value()))
87  << "'" << line_start_regex.value() << "' not found in: '"
88  << std::string{curr, row_end - curr + 1ULL} << "'";
89  auto row_str = get_next_row(row_end + 1, buffer_end, line_delim, {});
90  while (!line_starts_with_regex(
91  row_str.c_str(), 0, row_str.length() - 1, line_start_regex.value())) {
92  row_end += row_str.length() + 1;
93  if (row_end == buffer_end) {
94  break;
95  }
96  row_str = get_next_row(row_end + 1, buffer_end, line_delim, {});
97  }
98  row_found = true;
99  } else {
100  row_found = true;
101  }
102  }
103  row_end++;
104  }
105  CHECK(row_found);
106  return std::string{curr, static_cast<size_t>(row_end - curr - 1)};
107 }
std::string get_next_row(const char *curr, const char *buffer_end, char line_delim, const std::optional< boost::regex > &line_start_regex)
#define CHECK(condition)
Definition: Logger.h:223
bool line_starts_with_regex(const char *buffer, size_t start, size_t end, const boost::regex &line_start_regex)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_row_count ( const char *  buffer,
size_t  start,
size_t  end,
char  line_delim,
const std::optional< boost::regex > &  line_start_regex 
)

Definition at line 109 of file RegexFileBufferParser.cpp.

References get_next_row().

Referenced by foreign_storage::RegexFileBufferParser::findRowEndPosition().

113  {
114  size_t row_count{0};
115  auto buffer_end = buffer + end;
116  auto curr = buffer + start;
117  while (curr <= buffer_end) {
118  auto row_str = get_next_row(curr, buffer_end, line_delim, line_start_regex);
119  curr += row_str.length() + 1;
120  row_count++;
121  }
122  return row_count;
123 }
std::string get_next_row(const char *curr, const char *buffer_end, char line_delim, const std::optional< boost::regex > &line_start_regex)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::line_starts_with_regex ( const char *  buffer,
size_t  start,
size_t  end,
const boost::regex &  line_start_regex 
)

Definition at line 45 of file RegexFileBufferParser.cpp.

Referenced by foreign_storage::RegexFileBufferParser::findRowEndPosition(), get_next_row(), and foreign_storage::RegexFileBufferParser::validateFiles().

48  {
49  return boost::regex_search(std::string{buffer + start, end - start + 1},
50  line_start_regex,
51  boost::regex_constants::match_continuous);
52 }

+ Here is the caller graph for this function:

bool foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::regex_match_columns ( const std::string &  row_str,
const boost::regex &  line_regex,
size_t  logical_column_count,
std::vector< std::string > &  parsed_columns_str,
std::vector< std::string_view > &  parsed_columns_sv,
const std::string &  file_path 
)

Definition at line 125 of file RegexFileBufferParser.cpp.

References CHECK_GT, and foreign_storage::throw_number_of_columns_mismatch_error().

Referenced by foreign_storage::RegexFileBufferParser::parseBuffer().

130  {
131  parsed_columns_str.clear();
132  parsed_columns_sv.clear();
133  boost::smatch match;
134  bool set_all_nulls{false};
135  if (boost::regex_match(row_str, match, line_regex)) {
136  auto matched_column_count = match.size() - 1;
137  if (logical_column_count != matched_column_count) {
138  throw_number_of_columns_mismatch_error(
139  logical_column_count, matched_column_count, file_path);
140  }
141  CHECK_GT(match.size(), static_cast<size_t>(1));
142  for (size_t i = 1; i < match.size(); i++) {
143  parsed_columns_str.emplace_back(match[i].str());
144  parsed_columns_sv.emplace_back(parsed_columns_str.back());
145  }
146  } else {
147  parsed_columns_sv =
148  std::vector<std::string_view>(logical_column_count, std::string_view{});
149  set_all_nulls = true;
150  }
151  return set_all_nulls;
152 }
#define CHECK_GT(x, y)
Definition: Logger.h:235
void throw_number_of_columns_mismatch_error(size_t num_table_cols, size_t num_file_cols, const std::string &file_path)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::optional<bool> foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::validate_and_get_bool_value ( const ForeignTable *  foreign_table,
const std::string &  option_name 
)

Definition at line 154 of file RegexFileBufferParser.cpp.

References foreign_storage::OptionsContainer::options.

155  {
156  if (auto it = foreign_table->options.find(option_name);
157  it != foreign_table->options.end()) {
158  if (boost::iequals(it->second, "TRUE")) {
159  return true;
160  } else if (boost::iequals(it->second, "FALSE")) {
161  return false;
162  } else {
163  throw std::runtime_error{"Invalid boolean value specified for \"" + option_name +
164  "\" foreign table option. "
165  "Value must be either 'true' or 'false'."};
166  }
167  }
168  return std::nullopt;
169 }