OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp} Namespace Reference

Functions

size_t find_last_end_of_line (const char *buffer, size_t buffer_size, size_t start, size_t end, char line_delim)
 
bool line_starts_with_regex (const char *buffer, size_t start, size_t end, const boost::regex &line_start_regex)
 
std::optional< std::string > get_line_start_regex (const ForeignTable *foreign_table)
 
std::string get_line_regex (const ForeignTable *foreign_table)
 
std::string get_next_row (const char *curr, const char *buffer_end, char line_delim, const std::optional< boost::regex > &line_start_regex)
 
size_t get_row_count (const char *buffer, size_t start, size_t end, char line_delim, const std::optional< boost::regex > &line_start_regex, const boost::regex &line_regex, bool remove_non_matches)
 
std::optional< bool > validate_and_get_bool_value (const ForeignTable *foreign_table, const std::string &option_name)
 

Function Documentation

size_t foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::find_last_end_of_line ( const char *  buffer,
size_t  buffer_size,
size_t  start,
size_t  end,
char  line_delim 
)

Definition at line 28 of file RegexFileBufferParser.cpp.

References to_string().

Referenced by foreign_storage::RegexFileBufferParser::findRowEndPosition().

32  {
33  int64_t i = end;
34  while (i >= static_cast<int64_t>(start)) {
35  if (buffer[i] == line_delim) {
36  return i;
37  } else {
38  i--;
39  }
40  }
41  throw InsufficientBufferSizeException{
42  "Unable to find an end of line character after reading " +
43  std::to_string(buffer_size) + " characters."};
44 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_line_regex ( const ForeignTable *  foreign_table)

Definition at line 65 of file RegexFileBufferParser.cpp.

References CHECK, foreign_storage::RegexFileBufferParser::LINE_REGEX_KEY, and foreign_storage::OptionsContainer::options.

65  {
66  if (foreign_table) {
67  auto it = foreign_table->options.find(RegexFileBufferParser::LINE_REGEX_KEY);
68  CHECK(it != foreign_table->options.end());
69  return it->second;
70  }
71  return {};
72 }
#define CHECK(condition)
Definition: Logger.h:291
std::optional<std::string> foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_line_start_regex ( const ForeignTable *  foreign_table)

Definition at line 55 of file RegexFileBufferParser.cpp.

References foreign_storage::RegexFileBufferParser::LINE_START_REGEX_KEY, and foreign_storage::OptionsContainer::options.

Referenced by foreign_storage::RegexFileBufferParser::validateFiles().

55  {
56  if (foreign_table) {
57  auto it = foreign_table->options.find(RegexFileBufferParser::LINE_START_REGEX_KEY);
58  if (it != foreign_table->options.end()) {
59  return it->second;
60  }
61  }
62  return {};
63 }

+ Here is the caller graph for this function:

std::string foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_next_row ( const char *  curr,
const char *  buffer_end,
char  line_delim,
const std::optional< boost::regex > &  line_start_regex 
)

Definition at line 74 of file RegexFileBufferParser.cpp.

References CHECK, and line_starts_with_regex().

Referenced by get_row_count(), and foreign_storage::RegexFileBufferParser::parseBuffer().

// Extracts the next row that begins at `curr`, reading no further than
// `buffer_end` (the loop dereferences `buffer_end` itself, so it is inclusive),
// and returns the row without its terminating `line_delim`.
//
// Without a `line_start_regex` a row is the text up to the next `line_delim`.
// With one, lines that do not start with the regex are treated as
// continuations and folded into the row (their delimiters are kept inside the
// returned string).
//
// CHECK(row_found) fires when no `line_delim` is found between `curr` and
// `buffer_end`, so callers must hand in a delimiter-terminated range.
77  {
78  auto row_end = curr;
79  bool row_found{false};
80  while (!row_found && row_end <= buffer_end) {
81  if (*row_end == line_delim) {
// A delimiter sitting on the last buffer position ends the row right away;
// there is no following line to look ahead at.
82  if (row_end == buffer_end) {
83  row_found = true;
84  } else if (line_start_regex.has_value()) {
85  // When a LINE_START_REGEX option is present, concatenate the following lines
86  // until a line that starts with the specified regex is found.
87  CHECK(line_starts_with_regex(curr, 0, row_end - curr, line_start_regex.value()))
88  << "'" << line_start_regex.value() << "' not found in: '"
89  << std::string{curr, row_end - curr + 1ULL} << "'";
// Peek at the following physical line. Passing `{}` as the regex makes the
// recursive call take the plain single-line branch.
// NOTE(review): for an empty `row_str`, `length() - 1` wraps around in
// unsigned arithmetic; line_starts_with_regex adds 1 back, so the inspected
// range ends up empty -- confirm this is intended.
90  auto row_str = get_next_row(row_end + 1, buffer_end, line_delim, {});
91  while (!line_starts_with_regex(
92  row_str.c_str(), 0, row_str.length() - 1, line_start_regex.value())) {
// Move `row_end` onto the delimiter that terminates the continuation line.
93  row_end += row_str.length() + 1;
94  if (row_end == buffer_end) {
95  break;
96  }
97  row_str = get_next_row(row_end + 1, buffer_end, line_delim, {});
98  }
99  row_found = true;
100  } else {
101  row_found = true;
102  }
103  }
// Also executed on the iteration that found the row, so `row_end` finishes one
// past the terminating delimiter; the `- 1` in the return drops that delimiter.
104  row_end++;
105  }
106  CHECK(row_found);
107  return std::string{curr, static_cast<size_t>(row_end - curr - 1)};
108 }
std::string get_next_row(const char *curr, const char *buffer_end, char line_delim, const std::optional< boost::regex > &line_start_regex)
#define CHECK(condition)
Definition: Logger.h:291
bool line_starts_with_regex(const char *buffer, size_t start, size_t end, const boost::regex &line_start_regex)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_row_count ( const char *  buffer,
size_t  start,
size_t  end,
char  line_delim,
const std::optional< boost::regex > &  line_start_regex,
const boost::regex &  line_regex,
bool  remove_non_matches 
)

Definition at line 110 of file RegexFileBufferParser.cpp.

References get_next_row().

Referenced by foreign_storage::RegexFileBufferParser::findRowEndPosition().

116  {
117  size_t row_count{0};
118  auto buffer_end = buffer + end;
119  auto curr = buffer + start;
120  while (curr <= buffer_end) {
121  auto row_str = get_next_row(curr, buffer_end, line_delim, line_start_regex);
122  curr += row_str.length() + 1;
123  if (remove_non_matches) {
124  if (boost::regex_match(row_str, line_regex)) {
125  row_count++;
126  }
127  } else {
128  row_count++;
129  }
130  }
131  return row_count;
132 }
std::string get_next_row(const char *curr, const char *buffer_end, char line_delim, const std::optional< boost::regex > &line_start_regex)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::line_starts_with_regex ( const char *  buffer,
size_t  start,
size_t  end,
const boost::regex &  line_start_regex 
)

Definition at line 46 of file RegexFileBufferParser.cpp.

Referenced by foreign_storage::RegexFileBufferParser::findRowEndPosition(), get_next_row(), and foreign_storage::RegexFileBufferParser::validateFiles().

49  {
50  return boost::regex_search(std::string{buffer + start, end - start + 1},
51  line_start_regex,
52  boost::regex_constants::match_continuous);
53 }

+ Here is the caller graph for this function:

std::optional<bool> foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::validate_and_get_bool_value ( const ForeignTable *  foreign_table,
const std::string &  option_name 
)

Definition at line 134 of file RegexFileBufferParser.cpp.

References foreign_storage::OptionsContainer::options.

135  {
136  if (auto it = foreign_table->options.find(option_name);
137  it != foreign_table->options.end()) {
138  if (boost::iequals(it->second, "TRUE")) {
139  return true;
140  } else if (boost::iequals(it->second, "FALSE")) {
141  return false;
142  } else {
143  throw std::runtime_error{"Invalid boolean value specified for \"" + option_name +
144  "\" foreign table option. "
145  "Value must be either 'true' or 'false'."};
146  }
147  }
148  return std::nullopt;
149 }