#include <DelimitedParserUtils.h>
|
static size_t | find_beginning (const char *buffer, size_t begin, size_t end, const CopyParams &copy_params) |
| Finds the closest possible row beginning in the given buffer. More...
|
|
static size_t | find_end (const char *buffer, size_t size, const CopyParams &copy_params, unsigned int &num_rows_this_buffer) |
| Finds the closest possible row ending to the end of the given buffer. More...
|
|
static const char * | get_row (const char *buf, const char *buf_end, const char *entire_buf_end, const Importer_NS::CopyParams &copy_params, const bool *is_array, std::vector< std::string > &row, bool &try_single_thread) |
| Parses the first row in the given buffer and inserts fields into given vector. More...
|
|
static void | parseStringArray (const std::string &s, const Importer_NS::CopyParams &copy_params, std::vector< std::string > &string_vec) |
| Parses given string array and inserts into given vector of strings. More...
|
|
Definition at line 31 of file DelimitedParserUtils.h.
size_t Importer_NS::DelimitedParserUtils::find_beginning |
( |
const char * |
buffer, |
|
|
size_t |
begin, |
|
|
size_t |
end, |
|
|
const CopyParams & |
copy_params |
|
) |
| |
|
static |
Finds the closest possible row beginning in the given buffer.
- Parameters
-
buffer | Given buffer which has the rows in csv format. (NOT OWNED) |
begin | Start index of buffer to look for the beginning. |
end | End index of buffer to look for the beginning. |
copy_params | Copy params for the table. |
- Returns
- The position of the closest possible row beginning to the start of the given buffer.
Definition at line 58 of file DelimitedParserUtils.cpp.
References Importer_NS::CopyParams::line_delim.
Referenced by Importer_NS::import_thread_delimited().
63 if (begin == 0 || (begin > 0 && buffer[begin - 1] == copy_params.line_delim)) {
67 const char* buf = buffer + begin;
68 for (i = 0; i < end - begin; i++) {
69 if (buf[i] == copy_params.line_delim) {
size_t Importer_NS::DelimitedParserUtils::find_end |
( |
const char * |
buffer, |
|
|
size_t |
size, |
|
|
const CopyParams & |
copy_params, |
|
|
unsigned int & |
num_rows_this_buffer |
|
) |
| |
|
static |
Finds the closest possible row ending to the end of the given buffer.
- Parameters
-
buffer | Given buffer which has the rows in csv format. (NOT OWNED) |
size | Size of the buffer. |
copy_params | Copy params for the table. |
num_rows_this_buffer | Number of rows until the closest possible row ending. |
- Returns
- The position of the closest possible row ending to the end of the given buffer.
Definition at line 76 of file DelimitedParserUtils.cpp.
References logger::ERROR, Importer_NS::CopyParams::escape, Importer_NS::CopyParams::line_delim, LOG, Importer_NS::CopyParams::quote, and Importer_NS::CopyParams::quoted.
Referenced by Importer_NS::Importer::importDelimited().
80 size_t last_line_delim_pos = 0;
81 if (copy_params.quoted) {
82 const char* current = buffer;
83 bool in_quote =
false;
85 while (current < buffer + size) {
86 while (!in_quote && current < buffer + size) {
88 if (*current == copy_params.line_delim) {
89 last_line_delim_pos = current - buffer;
90 ++num_rows_this_buffer;
91 }
else if (*current == copy_params.quote) {
97 while (in_quote && current < buffer + size) {
99 if ((*current == copy_params.escape) && (current < buffer + size - 1) &&
100 (*(current + 1) == copy_params.quote)) {
102 }
else if (*current == copy_params.quote) {
109 const char* current = buffer;
110 while (current < buffer + size) {
111 if (*current == copy_params.line_delim) {
112 last_line_delim_pos = current - buffer;
113 ++num_rows_this_buffer;
119 if (last_line_delim_pos <= 0) {
120 size_t slen = size < 50 ? size : 50;
121 std::string showMsgStr(buffer, buffer + slen);
122 LOG(
ERROR) <<
"No line delimiter in block. Block was of size " << size
123 <<
" bytes, first few characters " << showMsgStr;
127 return last_line_delim_pos + 1;
const char * Importer_NS::DelimitedParserUtils::get_row |
( |
const char * |
buf, |
|
|
const char * |
buf_end, |
|
|
const char * |
entire_buf_end, |
|
|
const Importer_NS::CopyParams & |
copy_params, |
|
|
const bool * |
is_array, |
|
|
std::vector< std::string > & |
row, |
|
|
bool & |
try_single_thread |
|
) |
| |
|
static |
Parses the first row in the given buffer and inserts fields into given vector.
- Parameters
-
buf | Given buffer which has the rows in csv format. (NOT OWNED) |
buf_end | End of the sliced buffer for the thread. (NOT OWNED) |
entire_buf_end | End of the entire buffer. (NOT OWNED) |
copy_params | Copy params for the table. |
is_array | Array of bools which tells if a column is an array type. |
row | Given vector to be populated with parsed fields. |
try_single_thread | In case of parse errors, this will tell if parsing should continue with single thread. |
- Returns
- Pointer to the next row after the first row is parsed.
Definition at line 130 of file DelimitedParserUtils.cpp.
References Importer_NS::CopyParams::array_begin, Importer_NS::CopyParams::array_end, Importer_NS::CopyParams::delimiter, logger::ERROR, Importer_NS::CopyParams::escape, field(), anonymous_namespace{DelimitedParserUtils.cpp}::is_eol(), LOG, Importer_NS::CopyParams::quote, Importer_NS::CopyParams::quoted, anonymous_namespace{DelimitedParserUtils.cpp}::trim_quotes(), and Importer_NS::trim_space().
Referenced by Importer_NS::import_thread_delimited(), parseStringArray(), and Importer_NS::Detector::split_raw_data().
137 const char*
field = buf;
139 bool in_quote =
false;
140 bool in_array =
false;
141 bool has_escape =
false;
142 bool strip_quotes =
false;
143 try_single_thread =
false;
144 for (p = buf; p < entire_buf_end; ++p) {
145 if (*p == copy_params.
escape && p < entire_buf_end - 1 &&
146 *(p + 1) == copy_params.
quote) {
149 }
else if (copy_params.
quoted && *p == copy_params.
quote) {
150 in_quote = !in_quote;
154 }
else if (!in_quote && is_array !=
nullptr && *p == copy_params.
array_begin &&
155 is_array[row.size()]) {
157 while (p < entire_buf_end - 1) {
166 if (!has_escape && !strip_quotes) {
167 const char* field_end = p;
169 row.emplace_back(field, field_end - field);
171 auto field_buf = std::make_unique<char[]>(p - field + 1);
173 for (; i < p -
field; i++, j++) {
174 if (has_escape && field[i] == copy_params.
escape &&
175 field[i + 1] == copy_params.
quote) {
176 field_buf[j] = copy_params.
quote;
179 field_buf[j] = field[i];
182 const char* field_begin = field_buf.get();
183 const char* field_end = field_buf.get() + j;
186 row.emplace_back(field_begin, field_end - field_begin);
190 strip_quotes =
false;
192 if (
is_eol(*p, copy_params)) {
194 while (p + 1 < buf_end &&
is_eol(*(p + 1), copy_params)) {
207 try_single_thread =
true;
211 try_single_thread =
true;
void trim_quotes(const char *&field_begin, const char *&field_end, const Importer_NS::CopyParams &copy_params)
static const std::string trim_space(const char *field, const size_t len)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
bool is_eol(const char &c, const Importer_NS::CopyParams &copy_params)
void Importer_NS::DelimitedParserUtils::parseStringArray |
( |
const std::string & |
s, |
|
|
const Importer_NS::CopyParams & |
copy_params, |
|
|
std::vector< std::string > & |
string_vec |
|
) |
| |
|
static |
Parses given string array and inserts into given vector of strings.
- Parameters
-
s | Given string array |
copy_params | Copy params for the table. |
string_vec | Given vector to be populated with parsed fields. |
Definition at line 216 of file DelimitedParserUtils.cpp.
References Importer_NS::CopyParams::array_begin, Importer_NS::CopyParams::array_delim, Importer_NS::CopyParams::array_end, Importer_NS::CopyParams::delimiter, get_row(), StringDictionary::MAX_STRLEN, Importer_NS::CopyParams::null_str, and to_string().
Referenced by Importer_NS::TypedImportBuffer::add_value(), and RowToColumnLoader::convert_string_to_column().
219 if (s == copy_params.
null_str || s ==
"NULL" || s.size() < 1 || s.empty()) {
222 string_vec.emplace_back(
"NULL");
226 throw std::runtime_error(
"Malformed Array :" + s);
229 std::string row(s.c_str() + 1, s.length() - 2);
231 bool try_single_thread =
false;
235 row.c_str() + row.length(),
236 row.c_str() + row.length(),
242 for (
size_t i = 0; i < string_vec.size(); ++i) {
243 if (string_vec[i].empty()) {
244 string_vec.erase(string_vec.begin() + i);
247 throw std::runtime_error(
"Array String too long : " + string_vec[i] +
" max is " +
static const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const Importer_NS::CopyParams &copy_params, const bool *is_array, std::vector< std::string > &row, bool &try_single_thread)
Parses the first row in the given buffer and inserts fields into given vector.
static constexpr size_t MAX_STRLEN
The documentation for this class was generated from the following files: