OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DelimitedParserUtils.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file DelimitedParserUtils.h
19  * @brief utilities for parsing delimited data
20  *
21  */
22 
23 #pragma once
24 
25 #include <string>
26 #include <vector>
27 
30 
31 namespace import_export {
32 namespace delimited_parser {
33 
/**
 * @brief Error type for insufficient-buffer-size conditions.
 *
 * Derives from std::runtime_error, so it can be caught as the specific type,
 * as std::runtime_error, or as std::exception. The throw sites are not part of
 * this header (NOTE(review): presumably the buffer-resizing helpers declared
 * below — confirm against the implementation file).
 */
class InsufficientBufferSizeException : public std::runtime_error {
 public:
  /**
   * @param message Description of the failure; forwarded to std::runtime_error
   * and returned by what().
   *
   * Declared explicit so that a std::string is never silently converted into
   * an exception object.
   */
  explicit InsufficientBufferSizeException(const std::string& message)
      : std::runtime_error(message) {}
};
39 
/**
 * @brief General error type for the delimited parser utilities.
 *
 * Derives from std::runtime_error, so it can be caught as the specific type,
 * as std::runtime_error, or as std::exception. The throw sites are not part of
 * this header.
 */
class DelimitedParserException : public std::runtime_error {
 public:
  /**
   * @param message Description of the failure; forwarded to std::runtime_error
   * and returned by what().
   *
   * Declared explicit so that a std::string is never silently converted into
   * an exception object.
   */
  explicit DelimitedParserException(const std::string& message)
      : std::runtime_error(message) {}
};
44 
/**
 * @brief Finds the closest possible row beginning in the given buffer.
 *
 * @param buffer Buffer to search.
 * @param begin Start of the search range within the buffer
 *              (NOTE(review): presumably an index into buffer — confirm).
 * @param end End of the search range within the buffer
 *            (NOTE(review): inclusive vs. exclusive is not visible here — confirm).
 * @param copy_params Copy parameters used while searching; which fields are
 *                    consulted is defined by the implementation.
 * @return A size_t locating the row beginning (NOTE(review): whether it is
 *         relative to begin or to the buffer start is not visible here — confirm).
 */
56 size_t find_beginning(const char* buffer,
57  size_t begin,
58  size_t end,
59  const CopyParams& copy_params);
60 
/**
 * @brief Gets the maximum size to which thread buffers should be automatically
 * resized.
 *
 * @return The current maximum buffer resize value.
 */
64 size_t get_max_buffer_resize();
65 
/**
 * @brief Sets the maximum size to which thread buffers should be automatically
 * resized.
 *
 * @param max_buffer_resize New maximum buffer resize value.
 */
70 void set_max_buffer_resize(const size_t max_buffer_resize);
71 
/**
 * @brief Finds the closest possible row ending to the end of the given buffer.
 * The buffer is resized as needed.
 *
 * alloc_size, buffer, buffer_size and num_rows_in_buffer are taken by non-const
 * reference, so the call may update them.
 *
 * @param alloc_size Allocation size associated with buffer (in/out).
 * @param buffer Owning pointer to the character buffer (in/out).
 * @param buffer_size Size value associated with buffer (in/out).
 * @param copy_params Copy parameters used while searching.
 * @param buffer_first_row_index Index associated with the first row in the
 *        buffer (NOTE(review): presumably used for error reporting — confirm).
 * @param num_rows_in_buffer Row count for the buffer (in/out).
 * @param file C file handle (NOTE(review): presumably the source for additional
 *        data when the buffer is resized — confirm).
 * @param file_reader Optional file reader; defaults to nullptr.
 * @return A size_t locating the row ending found.
 */
90 size_t find_row_end_pos(size_t& alloc_size,
91  std::unique_ptr<char[]>& buffer,
92  size_t& buffer_size,
93  const CopyParams& copy_params,
94  const size_t buffer_first_row_index,
95  unsigned int& num_rows_in_buffer,
96  FILE* file,
97  foreign_storage::FileReader* file_reader = nullptr);
98 
/**
 * @brief Parses the first row in the given buffer and inserts fields into given
 * vector.
 *
 * @tparam T Element type of the output row vector.
 * @param buf Pointer into the buffer where parsing starts.
 * @param buf_end End pointer for the portion of the buffer being parsed.
 * @param entire_buf_end End pointer for the entire buffer
 *        (NOTE(review): how it differs from buf_end in practice is defined by
 *        the implementation — confirm).
 * @param copy_params Copy parameters used while parsing.
 * @param is_array Pointer to bool flag(s) (NOTE(review): presumably one flag per
 *        column marking array columns; nullability is not visible here — confirm).
 * @param row Output vector that receives the parsed fields.
 * @param tmp_buffers Vector of owned character buffers passed by non-const
 *        reference (NOTE(review): presumably scratch storage that keeps field
 *        data alive — confirm).
 * @param try_single_thread Flag passed by non-const reference; may be set by
 *        the call.
 * @param filter_empty_lines Flag controlling empty-line handling
 *        (NOTE(review): exact effect is defined by the implementation).
 * @return A pointer into the buffer (NOTE(review): presumably just past the
 *         parsed row — confirm).
 */
115 template <typename T>
116 const char* get_row(const char* buf,
117  const char* buf_end,
118  const char* entire_buf_end,
119  const import_export::CopyParams& copy_params,
120  const bool* is_array,
121  std::vector<T>& row,
122  std::vector<std::unique_ptr<char[]>>& tmp_buffers,
123  bool& try_single_thread,
124  bool filter_empty_lines);
125 
/**
 * @brief Parses given string array and inserts into given vector of strings.
 *
 * @param s String holding the array to parse.
 * @param copy_params Copy parameters used while parsing.
 * @param string_vec Output vector that receives the parsed strings.
 * @param truncate_values Defaults to false (NOTE(review): presumably truncates
 *        over-long values instead of failing — confirm against the
 *        implementation).
 */
133 void parse_string_array(const std::string& s,
134  const import_export::CopyParams& copy_params,
135  std::vector<std::string>& string_vec,
136  bool truncate_values = false);
137 
/**
 * @brief Extends the given buffer.
 *
 * NOTE(review): no description is available for this declaration here; the
 * summary above is taken from the function name only. buffer, buffer_size and
 * alloc_size are taken by non-const reference, so the call may update them.
 *
 * @param buffer Owning pointer to the character buffer (in/out).
 * @param buffer_size Size value associated with buffer (in/out).
 * @param alloc_size Allocation size associated with buffer (in/out).
 * @param file C file handle (NOTE(review): presumably the source of additional
 *        data — confirm).
 * @param file_reader File reader pointer (NOTE(review): whether nullptr is
 *        accepted is not visible in this declaration — confirm).
 * @param max_buffer_resize Upper bound related to buffer resizing
 *        (NOTE(review): behavior when the bound is exceeded is defined by the
 *        implementation — confirm).
 */
151 void extend_buffer(std::unique_ptr<char[]>& buffer,
152  size_t& buffer_size,
153  size_t& alloc_size,
154  FILE* file,
155  foreign_storage::FileReader* file_reader,
156  size_t max_buffer_resize);
157 } // namespace delimited_parser
158 
159 } // namespace import_export
size_t find_beginning(const char *buffer, size_t begin, size_t end, const import_export::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread, bool filter_empty_lines)
Parses the first row in the given buffer and inserts fields into given vector.
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
size_t get_max_buffer_resize()
Gets the maximum size to which thread buffers should be automatically resized.
void set_max_buffer_resize(const size_t max_buffer_resize_param)
Sets the maximum size to which thread buffers should be automatically resized. This function is only ...
void extend_buffer(std::unique_ptr< char[]> &buffer, size_t &buffer_size, size_t &alloc_size, FILE *file, foreign_storage::FileReader *file_reader, size_t max_buffer_resize)
size_t find_row_end_pos(size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FILE *file, foreign_storage::FileReader *file_reader)
Finds the closest possible row ending to the end of the given buffer. The buffer is resized as needed...