OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DelimitedParserUtils.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file DelimitedParserUtils.h
19  * @brief utilities for parsing delimited data
20  *
21  */
22 
23 #pragma once
24 
25 #include <string>
26 #include <vector>
27 
30 
31 namespace import_export {
32 namespace delimited_parser {
33 
/**
 * Exception type derived from std::runtime_error that carries a message about
 * an insufficient buffer size.
 *
 * NOTE(review): presumably thrown by the delimited parser when data does not
 * fit in the available buffer - confirm at the throw sites.
 */
class InsufficientBufferSizeException : public std::runtime_error {
 public:
  /**
   * Creates the exception with the given message.
   *
   * The constructor is explicit so that a std::string is never silently
   * converted into an exception object.
   *
   * @param message - text later returned by what()
   */
  explicit InsufficientBufferSizeException(const std::string& message)
      : std::runtime_error(message) {}
};
39 
/**
 * Finds the closest possible row beginning in the given buffer.
 *
 * @param buffer - buffer that is searched for a row beginning
 * @param begin - NOTE(review): presumably the offset in the buffer at which
 * the search starts - confirm against the implementation
 * @param end - NOTE(review): presumably the offset in the buffer at which the
 * search stops - confirm against the implementation
 * @param copy_params - copy parameters; NOTE(review): presumably supply the
 * delimiter settings used to recognize rows - confirm
 *
 * @return NOTE(review): presumably the position of the row beginning that was
 * found - confirm against the implementation
 */
51 size_t find_beginning(const char* buffer,
52  size_t begin,
53  size_t end,
54  const CopyParams& copy_params);
55 
/**
 * Gets the maximum size to which thread buffers should be automatically
 * resized.
 */
59 size_t get_max_buffer_resize();
60 
/**
 * Sets the maximum size to which thread buffers should be automatically
 * resized.
 *
 * NOTE(review): the generated summary for this function continues with
 * "This function is only ..." but is cut off; restore the intended-usage
 * restriction from the original header comment.
 *
 * @param max_buffer_resize - new maximum size for automatic buffer resizing
 */
65 void set_max_buffer_resize(const size_t max_buffer_resize);
66 
/**
 * Finds the closest possible row ending to the end of the given buffer. The
 * buffer is resized as needed.
 *
 * alloc_size, buffer, buffer_size and num_rows_in_buffer are taken by
 * non-const reference, so the call may update them.
 *
 * @param alloc_size - NOTE(review): presumably the allocated size of the
 * buffer - confirm
 * @param buffer - buffer that is searched (and resized as needed)
 * @param buffer_size - NOTE(review): presumably the size of the data held in
 * the buffer - confirm
 * @param copy_params - copy parameters; NOTE(review): presumably supply the
 * delimiter settings used to recognize rows - confirm
 * @param buffer_first_row_index - NOTE(review): presumably the index of the
 * first row in the buffer - confirm
 * @param num_rows_in_buffer - NOTE(review): presumably set to the number of
 * rows found in the buffer - confirm
 * @param file - C file handle; NOTE(review): presumably the source of extra
 * data when the buffer is resized - confirm
 * @param file_reader - optional file reader, defaults to nullptr;
 * NOTE(review): presumably an alternative data source to file - confirm
 *
 * @return NOTE(review): presumably the position of the row ending that was
 * found - confirm against the implementation
 */
85 size_t find_row_end_pos(size_t& alloc_size,
86  std::unique_ptr<char[]>& buffer,
87  size_t& buffer_size,
88  const CopyParams& copy_params,
89  const size_t buffer_first_row_index,
90  unsigned int& num_rows_in_buffer,
91  FILE* file,
92  foreign_storage::FileReader* file_reader = nullptr);
93 
/**
 * Parses the first row in the given buffer and inserts fields into given
 * vector.
 *
 * @tparam T - element type of the vector that receives the parsed fields
 *
 * @param buf - buffer holding the row to parse
 * @param buf_end - NOTE(review): presumably the end of the region of buf that
 * is parsed - confirm
 * @param entire_buf_end - NOTE(review): presumably the end of the whole
 * underlying buffer, which may lie beyond buf_end - confirm
 * @param copy_params - copy parameters; NOTE(review): presumably supply the
 * delimiter and quoting settings - confirm
 * @param is_array - NOTE(review): presumably per-column flags marking array
 * columns - confirm
 * @param row - vector into which the parsed fields are inserted
 * @param tmp_buffers - vector of owned character buffers, taken by non-const
 * reference; NOTE(review): presumably scratch storage backing parsed fields -
 * confirm
 * @param try_single_thread - flag taken by non-const reference;
 * NOTE(review): presumably set when the row should be re-parsed on a single
 * thread - confirm
 * @param filter_empty_lines - NOTE(review): presumably whether empty lines are
 * skipped - confirm
 *
 * @return NOTE(review): presumably a pointer just past the parsed row -
 * confirm against the implementation
 */
110 template <typename T>
111 const char* get_row(const char* buf,
112  const char* buf_end,
113  const char* entire_buf_end,
114  const import_export::CopyParams& copy_params,
115  const bool* is_array,
116  std::vector<T>& row,
117  std::vector<std::unique_ptr<char[]>>& tmp_buffers,
118  bool& try_single_thread,
119  bool filter_empty_lines);
120 
/**
 * Parses given string array and inserts into given vector of strings.
 *
 * @param s - string form of the array to parse
 * @param copy_params - copy parameters; NOTE(review): presumably supply the
 * array delimiter and bracket characters - confirm
 * @param string_vec - vector of strings into which the parsed values are
 * inserted
 * @param truncate_values - defaults to false; NOTE(review): presumably
 * controls whether over-long values are truncated - confirm
 */
128 void parse_string_array(const std::string& s,
129  const import_export::CopyParams& copy_params,
130  std::vector<std::string>& string_vec,
131  bool truncate_values = false);
132 
/**
 * NOTE(review): no description of this function is available here; going by
 * its name and signature it presumably grows the given buffer and fills the
 * added space from file or file_reader, limited by max_buffer_resize -
 * confirm against the implementation.
 *
 * buffer, buffer_size and alloc_size are taken by non-const reference, so the
 * call may update them.
 *
 * @param buffer - owned character buffer
 * @param buffer_size - NOTE(review): presumably the size of the data held in
 * the buffer - confirm
 * @param alloc_size - NOTE(review): presumably the allocated size of the
 * buffer - confirm
 * @param file - C file handle
 * @param file_reader - file reader; NOTE(review): presumably may be null, as
 * in find_row_end_pos where it defaults to nullptr - confirm
 * @param max_buffer_resize - NOTE(review): presumably the upper limit for the
 * resized buffer - confirm
 */
146 void extend_buffer(std::unique_ptr<char[]>& buffer,
147  size_t& buffer_size,
148  size_t& alloc_size,
149  FILE* file,
150  foreign_storage::FileReader* file_reader,
151  size_t max_buffer_resize);
152 } // namespace delimited_parser
153 
154 } // namespace import_export
size_t find_beginning(const char *buffer, size_t begin, size_t end, const import_export::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread, bool filter_empty_lines)
Parses the first row in the given buffer and inserts fields into given vector.
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
size_t get_max_buffer_resize()
Gets the maximum size to which thread buffers should be automatically resized.
void set_max_buffer_resize(const size_t max_buffer_resize_param)
Sets the maximum size to which thread buffers should be automatically resized. This function is only ...
void extend_buffer(std::unique_ptr< char[]> &buffer, size_t &buffer_size, size_t &alloc_size, FILE *file, foreign_storage::FileReader *file_reader, size_t max_buffer_resize)
size_t find_row_end_pos(size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FILE *file, foreign_storage::FileReader *file_reader)
Finds the closest possible row ending to the end of the given buffer. The buffer is resized as needed...