OmniSciDB  baf940c279
DelimitedParserUtils.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * @file DelimitedParserUtils.h
19  * @author Mehmet Sariyuce <mehmet.sariyuce@omnisci.com>
20  * @brief utilities for parsing delimited data
21  */
22 
23 #pragma once
24 
25 #include <string>
26 #include <vector>
27 
30 
31 namespace import_export {
32 namespace delimited_parser {
33 
/**
 * @brief Exception type signalling an insufficient buffer size.
 *
 * Derives from std::runtime_error, so handlers for std::runtime_error or
 * std::exception also catch it, and what() yields the message given at construction.
 *
 * NOTE(review): presumably thrown by the delimited-parser routines declared in this
 * header when a row cannot fit into the working buffer - confirm against the
 * definitions, which are not visible here.
 */
class InsufficientBufferSizeException : public std::runtime_error {
 public:
  /**
   * @param message Human-readable description of the failure; returned by what().
   *
   * Declared explicit so that a std::string is never implicitly converted into an
   * exception object. Direct construction (including from a string literal) is
   * unaffected.
   */
  explicit InsufficientBufferSizeException(const std::string& message)
      : std::runtime_error(message) {}
};
39 
/**
 * @brief Finds the closest possible row beginning in the given buffer.
 *
 * NOTE(review): only the declaration is visible here. Presumably `begin` and `end`
 * are offsets into `buffer` bounding the search, `copy_params` supplies the
 * delimiter settings, and the return value is the offset of the row beginning that
 * was found - confirm against the definition.
 */
51 size_t find_beginning(const char* buffer,
52  size_t begin,
53  size_t end,
54  const CopyParams& copy_params);
55 
/**
 * @brief Gets the maximum size to which thread buffers should be automatically
 * resized.
 *
 * @return The current maximum resize limit (counterpart of set_max_buffer_resize()).
 */
59 size_t get_max_buffer_resize();
60 
/**
 * @brief Sets the maximum size to which thread buffers should be automatically
 * resized.
 *
 * @param max_buffer_resize New maximum resize limit (counterpart of
 * get_max_buffer_resize()).
 */
65 void set_max_buffer_resize(const size_t max_buffer_resize);
66 
/**
 * @brief Finds the closest possible row ending to the end of the given buffer.
 * The buffer is resized as needed.
 *
 * `alloc_size`, `buffer`, `buffer_size` and `num_rows_in_buffer` are taken by
 * non-const reference, so the call may update them. `csv_reader` is optional and
 * defaults to nullptr.
 *
 * NOTE(review): only the declaration is visible here. Presumably additional data is
 * read from `file` (or from `csv_reader` when one is supplied) when the buffer has
 * to grow, growth is capped by get_max_buffer_resize(), and
 * InsufficientBufferSizeException is thrown when no row ending can be found -
 * confirm against the definition.
 */
85 size_t find_row_end_pos(size_t& alloc_size,
86  std::unique_ptr<char[]>& buffer,
87  size_t& buffer_size,
88  const CopyParams& copy_params,
89  const size_t buffer_first_row_index,
90  unsigned int& num_rows_in_buffer,
91  FILE* file,
92  foreign_storage::CsvReader* csv_reader = nullptr);
93 
/**
 * @brief Parses the first row in the given buffer and inserts fields into given
 * vector.
 *
 * @tparam T Element type of the `row` vector that receives the parsed fields.
 *
 * NOTE(review): only the declaration is visible in this listing; presumably the
 * template is defined and explicitly instantiated in the implementation file.
 * Also presumably: the returned pointer marks where parsing stopped, `is_array`
 * points to per-column flags, `tmp_buffers` owns scratch storage referenced by the
 * parsed fields, and `try_single_thread` is set when the row cannot be parsed
 * within [buf, buf_end) - confirm all of these against the definition.
 */
108 template <typename T>
109 const char* get_row(const char* buf,
110  const char* buf_end,
111  const char* entire_buf_end,
112  const import_export::CopyParams& copy_params,
113  const bool* is_array,
114  std::vector<T>& row,
115  std::vector<std::unique_ptr<char[]>>& tmp_buffers,
116  bool& try_single_thread);
117 
/**
 * @brief Parses given string array and inserts into given vector of strings.
 *
 * @param s            Text holding the string array to parse.
 * @param copy_params  Copy parameters consulted while parsing.
 *                     NOTE(review): presumably the array begin/end markers and the
 *                     array delimiter - confirm against the definition.
 * @param string_vec   Output vector that receives the parsed strings.
 */
125 void parse_string_array(const std::string& s,
126  const import_export::CopyParams& copy_params,
127  std::vector<std::string>& string_vec);
128 } // namespace delimited_parser
129 
130 } // namespace import_export
size_t find_beginning(const char *buffer, size_t begin, size_t end, const import_export::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
size_t get_max_buffer_resize()
Gets the maximum size to which thread buffers should be automatically resized.
void set_max_buffer_resize(const size_t max_buffer_resize_param)
Sets the maximum size to which thread buffers should be automatically resized. This function is only ...
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread)
Parses the first row in the given buffer and inserts fields into given vector.
size_t find_row_end_pos(size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FILE *file, foreign_storage::CsvReader *csv_reader)
Finds the closest possible row ending to the end of the given buffer. The buffer is resized as needed...
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec)
Parses given string array and inserts into given vector of strings.