OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RegexFileBufferParser.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
21 
// NOTE(review): this is a numbered listing extract, and the embedded listing
// numbers skip 23, 27, 32 and 69. The class head, the first line of two member
// declarations, and the initializer of max_buffer_resize_ are therefore not
// visible here. Comments below describe only what the visible lines show.
22 namespace foreign_storage {
// Listing line 23 (presumably the class head and base-class list for
// RegexFileBufferParser) is missing from this extract. The `override`
// specifiers below imply a polymorphic base -- TODO confirm against the file.
24  public:
 // Constructs a parser from a foreign table description.
 // NOTE(review): single-argument constructor is not marked `explicit`, so it
 // also acts as an implicit conversion from `const ForeignTable*`.
25  RegexFileBufferParser(const ForeignTable* foreign_table);
26 
 // Tail of an overriding member declaration whose first line (listing line 27,
 // carrying the return type, name and leading parameter) is missing here.
 // NOTE(review): default arguments on an overriding function are selected by
 // the static type at the call site, not by dynamic dispatch.
28  bool convert_data_blocks,
29  bool columns_are_pre_filtered = false,
30  bool skip_dict_encoding = false) const override;
31 
 // Tail of another overriding member declaration; its first line (listing
 // line 32) is missing from this extract.
33  const ForeignTable* foreign_table) const override;
34 
 // Overrides a base-class virtual. Takes the buffer, its size and allocation
 // size, and the row counter by non-const reference, so the implementation is
 // able to modify them; returns a size_t. Presumably the position of the end
 // of the last complete row in the buffer -- TODO confirm in the .cpp.
35  size_t findRowEndPosition(size_t& alloc_size,
36  std::unique_ptr<char[]>& buffer,
37  size_t& buffer_size,
38  const import_export::CopyParams& copy_params,
39  const size_t buffer_first_row_index,
40  unsigned int& num_rows_in_buffer,
41  FileReader* file_reader) const override;
42 
 // Overrides a base-class virtual. Returns nothing, so any failure is
 // presumably reported by throwing -- TODO confirm in the .cpp.
43  void validateFiles(const FileReader* file_reader,
44  const ForeignTable* foreign_table) const override;
45 
46  // For testing purposes only
47  static void setMaxBufferResize(size_t max_buffer_resize);
48 
 // String constants defined in-class via C++17 `inline static`. Presumably
 // the names of foreign table options read by this parser -- TODO confirm.
49  inline static const std::string LINE_REGEX_KEY = "LINE_REGEX";
50  inline static const std::string LINE_START_REGEX_KEY = "LINE_START_REGEX";
51  inline static const std::string HEADER_KEY = "HEADER";
52 
53  protected:
 // Virtual hook that derived parsers can override. Receives one row as a
 // string plus the regex to apply, and two output vectors (owning strings and
 // string views) passed by non-const reference. Returns a bool, presumably
 // whether the row matched -- TODO confirm in the .cpp.
54  virtual bool regexMatchColumns(const std::string& row_str,
55  const boost::regex& line_regex,
56  size_t logical_column_count,
57  std::vector<std::string>& parsed_columns_str,
58  std::vector<std::string_view>& parsed_columns_sv,
59  const std::string& file_path) const;
60 
 // Virtual policy query that derived parsers can override.
61  virtual bool shouldRemoveNonMatches() const;
62 
 // Virtual policy query that derived parsers can override.
63  virtual bool shouldTruncateStringValues() const;
64 
65  private:
 // Static accessor; presumably returns max_buffer_resize_ -- TODO confirm.
66  static size_t getMaxBufferResize();
67 
 // Static (class-wide, mutable) size value. Its initializer sits on listing
 // line 69, which is missing from this extract, along with the closing brace.
68  inline static size_t max_buffer_resize_{
70 
71  // Flag added for testing purposes only
72  inline static bool skip_first_line_{false};
73 
 // Per-instance regex state: one mandatory regex and one optional regex.
 // Presumably built from the LINE_REGEX / LINE_START_REGEX options -- TODO
 // confirm in the constructor.
74  boost::regex line_regex_;
75  std::optional<boost::regex> line_start_regex_;
76 };
77 } // namespace foreign_storage
import_export::CopyParams validateAndGetCopyParams(const ForeignTable *foreign_table) const override
RegexFileBufferParser(const ForeignTable *foreign_table)
static void setMaxBufferResize(size_t max_buffer_resize)
ParseBufferResult parseBuffer(ParseBufferRequest &request, bool convert_data_blocks, bool columns_are_pre_filtered=false, bool skip_dict_encoding=false) const override
size_t findRowEndPosition(size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const import_export::CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FileReader *file_reader) const override
std::optional< boost::regex > line_start_regex_
virtual bool regexMatchColumns(const std::string &row_str, const boost::regex &line_regex, size_t logical_column_count, std::vector< std::string > &parsed_columns_str, std::vector< std::string_view > &parsed_columns_sv, const std::string &file_path) const
static constexpr size_t max_import_buffer_resize_byte_size
Definition: CopyParams.h:37
void validateFiles(const FileReader *file_reader, const ForeignTable *foreign_table) const override