OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TextFileBufferParser.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
21 
22 #include "ImportExport/Importer.h"
24 
25 namespace foreign_storage {
26 
// Copy construction is disabled; move construction uses the compiler-generated
// member-wise move.
28  ParseBufferRequest(const ParseBufferRequest& request) = delete;
29  ParseBufferRequest(ParseBufferRequest&& request) = default;
32  int db_id,
33  const ForeignTable* foreign_table,
34  const std::set<int> column_filter_set,
35  const std::string& full_path,
37  const bool track_rejected_rows = false);
38 
39  inline std::shared_ptr<Catalog_Namespace::Catalog> getCatalog() const {
41  CHECK(catalog);
42  return catalog;
43  }
44 
// Returns, by value, the column descriptor list produced by
// foreign_table_schema->getLogicalAndPhysicalColumns().
// foreign_table_schema is dereferenced without a null check.
45  inline std::list<const ColumnDescriptor*> getColumns() const {
46  return foreign_table_schema->getLogicalAndPhysicalColumns();
47  }
48 
// Returns the tableId field of the ForeignTable obtained from
// foreign_table_schema. Neither pointer in the chain is null-checked here.
49  inline int32_t getTableId() const {
50  return foreign_table_schema->getForeignTable()->tableId;
51  }
52 
// Returns a copy of the tableName field of the ForeignTable obtained from
// foreign_table_schema. Neither pointer in the chain is null-checked here.
53  inline std::string getTableName() const {
54  return foreign_table_schema->getForeignTable()->tableName;
55  }
56 
// Returns the maxFragRows field of the ForeignTable obtained from
// foreign_table_schema, as a size_t. Neither pointer in the chain is
// null-checked here.
57  inline size_t getMaxFragRows() const {
58  return foreign_table_schema->getForeignTable()->maxFragRows;
59  }
60 
// Returns a copy of the full_path member.
61  inline std::string getFilePath() const { return full_path; }
62 
63  // These must be initialized at construction (before parsing).
64  std::unique_ptr<char[]> buffer;
65  size_t buffer_size;
68  const int db_id;
69  std::unique_ptr<ForeignTableSchema> foreign_table_schema;
70  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
72 
73  // These are set during parsing.
75  size_t begin_pos;
76  size_t end_pos;
78  size_t file_offset;
80  std::string full_path;
81 
82  // This parameter controls the behaviour of error handling in the data wrapper
83  const bool track_rejected_rows;
84 };
85 
87  std::map<int, DataBlockPtr> column_id_to_data_blocks_map;
88  size_t row_count;
89  std::vector<size_t> row_offsets;
90  std::set<size_t> rejected_rows;
91 };
92 
94  public:
103  bool convert_data_blocks,
104  bool columns_are_pre_filtered = false) const = 0;
110  const ForeignTable* foreign_table) const = 0;
111 
// Pure virtual; each concrete parser supplies the implementation.
// alloc_size, buffer, buffer_size and num_rows_in_buffer are taken by non-const
// reference, so an implementation is able to modify them.
// NOTE(review): the name suggests this returns the position at which the last
// row in `buffer` ends, possibly reading more data through file_reader —
// confirm against the implementations.
119  virtual size_t findRowEndPosition(size_t& alloc_size,
120  std::unique_ptr<char[]>& buffer,
121  size_t& buffer_size,
122  const import_export::CopyParams& copy_params,
123  const size_t buffer_first_row_index,
124  unsigned int& num_rows_in_buffer,
125  FileReader* file_reader) const = 0;
126 
// Pure virtual; each concrete parser supplies the implementation.
// Both arguments are pointers to const, so they cannot be modified through
// these parameters.
// NOTE(review): how a validation failure is reported (e.g. by throwing) is not
// visible in this declaration — confirm against the implementations.
130  virtual void validateFiles(const FileReader* file_reader,
131  const ForeignTable* foreign_table) const = 0;
132 
// Static helper that builds a std::map<int, DataBlockPtr> from the given import
// buffers, which are taken by const reference.
// NOTE(review): the int key is presumably a column id, matching the type of
// column_id_to_data_blocks_map — confirm against the definition.
133  static std::map<int, DataBlockPtr> convertImportBuffersToDataBlocks(
134  const std::vector<std::unique_ptr<import_export::TypedImportBuffer>>&
135  import_buffers);
136 
// Static predicate over a single datum passed as a string_view.
// NOTE(review): presumably reports whether `datum` is a lone numeric
// coordinate value rather than a full geometry string — confirm against the
// definition.
137  static bool isCoordinateScalar(const std::string_view datum);
138 
// Static helper for handling a geo column of one parsed row.
// import_buffers, col_idx, cd_it, row and import_idx are taken by non-const
// reference, so the function is able to modify them; catalog is a shared_ptr
// passed by value.
// NOTE(review): presumably advances col_idx / cd_it / import_idx past the geo
// column's physical columns — confirm against the definition.
// NOTE(review): whether render_group_analyzer_map may be null is not visible
// here — confirm against the definition.
139  static void processGeoColumn(
140  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
141  size_t& col_idx,
142  const import_export::CopyParams& copy_params,
143  std::list<const ColumnDescriptor*>::iterator& cd_it,
144  std::vector<std::string_view>& row,
145  size_t& import_idx,
146  bool is_null,
147  size_t first_row_index,
148  size_t row_index_plus_one,
149  std::shared_ptr<Catalog_Namespace::Catalog> catalog,
150  const RenderGroupAnalyzerMap* render_group_analyzer_map);
151 
// Static helper operating on a ParseBufferRequest, which is taken by non-const
// reference and so can be modified. cd_it is also a non-const reference to a
// column-list iterator.
// NOTE(review): the name suggests it fills the remaining columns of a rejected
// row, starting from cd_it / col_idx, with placeholder data — confirm against
// the definition.
156  static void fillRejectedRowWithInvalidData(
157  const std::list<const ColumnDescriptor*>& columns,
158  std::list<const ColumnDescriptor*>::iterator& cd_it,
159  const size_t col_idx,
160  ParseBufferRequest& request);
161 
// Static predicate over a single datum, its column descriptor and a
// null-indicator string.
// NOTE(review): presumably returns true when `datum` should be treated as NULL
// for `column`, e.g. when it equals null_indicator — confirm the exact rules
// against the definition.
162  static bool isNullDatum(const std::string_view datum,
163  const ColumnDescriptor* column,
164  const std::string& null_indicator);
165 
// String constants defined in the header as C++17 inline static members.
// NOTE(review): presumably the names of foreign-table options that these
// parsers read — confirm at the use sites.
166  inline static const std::string THREADS_KEY = "THREADS";
167  inline static const std::string BUFFER_SIZE_KEY = "BUFFER_SIZE";
168 
169  private:
// Private static helper, declared after the `private:` specifier.
// import_buffers and col_idx are taken by non-const reference, so the function
// is able to modify them; catalog is a shared_ptr passed by value.
// NOTE(review): presumably the counterpart of processGeoColumn for a geo value
// that could not be parsed — confirm against the definition.
170  static void processInvalidGeoColumn(
171  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
172  size_t& col_idx,
173  const import_export::CopyParams& copy_params,
174  const ColumnDescriptor* cd,
175  std::shared_ptr<Catalog_Namespace::Catalog> catalog);
176 };
177 } // namespace foreign_storage
std::vector< std::unique_ptr< import_export::TypedImportBuffer > > import_buffers
virtual void validateFiles(const FileReader *file_reader, const ForeignTable *foreign_table) const =0
static std::map< int, DataBlockPtr > convertImportBuffersToDataBlocks(const std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers)
ParseBufferRequest(const ParseBufferRequest &request)=delete
std::map< int, DataBlockPtr > column_id_to_data_blocks_map
const import_export::CopyParams copy_params
static void fillRejectedRowWithInvalidData(const std::list< const ColumnDescriptor * > &columns, std::list< const ColumnDescriptor * >::iterator &cd_it, const size_t col_idx, ParseBufferRequest &request)
std::unique_ptr< ForeignTableSchema > foreign_table_schema
static SysCatalog & instance()
Definition: SysCatalog.h:337
CONSTEXPR DEVICE bool is_null(const T &value)
specifies the content in-memory of a row in the column metadata table
std::list< const ColumnDescriptor * > getColumns() const
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
std::shared_ptr< Catalog_Namespace::Catalog > getCatalog() const
virtual size_t findRowEndPosition(size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const import_export::CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FileReader *file_reader) const =0
static void processInvalidGeoColumn(std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, size_t &col_idx, const import_export::CopyParams &copy_params, const ColumnDescriptor *cd, std::shared_ptr< Catalog_Namespace::Catalog > catalog)
#define CHECK(condition)
Definition: Logger.h:223
virtual import_export::CopyParams validateAndGetCopyParams(const ForeignTable *foreign_table) const =0
const RenderGroupAnalyzerMap * render_group_analyzer_map
static void processGeoColumn(std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, size_t &col_idx, const import_export::CopyParams &copy_params, std::list< const ColumnDescriptor * >::iterator &cd_it, std::vector< std::string_view > &row, size_t &import_idx, bool is_null, size_t first_row_index, size_t row_index_plus_one, std::shared_ptr< Catalog_Namespace::Catalog > catalog, const RenderGroupAnalyzerMap *render_group_analyzer_map)
static bool isNullDatum(const std::string_view datum, const ColumnDescriptor *column, const std::string &null_indicator)
virtual ParseBufferResult parseBuffer(ParseBufferRequest &request, bool convert_data_blocks, bool columns_are_pre_filtered=false) const =0
static bool isCoordinateScalar(const std::string_view datum)
std::map< int, std::unique_ptr< import_export::RenderGroupAnalyzer >> RenderGroupAnalyzerMap