OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TextFileBufferParser.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
21 
22 #include "ImportExport/Importer.h"
23 
24 namespace foreign_storage {
// Copy construction is disabled: a request cannot be duplicated.
26  ParseBufferRequest(const ParseBufferRequest& request) = delete;
// Move construction uses the compiler-generated member-wise move. The members
// declared further down include std::unique_ptr fields (buffer,
// foreign_table_schema, import_buffers), which are movable but not copyable.
27  ParseBufferRequest(ParseBufferRequest&& request) = default;
// NOTE(review): the opening lines of this declaration (listing lines 28-29) are
// absent from this rendered listing; only its trailing parameters are visible.
// Presumably this is the ParseBufferRequest constructor - confirm against the
// original header.
30  int db_id,
31  const ForeignTable* foreign_table,
// NOTE(review): column_filter_set is declared as a const value, so the std::set
// is copied on every call. A const reference would avoid the copy, but the
// out-of-line definition would have to change in step.
32  const std::set<int> column_filter_set,
33  const std::string& full_path);
34 
// Returns a shared_ptr to the catalog after running CHECK on it.
// NOTE(review): the statement that declares the local `catalog` (listing line 36)
// is absent from this rendered listing. Presumably it looks the catalog up from
// db_id through SysCatalog - confirm against the original header.
35  inline std::shared_ptr<Catalog_Namespace::Catalog> getCatalog() const {
37  CHECK(catalog);
38  return catalog;
39  }
40 
// Returns, by value, the list of column descriptors obtained from
// foreign_table_schema->getLogicalAndPhysicalColumns().
// foreign_table_schema is dereferenced without a null check.
41  inline std::list<const ColumnDescriptor*> getColumns() const {
42  return foreign_table_schema->getLogicalAndPhysicalColumns();
43  }
44 
// Returns the tableId field of the table reached through
// foreign_table_schema->getForeignTable().
// Neither foreign_table_schema nor the getForeignTable() result is null-checked here.
45  inline int32_t getTableId() const {
46  return foreign_table_schema->getForeignTable()->tableId;
47  }
48 
// Returns, as a std::string by value, the tableName field of the table reached
// through foreign_table_schema->getForeignTable().
// Neither foreign_table_schema nor the getForeignTable() result is null-checked here.
49  inline std::string getTableName() const {
50  return foreign_table_schema->getForeignTable()->tableName;
51  }
52 
// Returns, as a size_t, the maxFragRows field of the table reached through
// foreign_table_schema->getForeignTable().
// NOTE(review): the declared type of maxFragRows is not visible here; if it is
// not size_t an implicit conversion happens on return - confirm.
53  inline size_t getMaxFragRows() const {
54  return foreign_table_schema->getForeignTable()->maxFragRows;
55  }
56 
// Returns a copy of the full_path member (the return type is std::string by value).
57  inline std::string getFilePath() const { return full_path; }
58 
59  // These must be initialized at construction (before parsing).
// Character buffer owned by this request.
60  std::unique_ptr<char[]> buffer;
// No default member initializer, so this is indeterminate until assigned.
// NOTE(review): presumably the size of `buffer` - confirm.
61  size_t buffer_size;
// NOTE(review): listing lines 62-63 are absent from this rendering. The index at
// the end of the page lists a `const import_export::CopyParams copy_params`
// member, which presumably belongs here - confirm.
// db_id is a const data member, so the implicitly-declared assignment operators
// of the enclosing type are deleted; only construction can set it.
64  const int db_id;
// Schema object owned by this request; the inline accessors above read through it.
65  std::unique_ptr<ForeignTableSchema> foreign_table_schema;
// Owned import buffers, one std::unique_ptr per element.
66  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
67 
68  // These are set during parsing.
// NOTE(review): listing lines 69, 72 and 74 are absent from this rendering, so
// members may be missing between the ones shown below - confirm.
// None of the size_t members below has a default member initializer.
// NOTE(review): begin_pos/end_pos presumably bound the region of `buffer` to
// parse, and file_offset presumably locates it in the source file - confirm.
70  size_t begin_pos;
71  size_t end_pos;
73  size_t file_offset;
// Value handed back by getFilePath().
// NOTE(review): listed under "set during parsing", yet a `full_path` parameter
// appears in the declaration above - confirm when it is actually assigned.
75  std::string full_path;
76 };
77 
// NOTE(review): the line that opens this aggregate (listing line 78) is absent
// from this rendering; presumably it is `struct ParseBufferResult {` - confirm.
// Map from an int key to a DataBlockPtr; going by the name, the key is a column id.
79  std::map<int, DataBlockPtr> column_id_to_data_blocks_map;
// No default member initializer, so this is indeterminate until assigned.
// NOTE(review): presumably the number of rows parsed - confirm.
80  size_t row_count;
// NOTE(review): presumably one offset per parsed row - confirm units and base.
81  std::vector<size_t> row_offsets;
82 };
83 
85  public:
// Trailing parameters of the pure-virtual parseBuffer declaration. Its opening
// lines are absent from this rendering; the index at the end of the page gives
// the full signature:
//   virtual ParseBufferResult parseBuffer(ParseBufferRequest& request,
//                                         bool convert_data_blocks,
//                                         bool columns_are_pre_filtered = false) const = 0
// NOTE(review): a default argument on a virtual function is chosen from the
// static type of the call expression, so overriders should repeat `= false`.
94  bool convert_data_blocks,
95  bool columns_are_pre_filtered = false) const = 0;
// Trailing parameter of the pure-virtual validateAndGetCopyParams declaration.
// Its opening line is absent from this rendering; the index at the end of the
// page gives the full signature:
//   virtual import_export::CopyParams validateAndGetCopyParams(
//       const ForeignTable* foreign_table) const = 0
101  const ForeignTable* foreign_table) const = 0;
102 
// Pure virtual; concrete parsers must implement it. Returns a size_t.
// alloc_size, buffer, buffer_size and num_rows_in_buffer are non-const
// references, so an implementation may modify the caller's values.
// file_reader is a non-const pointer.
// NOTE(review): going by the name, the result is the position at which the last
// complete row in `buffer` ends, and the reference parameters let the
// implementation grow the buffer by reading more from file_reader - confirm
// against the implementations.
110  virtual size_t findRowEndPosition(size_t& alloc_size,
111  std::unique_ptr<char[]>& buffer,
112  size_t& buffer_size,
113  const import_export::CopyParams& copy_params,
114  const size_t buffer_first_row_index,
115  unsigned int& num_rows_in_buffer,
116  FileReader* file_reader) const = 0;
117 
// Pure virtual; concrete parsers must implement it. Returns nothing.
// Both arguments are pointers to const.
// NOTE(review): how a validation failure is reported (presumably by throwing)
// is not visible in this declaration - confirm against the implementations.
121  virtual void validateFiles(const FileReader* file_reader,
122  const ForeignTable* foreign_table) const = 0;
123 
// Static helper, declared here and defined elsewhere. Takes the import buffers
// by const reference and returns a std::map<int, DataBlockPtr> by value.
// NOTE(review): the map key is presumably a column id, matching the
// column_id_to_data_blocks_map member declared above - confirm.
124  static std::map<int, DataBlockPtr> convertImportBuffersToDataBlocks(
125  const std::vector<std::unique_ptr<import_export::TypedImportBuffer>>&
126  import_buffers);
127 
// Static predicate over a single datum, passed as a std::string_view by value.
// Declared here and defined elsewhere.
// NOTE(review): presumably true when `datum` is a single numeric coordinate
// rather than a full geo literal - confirm against the definition.
128  static bool isCoordinateScalar(const std::string_view datum);
129 
// Static helper, declared here and defined elsewhere. Returns nothing; any
// results are delivered through its arguments.
// import_buffers, col_idx, cd_it, row and import_idx are non-const references,
// so the definition may modify the caller's objects.
// NOTE(review): presumably col_idx, cd_it and import_idx are advanced past the
// columns consumed by one geo value - confirm against the definition.
// NOTE(review): `catalog` is a std::shared_ptr passed by value, so each call
// copies the pointer and touches its reference count.
130  static void processGeoColumn(
131  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
132  size_t& col_idx,
133  const import_export::CopyParams& copy_params,
134  std::list<const ColumnDescriptor*>::iterator& cd_it,
135  std::vector<std::string_view>& row,
136  size_t& import_idx,
137  bool is_null,
138  size_t first_row_index,
139  size_t row_index_plus_one,
140  std::shared_ptr<Catalog_Namespace::Catalog> catalog);
141 
// Static predicate, declared here and defined elsewhere. Takes the datum as a
// std::string_view by value, a pointer to a const ColumnDescriptor, and the
// null indicator string by const reference.
// NOTE(review): presumably true when `datum` should be treated as NULL for
// `column`, e.g. when it equals null_indicator - confirm the exact rules
// against the definition.
142  static bool isNullDatum(const std::string_view datum,
143  const ColumnDescriptor* column,
144  const std::string& null_indicator);
145 };
146 } // namespace foreign_storage
std::vector< std::unique_ptr< import_export::TypedImportBuffer > > import_buffers
virtual void validateFiles(const FileReader *file_reader, const ForeignTable *foreign_table) const =0
static std::map< int, DataBlockPtr > convertImportBuffersToDataBlocks(const std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers)
ParseBufferRequest(const ParseBufferRequest &request)=delete
std::map< int, DataBlockPtr > column_id_to_data_blocks_map
static void processGeoColumn(std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, size_t &col_idx, const import_export::CopyParams &copy_params, std::list< const ColumnDescriptor * >::iterator &cd_it, std::vector< std::string_view > &row, size_t &import_idx, bool is_null, size_t first_row_index, size_t row_index_plus_one, std::shared_ptr< Catalog_Namespace::Catalog > catalog)
const import_export::CopyParams copy_params
std::unique_ptr< ForeignTableSchema > foreign_table_schema
static SysCatalog & instance()
Definition: SysCatalog.h:325
CONSTEXPR DEVICE bool is_null(const T &value)
specifies the content in-memory of a row in the column metadata table
std::list< const ColumnDescriptor * > getColumns() const
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
std::shared_ptr< Catalog_Namespace::Catalog > getCatalog() const
virtual size_t findRowEndPosition(size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const import_export::CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FileReader *file_reader) const =0
#define CHECK(condition)
Definition: Logger.h:209
virtual import_export::CopyParams validateAndGetCopyParams(const ForeignTable *foreign_table) const =0
static bool isNullDatum(const std::string_view datum, const ColumnDescriptor *column, const std::string &null_indicator)
virtual ParseBufferResult parseBuffer(ParseBufferRequest &request, bool convert_data_blocks, bool columns_are_pre_filtered=false) const =0
static bool isCoordinateScalar(const std::string_view datum)