OmniSciDB  2e3a973ef4
CsvDataWrapper.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <map>
20 #include <vector>
21 
22 #include "Catalog/Catalog.h"
23 #include "Catalog/ForeignTable.h"
24 #include "DataMgr/Chunk/Chunk.h"
26 #include "ForeignDataWrapper.h"
27 #include "ImportExport/Importer.h"
28 
29 namespace foreign_storage {
30 
/**
 * Describes a contiguous region of a file in terms of its byte position and the
 * rows it contains. Regions are ordered by their starting byte offset (see
 * operator<).
 */
struct FileRegion {
  // Name of file containing region
  std::string filename;
  // Byte offset (within file) for the beginning of file region
  size_t first_row_file_offset{0};
  // Index of first row in file region relative to the first row/non-header line in the
  // file
  // NOTE(review): this declaration was absent from the listing this header was
  // recovered from; the member name is reconstructed from the comment above. Confirm
  // against the code that reads it.
  size_t first_row_index{0};
  // Number of rows in file region
  size_t row_count{0};
  // Size of file region in bytes
  size_t region_size{0};

  // Orders regions by the byte offset at which they begin.
  bool operator<(const FileRegion& other) const {
    return first_row_file_offset < other.first_row_file_offset;
  }
};

using FileRegions = std::vector<FileRegion>;
54 
56  public:
57  CsvDataWrapper(const int db_id, const ForeignTable* foreign_table);
58 
59  void populateChunkMetadata(ChunkMetadataVector& chunk_metadata_vector) override;
60 
61  void populateChunkBuffers(
62  std::map<ChunkKey, AbstractBuffer*>& required_buffers,
63  std::map<ChunkKey, AbstractBuffer*>& optional_buffers) override;
64 
65  static void validateOptions(const ForeignTable* foreign_table);
66 
67  static std::vector<std::string_view> getSupportedOptions();
68 
69  void serializeDataWrapperInternals(const std::string& file_path) const override;
70 
71  void restoreDataWrapperInternals(const std::string& file_path,
72  const ChunkMetadataVector& chunk_metadata) override;
73  bool isRestored() const override;
74 
75  private:
76  CsvDataWrapper(const ForeignTable* foreign_table);
77 
85  void populateChunks(std::map<int, Chunk_NS::Chunk>& column_id_to_chunk_map,
86  int fragment_id);
87 
88  std::string getFilePath();
89  import_export::CopyParams validateAndGetCopyParams();
90  void validateFilePath();
91 
101  std::string validateAndGetStringWithLength(const std::string& option_name,
102  const size_t expected_num_chars);
103 
113  std::optional<bool> validateAndGetBoolValue(const std::string& option_name);
114 
115  void populateChunkMapForColumns(const std::set<const ColumnDescriptor*>& columns,
116  const int fragment_id,
117  const std::map<ChunkKey, AbstractBuffer*>& buffers,
118  std::map<int, Chunk_NS::Chunk>& column_id_to_chunk_map);
119 
120  std::map<ChunkKey, std::shared_ptr<ChunkMetadata>> chunk_metadata_map_;
121  std::map<int, FileRegions> fragment_id_to_file_regions_map_;
122 
123  std::unique_ptr<CsvReader> csv_reader_;
124 
125  const int db_id_;
127  std::mutex file_access_mutex_;
129 
130  // Data needed for append workflow
131  std::map<ChunkKey, std::unique_ptr<ForeignStorageBuffer>> chunk_encoder_buffers_;
132  std::map<ChunkKey, size_t> chunk_byte_count_;
133  // How many rows have been read
134  size_t num_rows_;
135  // What byte offset we left off at in the csv_reader
137  // Is this datawrapper restored from disk
139  static constexpr std::array<char const*, 13> supported_options_{"BASE_PATH",
140  "FILE_PATH",
141  "ARRAY_DELIMITER",
142  "ARRAY_MARKER",
143  "BUFFER_SIZE",
144  "DELIMITER",
145  "ESCAPE",
146  "HEADER",
147  "LINE_DELIMITER",
148  "LONLAT",
149  "NULLS",
150  "QUOTE",
151  "QUOTED"};
152 };
153 } // namespace foreign_storage
std::map< ChunkKey, size_t > chunk_byte_count_
std::map< ChunkKey, std::unique_ptr< ForeignStorageBuffer > > chunk_encoder_buffers_
std::unique_ptr< CsvReader > csv_reader_
This file contains the class specification and related data structures for Catalog.
std::vector< FileRegion > FileRegions
std::map< int, FileRegions > fragment_id_to_file_regions_map_
bool operator<(const FileRegion &other) const
std::map< ChunkKey, std::shared_ptr< ChunkMetadata > > chunk_metadata_map_
const ForeignTable * foreign_table_
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata > >> ChunkMetadataVector