OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParquetImporter.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <map>
20 #include <unordered_set>
21 #include <vector>
22 
24 #include "Catalog/Catalog.h"
25 #include "Catalog/ForeignTable.h"
26 #include "DataMgr/Chunk/Chunk.h"
27 #include "ForeignDataWrapper.h"
28 #include "ForeignTableSchema.h"
30 #include "ImportExport/Importer.h"
31 #include "Interval.h"
32 #include "LazyParquetChunkLoader.h"
33 
34 namespace foreign_storage {
35 
37  public:
38  virtual ~AbstractRowGroupIntervalTracker() = default;
// Pure virtual: each concrete tracker must implement this.
// Returns the next RowGroupInterval wrapped in std::optional.
// NOTE(review): presumably an empty optional signals that no intervals
// remain -- confirm against the implementing classes.
39  virtual std::optional<RowGroupInterval> getNextRowGroupInterval() = 0;
40 };
41 
43  public:
45 
// Constructor taking a database id, a foreign table, and a user mapping.
// Both pointers are passed as non-owning raw pointers to const objects.
// NOTE(review): the constructor body is not in this header; presumably the
// pointed-to objects must outlive the importer -- confirm in the .cpp.
46  ParquetImporter(const int db_id,
47  const ForeignTable* foreign_table,
48  const UserMapping* user_mapping);
49 
// Overrides a base-class virtual (the base class is not visible in this
// listing). Takes the metadata vector by non-const reference.
// NOTE(review): presumably an output parameter filled by the call -- confirm.
50  void populateChunkMetadata(ChunkMetadataVector& chunk_metadata_vector) override;
51 
// Overrides a base-class virtual (the base class is not visible in this
// listing). Receives two chunk-key-to-buffer maps by const reference, named
// for required and optional buffers respectively.
// NOTE(review): the buffers are reached through AbstractBuffer* values, so
// the const maps do not prevent the buffers themselves being written.
52  void populateChunkBuffers(const ChunkToBufferMap& required_buffers,
53  const ChunkToBufferMap& optional_buffers) override;
54 
// Overrides a base-class virtual; const member returning a std::string by
// value.
// NOTE(review): the serialization format is defined in the implementation,
// which is not visible in this header.
55  std::string getSerializedDataWrapper() const override;
56 
58  const std::string& file_path,
59  const ChunkMetadataVector& chunk_metadata_vector) override;
60 
// Overrides a base-class virtual; const query returning a bool.
// NOTE(review): presumably reports whether the wrapper's state was restored
// from serialized data -- confirm against the implementation.
61  bool isRestored() const override;
62 
64  UNREACHABLE();
65  return {};
66  }
67 
69  UNREACHABLE();
70  return {};
71  }
72 
// Returns an import_export::ImportBatchResult; ownership passes to the
// caller through the std::unique_ptr. Non-const, so a call may change the
// importer's state.
// NOTE(review): how the end of the data is signalled (e.g. a null pointer
// or an empty batch) is not visible in this header -- confirm in the .cpp.
79  std::unique_ptr<import_export::ImportBatchResult> getNextImportBatch();
80 
// Returns, by value, a vector of (ColumnDescriptor*, StringDictionary*)
// pairs. The pointers are raw, so the returned vector does not own them.
// NOTE(review): presumably one entry per imported column, with a null
// dictionary for columns that have none -- confirm.
86  std::vector<std::pair<const ColumnDescriptor*, StringDictionary*>>
87  getStringDictionaries() const;
88 
89  private:
90  const int db_id_;
92 
// Private helper returning a set of file path strings by value; std::set
// keeps the paths unique and ordered.
// NOTE(review): where the paths come from is not visible in this header.
93  std::set<std::string> getAllFilePaths();
94 
// Tracker held through the abstract interface; solely owned by the importer.
95  std::unique_ptr<AbstractRowGroupIntervalTracker> row_group_interval_tracker_;
96 
// Foreign table schema, solely owned by the importer.
97  std::unique_ptr<ForeignTableSchema> schema_;
// Arrow file system handle; shared_ptr, so ownership may be shared.
98  std::shared_ptr<arrow::fs::FileSystem> file_system_;
// FileReaderMap owned by the importer.
// NOTE(review): the name suggests a cache of open file readers -- confirm.
99  std::unique_ptr<FileReaderMap> file_reader_cache_;
100  std::vector<std::pair<const ColumnDescriptor*, StringDictionary*>>
102 };
103 } // namespace foreign_storage
std::set< std::string > getAllFilePaths()
void populateChunkBuffers(const ChunkToBufferMap &required_buffers, const ChunkToBufferMap &optional_buffers) override
std::shared_ptr< arrow::fs::FileSystem > file_system_
std::unique_ptr< AbstractRowGroupIntervalTracker > row_group_interval_tracker_
std::unique_ptr< ForeignTableSchema > schema_
void restoreDataWrapperInternals(const std::string &file_path, const ChunkMetadataVector &chunk_metadata_vector) override
#define UNREACHABLE()
Definition: Logger.h:253
std::map< ChunkKey, AbstractBuffer * > ChunkToBufferMap
std::vector< std::pair< const ColumnDescriptor *, StringDictionary * > > getStringDictionaries() const
ParallelismLevel getCachedParallelismLevel() const override
This file contains the class specification and related data structures for Catalog.
void populateChunkMetadata(ChunkMetadataVector &chunk_metadata_vector) override
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
std::string getSerializedDataWrapper() const override
virtual std::optional< RowGroupInterval > getNextRowGroupInterval()=0
const ForeignTable * foreign_table_
std::unique_ptr< FileReaderMap > file_reader_cache_
ParallelismLevel getNonCachedParallelismLevel() const override
std::unique_ptr< import_export::ImportBatchResult > getNextImportBatch()
std::vector< std::pair< const ColumnDescriptor *, StringDictionary * > > string_dictionaries_per_column_