OmniSciDB  2e3a973ef4
ParquetShared.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <arrow/api.h>
20 #include <arrow/filesystem/filesystem.h>
21 #include <arrow/io/api.h>
22 #include <parquet/arrow/reader.h>
23 #include <parquet/types.h>
24 
26 
27 namespace foreign_storage {
28 
29 struct RowGroupInterval {
30  std::string file_path;
31  int start_index{-1}, end_index{-1};
32 };
33 
34 void open_parquet_table(const std::string& file_path,
35  std::unique_ptr<parquet::arrow::FileReader>& reader,
36  std::shared_ptr<arrow::fs::FileSystem>& file_system);
37 
38 std::pair<int, int> get_parquet_table_size(
39  const std::unique_ptr<parquet::arrow::FileReader>& reader);
40 
41 const parquet::ColumnDescriptor* get_column_descriptor(
42  const parquet::arrow::FileReader* reader,
43  const int logical_column_index);
44 
45 void validate_equal_column_descriptor(
46  const parquet::ColumnDescriptor* reference_descriptor,
47  const parquet::ColumnDescriptor* new_descriptor,
48  const std::string& reference_file_path,
49  const std::string& new_file_path);
50 
51 std::unique_ptr<ColumnDescriptor> get_sub_type_column_descriptor(
52  const ColumnDescriptor* column);
53 
54 } // namespace foreign_storage
std::unique_ptr< ColumnDescriptor > get_sub_type_column_descriptor(const ColumnDescriptor *column)
void validate_equal_column_descriptor(const parquet::ColumnDescriptor *reference_descriptor, const parquet::ColumnDescriptor *new_descriptor, const std::string &reference_file_path, const std::string &new_file_path)
std::pair< int, int > get_parquet_table_size(const std::unique_ptr< parquet::arrow::FileReader > &reader)
const parquet::ColumnDescriptor * get_column_descriptor(const parquet::arrow::FileReader *reader, const int logical_column_index)
specifies the content in-memory of a row in the column metadata table
void open_parquet_table(const std::string &file_path, std::unique_ptr< parquet::arrow::FileReader > &reader, std::shared_ptr< arrow::fs::FileSystem > &file_system)