OmniSciDB  bf83d84833
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ParquetShared.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <arrow/api.h>
20 #include <arrow/filesystem/filesystem.h>
21 #include <arrow/io/api.h>
22 #include <parquet/arrow/reader.h>
23 #include <parquet/statistics.h>
24 #include <parquet/types.h>
25 
27 #include "DataMgr/ChunkMetadata.h"
28 
29 namespace foreign_storage {
30 
32  std::string file_path;
33  int start_index{-1}, end_index{-1};
34 };
35 
37  std::string file_path;
39  std::list<std::shared_ptr<ChunkMetadata>> column_chunk_metadata;
40 };
41 
// Declaration only; the definition is not part of this header.
// Parameters:
//   file_path   - path string, taken by const reference (read-only here).
//   reader      - non-const reference to a unique_ptr<parquet::arrow::FileReader>;
//                 the signature allows the callee to replace it.
//   file_system - non-const reference to a shared_ptr<arrow::fs::FileSystem>.
// NOTE(review): presumably opens `file_path` via `file_system` and stores the
// resulting reader in `reader`; error reporting is not visible from the
// declaration -- confirm against the definition.
42 void open_parquet_table(const std::string& file_path,
43  std::unique_ptr<parquet::arrow::FileReader>& reader,
44  std::shared_ptr<arrow::fs::FileSystem>& file_system);
45 
// Declaration only; the definition is not part of this header.
// Takes the reader by const reference to its owning unique_ptr, so the callee
// cannot reseat or release the pointer.
// Returns a pair of ints.
// NOTE(review): which dimension is `first` and which is `second` (presumably
// row and column counts) is not stated here -- confirm against the definition.
46 std::pair<int, int> get_parquet_table_size(
47  const std::unique_ptr<parquet::arrow::FileReader>& reader);
48 
// Declaration only; the definition is not part of this header.
// Parameters:
//   reader               - pointer to a const parquet::arrow::FileReader.
//   logical_column_index - integer column index; the valid range is not
//                          expressed by the signature.
// Returns a raw pointer to a const parquet::ColumnDescriptor.
// NOTE(review): the raw pointer return suggests a non-owning view, presumably
// into schema data held by `reader` -- confirm lifetime and null/out-of-range
// behaviour against the definition.
49 const parquet::ColumnDescriptor* get_column_descriptor(
50  const parquet::arrow::FileReader* reader,
51  const int logical_column_index);
52 
54  const parquet::ColumnDescriptor* reference_descriptor,
55  const parquet::ColumnDescriptor* new_descriptor,
56  const std::string& reference_file_path,
57  const std::string& new_file_path);
58 
// Declaration only; the definition is not part of this header.
// Note the unqualified `ColumnDescriptor` here, in contrast to the
// `parquet::ColumnDescriptor` used by other declarations in this header.
// Takes a pointer to a const ColumnDescriptor and returns a newly owned
// ColumnDescriptor through std::unique_ptr (ownership passes to the caller).
// NOTE(review): what "sub type" means (presumably the element type of an
// array-like column) is not visible here -- confirm against the definition.
59 std::unique_ptr<ColumnDescriptor> get_sub_type_column_descriptor(
60  const ColumnDescriptor* column);
61 
// Declaration only; the definition is not part of this header.
// Takes a pointer to a const parquet::ColumnChunkMetaData and returns a
// shared_ptr<parquet::Statistics>.
// NOTE(review): the name implies a validation step before the statistics are
// returned; which conditions are checked and how a failure is reported
// (exception vs. null result) cannot be determined from this declaration --
// confirm against the definition.
62 std::shared_ptr<parquet::Statistics> validate_and_get_column_metadata_statistics(
63  const parquet::ColumnChunkMetaData* column_metadata);
64 
65 } // namespace foreign_storage
std::shared_ptr< parquet::Statistics > validate_and_get_column_metadata_statistics(const parquet::ColumnChunkMetaData *column_metadata)
std::unique_ptr< ColumnDescriptor > get_sub_type_column_descriptor(const ColumnDescriptor *column)
void validate_equal_column_descriptor(const parquet::ColumnDescriptor *reference_descriptor, const parquet::ColumnDescriptor *new_descriptor, const std::string &reference_file_path, const std::string &new_file_path)
std::pair< int, int > get_parquet_table_size(const std::unique_ptr< parquet::arrow::FileReader > &reader)
const parquet::ColumnDescriptor * get_column_descriptor(const parquet::arrow::FileReader *reader, const int logical_column_index)
std::list< std::shared_ptr< ChunkMetadata > > column_chunk_metadata
Definition: ParquetShared.h:39
Specifies the in-memory content of a row in the column metadata table.
void open_parquet_table(const std::string &file_path, std::unique_ptr< parquet::arrow::FileReader > &reader, std::shared_ptr< arrow::fs::FileSystem > &file_system)