OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LazyParquetChunkLoader.cpp File Reference
#include "LazyParquetChunkLoader.h"
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <parquet/arrow/reader.h>
#include <parquet/column_scanner.h>
#include <parquet/exception.h>
#include <parquet/platform.h>
#include <parquet/statistics.h>
#include <parquet/types.h>
#include "AbstractFileStorageDataWrapper.h"
#include "ForeignStorageException.h"
#include "FsiChunkUtils.h"
#include "ParquetArrayDetectEncoder.h"
#include "ParquetArrayImportEncoder.h"
#include "ParquetDateInDaysFromTimestampEncoder.h"
#include "ParquetDateInSecondsEncoder.h"
#include "ParquetDecimalEncoder.h"
#include "ParquetDetectStringEncoder.h"
#include "ParquetFixedLengthArrayEncoder.h"
#include "ParquetFixedLengthEncoder.h"
#include "ParquetGeospatialEncoder.h"
#include "ParquetGeospatialImportEncoder.h"
#include "ParquetStringEncoder.h"
#include "ParquetStringImportEncoder.h"
#include "ParquetStringNoneEncoder.h"
#include "ParquetTimeEncoder.h"
#include "ParquetTimestampEncoder.h"
#include "ParquetVariableLengthArrayEncoder.h"
#include "Shared/misc.h"
#include "StringDictionary/StringDictionary.h"
#include "TypedParquetDetectBuffer.h"
+ Include dependency graph for LazyParquetChunkLoader.cpp:

Go to the source code of this file.

Classes

struct  foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::MaxRowGroupSizeStats
 
struct  foreign_storage::ParquetBatchData
 
class  foreign_storage::ParquetRowGroupReader
 
struct  foreign_storage::PreviewContext
 

Namespaces

 foreign_storage
 
 foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}
 

Functions

bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::within_range (int64_t lower_bound, int64_t upper_bound, int64_t value)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_valid_parquet_string (const parquet::ColumnDescriptor *parquet_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_valid_parquet_list_column (const parquet::ColumnDescriptor *parquet_column)
 Detect a valid list parquet column. More...
 
template<typename V , typename NullType >
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_decimal_encoder_with_omnisci_type (const ColumnDescriptor *column_descriptor, const parquet::ColumnDescriptor *parquet_column_descriptor, AbstractBuffer *buffer)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_decimal_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan_or_for_import)
 
template<typename V , typename T , typename U , typename NullType >
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_signed_or_unsigned_integral_encoder_with_types (AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size, const bool is_signed)
 Create a signed or unsigned integral parquet encoder using types. More...
 
template<typename V , typename NullType >
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_integral_encoder_with_omnisci_type (AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size, const int bit_width, const bool is_signed)
 Create a integral parquet encoder using types. More...
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_integral_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan_or_for_import)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_floating_point_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_none_type_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer)
 
template<typename V , typename T , typename NullType >
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_timestamp_encoder_with_types (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer)
 
template<typename V , typename T , typename NullType >
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_date_from_timestamp_encoder_with_types (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan_or_for_import)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_timestamp_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan_or_for_import)
 
template<typename V , typename T , typename NullType >
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_time_encoder_with_types (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_time_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan_or_for_import)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_date_from_timestamp_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan_or_for_import)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_date_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan_or_for_import)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_string_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, const Chunk_NS::Chunk &chunk, StringDictionary *string_dictionary, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, bool is_for_import, const bool is_for_detect)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_geospatial_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const bool is_metadata_scan, const bool is_for_import, const bool geo_validate_geometry)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_array_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const bool is_metadata_scan, const bool is_for_import, const bool is_for_detect, const bool geo_validate_geometry)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const bool is_metadata_scan, const bool is_for_import, const bool is_for_detect, const bool geo_validate_geometry)
 Create a Parquet specific encoder for a Parquet to OmniSci mapping. More...
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_encoder_for_import (std::list< Chunk_NS::Chunk > &chunks, const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, StringDictionary *string_dictionary, const bool geo_validate_geometry)
 
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_encoder_for_metadata_scan (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, const bool geo_validate_geometry)
 
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_list_column_metadata_statistics (const parquet::ParquetFileReader *reader, const int row_group_index, const int column_index, const int16_t *def_levels, const int64_t num_levels, const parquet::ColumnDescriptor *parquet_column_descriptor)
 
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::set_definition_levels_for_zero_max_definition_level_case (const parquet::ColumnDescriptor *parquet_column_descriptor, std::vector< int16_t > &def_levels)
 
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_max_repetition_and_definition_level (const ColumnDescriptor *omnisci_column_descriptor, const parquet::ColumnDescriptor *parquet_column_descriptor)
 
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::resize_values_buffer (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, std::vector< int8_t > &values)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_decimal_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
 
SQLTypeInfo foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::suggest_decimal_mapping (const parquet::ColumnDescriptor *parquet_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_floating_point_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
 
SQLTypeInfo foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::suggest_floating_point_mapping (const parquet::ColumnDescriptor *parquet_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_integral_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
 
SQLTypeInfo foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::suggest_integral_mapping (const parquet::ColumnDescriptor *parquet_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_nanosecond_precision (const ColumnDescriptor *omnisci_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_nanosecond_precision (const parquet::TimestampLogicalType *timestamp_logical_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_microsecond_precision (const ColumnDescriptor *omnisci_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_microsecond_precision (const parquet::TimestampLogicalType *timestamp_logical_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_millisecond_precision (const ColumnDescriptor *omnisci_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_millisecond_precision (const parquet::TimestampLogicalType *timestamp_logical_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_none_type_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
 
SQLTypeInfo foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::suggest_boolean_type_mapping (const parquet::ColumnDescriptor *parquet_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_timestamp_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
 
SQLTypeInfo foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::suggest_timestamp_mapping (const parquet::ColumnDescriptor *parquet_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_time_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
 
SQLTypeInfo foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::suggest_time_mapping (const parquet::ColumnDescriptor *parquet_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_date_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
 
SQLTypeInfo foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::suggest_date_mapping (const parquet::ColumnDescriptor *parquet_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_string_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
 
SQLTypeInfo foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::suggest_string_mapping (const parquet::ColumnDescriptor *parquet_column)
 
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_geospatial_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
 
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema (const parquet::arrow::FileReader *reference_file_reader, const parquet::arrow::FileReader *new_file_reader, const std::string &reference_file_path, const std::string &new_file_path)
 
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_allowed_mapping (const parquet::ColumnDescriptor *parquet_column, const ColumnDescriptor *omnisci_column)
 
SQLTypeInfo foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::suggest_column_scalar_type (const parquet::ColumnDescriptor *parquet_column)
 
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_number_of_columns (const std::shared_ptr< parquet::FileMetaData > &file_metadata, const std::string &file_path, const ForeignTableSchema &schema)
 
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::throw_missing_metadata_error (const int row_group_index, const int column_index, const std::string &file_path)
 
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::throw_row_group_larger_than_fragment_size_error (const MaxRowGroupSizeStats max_row_group_stats, const int fragment_size)
 
MaxRowGroupSizeStats foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_column_mapping_and_row_group_metadata (const std::shared_ptr< parquet::FileMetaData > &file_metadata, const std::string &file_path, const ForeignTableSchema &schema, const bool do_metadata_stats_validation)
 
MaxRowGroupSizeStats foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_parquet_metadata (const std::shared_ptr< parquet::FileMetaData > &file_metadata, const std::string &file_path, const ForeignTableSchema &schema, const bool do_metadata_stats_validation)
 
std::list< RowGroupMetadata > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::metadata_scan_rowgroup_interval (const std::map< int, std::shared_ptr< ParquetEncoder >> &encoder_map, const RowGroupInterval &row_group_interval, const ReaderPtr &reader, const ForeignTableSchema &schema)
 
std::map< int, std::shared_ptr
< ParquetEncoder > > 
foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::populate_encoder_map_for_import (const std::map< int, Chunk_NS::Chunk > chunks, const ForeignTableSchema &schema, const ReaderPtr &reader, const std::map< int, StringDictionary * > column_dictionaries, const int64_t num_rows, const bool geo_validate_geometry)
 
std::map< int, std::shared_ptr
< ParquetEncoder > > 
foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::populate_encoder_map_for_metadata_scan (const Interval< ColumnType > &column_interval, const ForeignTableSchema &schema, const ReaderPtr &reader, const bool do_metadata_stats_validation, const bool geo_validate_geometry)