OmniSciDB
bf83d84833
|
Functions | |
bool | is_valid_parquet_string (const parquet::ColumnDescriptor *parquet_column) |
bool | is_valid_parquet_list_column (const parquet::ColumnDescriptor *parquet_column) |
Detect a valid list parquet column. More... | |
template<typename V > | |
std::shared_ptr< ParquetEncoder > | create_parquet_decimal_encoder_with_omnisci_type (const ColumnDescriptor *column_descriptor, const parquet::ColumnDescriptor *parquet_column_descriptor, AbstractBuffer *buffer) |
std::shared_ptr< ParquetEncoder > | create_parquet_decimal_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan) |
template<typename V , typename T , typename U > | |
std::shared_ptr< ParquetEncoder > | create_parquet_signed_or_unsigned_integral_encoder_with_types (AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size, const bool is_signed) |
Create a signed or unsigned integral parquet encoder using types. More... | |
template<typename V > | |
std::shared_ptr< ParquetEncoder > | create_parquet_integral_encoder_with_omnisci_type (AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size, const int bit_width, const bool is_signed) |
Create an integral parquet encoder using types. More... | |
std::shared_ptr< ParquetEncoder > | create_parquet_integral_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan) |
std::shared_ptr< ParquetEncoder > | create_parquet_floating_point_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer) |
std::shared_ptr< ParquetEncoder > | create_parquet_none_type_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer) |
template<typename V , typename T > | |
std::shared_ptr< ParquetEncoder > | create_parquet_timestamp_encoder_with_types (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer) |
template<typename V , typename T > | |
std::shared_ptr< ParquetEncoder > | create_parquet_date_from_timestamp_encoder_with_types (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer) |
std::shared_ptr< ParquetEncoder > | create_parquet_timestamp_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan) |
template<typename V , typename T > | |
std::shared_ptr< ParquetEncoder > | create_parquet_time_encoder_with_types (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer) |
std::shared_ptr< ParquetEncoder > | create_parquet_time_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan) |
std::shared_ptr< ParquetEncoder > | create_parquet_date_from_timestamp_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan) |
std::shared_ptr< ParquetEncoder > | create_parquet_date_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, AbstractBuffer *buffer, const bool is_metadata_scan) |
std::shared_ptr< ParquetEncoder > | create_parquet_string_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, const Chunk_NS::Chunk &chunk, StringDictionary *string_dictionary, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata) |
std::shared_ptr< ParquetEncoder > | create_parquet_geospatial_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const bool is_metadata_scan) |
std::shared_ptr< ParquetEncoder > | create_parquet_array_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const bool is_metadata_scan) |
std::shared_ptr< ParquetEncoder > | create_parquet_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const bool is_metadata_scan=false) |
Create a Parquet specific encoder for a Parquet to OmniSci mapping. More... | |
std::shared_ptr< ParquetEncoder > | create_parquet_encoder (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
void | validate_max_repetition_and_definition_level (const ColumnDescriptor *omnisci_column_descriptor, const parquet::ColumnDescriptor *parquet_column_descriptor) |
void | resize_values_buffer (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, std::vector< int8_t > &values) |
std::list< std::unique_ptr < ChunkMetadata > > | append_row_groups (const std::vector< RowGroupInterval > &row_group_intervals, const int parquet_column_index, const ColumnDescriptor *column_descriptor, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary, std::shared_ptr< arrow::fs::FileSystem > file_system) |
bool | validate_decimal_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
bool | validate_floating_point_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
bool | validate_integral_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
bool | is_nanosecond_precision (const ColumnDescriptor *omnisci_column) |
bool | is_nanosecond_precision (const parquet::TimestampLogicalType *timestamp_logical_column) |
bool | is_microsecond_precision (const ColumnDescriptor *omnisci_column) |
bool | is_microsecond_precision (const parquet::TimestampLogicalType *timestamp_logical_column) |
bool | is_millisecond_precision (const ColumnDescriptor *omnisci_column) |
bool | is_millisecond_precision (const parquet::TimestampLogicalType *timestamp_logical_column) |
bool | validate_none_type_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
bool | validate_timestamp_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
bool | validate_time_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
bool | validate_date_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
bool | validate_string_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
bool | validate_array_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
bool | validate_geospatial_mapping (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column) |
void | validate_equal_schema (const parquet::arrow::FileReader *reference_file_reader, const parquet::arrow::FileReader *new_file_reader, const std::string &reference_file_path, const std::string &new_file_path) |
void | validate_allowed_mapping (const parquet::ColumnDescriptor *parquet_column, const ColumnDescriptor *omnisci_column) |
void | validate_number_of_columns (const std::shared_ptr< parquet::FileMetaData > &file_metadata, const std::string &file_path, const ForeignTableSchema &schema) |
void | throw_missing_metadata_error (const int row_group_index, const int column_index, const std::string &file_path) |
void | throw_row_group_larger_than_fragment_size_error (const int row_group_index, const int64_t max_row_group_size, const int fragment_size, const std::string &file_path) |
void | validate_column_mapping_and_row_group_metadata (const std::shared_ptr< parquet::FileMetaData > &file_metadata, const std::string &file_path, const ForeignTableSchema &schema) |
void | validate_parquet_metadata (const std::shared_ptr< parquet::FileMetaData > &file_metadata, const std::string &file_path, const ForeignTableSchema &schema) |
void | metadata_scan_rowgroup_interval (const std::map< int, std::shared_ptr< ParquetEncoder >> &encoder_map, const RowGroupInterval &row_group_interval, const std::unique_ptr< parquet::arrow::FileReader > &reader, const ForeignTableSchema &schema, std::list< RowGroupMetadata > &row_group_metadata) |
void | populate_encoder_map (std::map< int, std::shared_ptr< ParquetEncoder >> &encoder_map, const Interval< ColumnType > &column_interval, const ForeignTableSchema &schema, const std::unique_ptr< parquet::arrow::FileReader > &reader) |
std::list<std::unique_ptr<ChunkMetadata> > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::append_row_groups | ( | const std::vector< RowGroupInterval > & | row_group_intervals, |
const int | parquet_column_index, | ||
const ColumnDescriptor * | column_descriptor, | ||
std::list< Chunk_NS::Chunk > & | chunks, | ||
StringDictionary * | string_dictionary, | ||
std::shared_ptr< arrow::fs::FileSystem > | file_system | ||
) |
Definition at line 965 of file LazyParquetChunkLoader.cpp.
References foreign_storage::LazyParquetChunkLoader::batch_reader_num_elements, CHECK, create_parquet_encoder(), foreign_storage::get_column_descriptor(), foreign_storage::get_parquet_table_size(), foreign_storage::open_parquet_table(), resize_values_buffer(), to_string(), foreign_storage::validate_equal_column_descriptor(), and validate_max_repetition_and_definition_level().
Referenced by foreign_storage::LazyParquetChunkLoader::loadChunk().
std::shared_ptr< ParquetEncoder > foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_array_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
std::list< Chunk_NS::Chunk > & | chunks, | ||
StringDictionary * | string_dictionary, | ||
std::list< std::unique_ptr< ChunkMetadata >> & | chunk_metadata, | ||
const bool | is_metadata_scan | ||
) |
Definition at line 878 of file LazyParquetChunkLoader.cpp.
References CHECK, ColumnDescriptor::columnType, create_parquet_encoder(), foreign_storage::get_sub_type_column_descriptor(), SQLTypeInfo::is_array(), SQLTypeInfo::is_fixlen_array(), and is_valid_parquet_list_column().
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_date_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer, | ||
const bool | is_metadata_scan | ||
) |
Definition at line 669 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, kENCODING_DATE_IN_DAYS, kENCODING_NONE, and UNREACHABLE.
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_date_from_timestamp_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer, | ||
const bool | is_metadata_scan | ||
) |
Definition at line 642 of file LazyParquetChunkLoader.cpp.
References CHECK, ColumnDescriptor::columnType, kENCODING_DATE_IN_DAYS, and UNREACHABLE.
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_date_from_timestamp_encoder_with_types | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer | ||
) |
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_decimal_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer, | ||
const bool | is_metadata_scan | ||
) |
Definition at line 156 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, SQLTypeInfo::get_comp_param(), SQLTypeInfo::get_compression(), kENCODING_FIXED, kENCODING_NONE, and UNREACHABLE.
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_decimal_encoder_with_omnisci_type | ( | const ColumnDescriptor * | column_descriptor, |
const parquet::ColumnDescriptor * | parquet_column_descriptor, | ||
AbstractBuffer * | buffer | ||
) |
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
std::list< Chunk_NS::Chunk > & | chunks, | ||
StringDictionary * | string_dictionary, | ||
std::list< std::unique_ptr< ChunkMetadata >> & | chunk_metadata, | ||
const bool | is_metadata_scan = false |
||
) |
Create a Parquet specific encoder for a Parquet to OmniSci mapping.
omnisci_column | - the descriptor of OmniSci column |
parquet_column | - the descriptor of Parquet column |
chunks | - list of chunks to populate (the case of more than one chunk happens only if a logical column expands to multiple physical columns) |
string_dictionary | - string dictionary used in encoding for string dictionary encoded columns |
chunk_metadata | - similar to the list of chunks, a list of chunk metadata that is populated |
is_metadata_scan | - a flag indicating if the encoders created should be for a metadata scan |
Notes:
This function and isColumnMappingSupported work in conjunction with each other. For example, once a mapping is known to be allowed (since isColumnMappingSupported returned true), this function does not have to check many corner cases exhaustively, as doing so would be redundant with what was already checked in isColumnMappingSupported.
Definition at line 801 of file LazyParquetChunkLoader.cpp.
References create_parquet_array_encoder(), create_parquet_date_encoder(), create_parquet_date_from_timestamp_encoder(), create_parquet_decimal_encoder(), create_parquet_floating_point_encoder(), create_parquet_geospatial_encoder(), create_parquet_integral_encoder(), create_parquet_none_type_encoder(), create_parquet_string_encoder(), create_parquet_time_encoder(), create_parquet_timestamp_encoder(), and UNREACHABLE.
Referenced by append_row_groups(), create_parquet_array_encoder(), create_parquet_encoder(), and populate_encoder_map().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column | ||
) |
Intended to be used only with metadata scan. Creates an incomplete encoder capable of updating metadata.
Definition at line 869 of file LazyParquetChunkLoader.cpp.
References create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_floating_point_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer | ||
) |
Definition at line 397 of file LazyParquetChunkLoader.cpp.
References CHECK, CHECK_EQ, ColumnDescriptor::columnType, kDOUBLE, kENCODING_NONE, kFLOAT, and UNREACHABLE.
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_geospatial_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
std::list< Chunk_NS::Chunk > & | chunks, | ||
std::list< std::unique_ptr< ChunkMetadata >> & | chunk_metadata, | ||
const bool | is_metadata_scan | ||
) |
Definition at line 739 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, and is_valid_parquet_string().
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_integral_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer, | ||
const bool | is_metadata_scan | ||
) |
Definition at line 267 of file LazyParquetChunkLoader.cpp.
References CHECK, ColumnDescriptor::columnType, kBIGINT, kENCODING_NONE, kINT, kSMALLINT, kTINYINT, and UNREACHABLE.
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_integral_encoder_with_omnisci_type | ( | AbstractBuffer * | buffer, |
const size_t | omnisci_data_type_byte_size, | ||
const size_t | parquet_data_type_byte_size, | ||
const int | bit_width, | ||
const bool | is_signed | ||
) |
Create an integral parquet encoder using types.
buffer | - buffer used within the encoder |
omnisci_data_type_byte_size | - size in number of bytes of OmniSci type |
parquet_data_type_byte_size | - size in number of bytes of Parquet physical type |
bit_width | - bit width specified for the Parquet column |
is_signed | - flag indicating if Parquet column is signed |
See the documentation for ParquetFixedLengthEncoder and ParquetUnsignedFixedLengthEncoder for a description of the semantics of the templated type V.
Note: this function determines the appropriate bit-depth integral encoder to create, while create_parquet_signed_or_unsigned_integral_encoder_with_types determines whether to create a signed or unsigned integral encoder.
Definition at line 234 of file LazyParquetChunkLoader.cpp.
References create_parquet_signed_or_unsigned_integral_encoder_with_types(), and UNREACHABLE.
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_none_type_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer | ||
) |
Definition at line 428 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, SQLTypeInfo::is_string(), kBOOLEAN, kENCODING_NONE, and UNREACHABLE.
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_signed_or_unsigned_integral_encoder_with_types | ( | AbstractBuffer * | buffer, |
const size_t | omnisci_data_type_byte_size, | ||
const size_t | parquet_data_type_byte_size, | ||
const bool | is_signed | ||
) |
Create a signed or unsigned integral parquet encoder using types.
buffer | - buffer used within the encoder |
omnisci_data_type_byte_size | - size in number of bytes of OmniSci type |
parquet_data_type_byte_size | - size in number of bytes of Parquet physical type |
is_signed | - flag indicating if Parquet column is signed |
See the documentation for ParquetFixedLengthEncoder and ParquetUnsignedFixedLengthEncoder for a description of the semantics of the templated types V, T, and U.
Definition at line 200 of file LazyParquetChunkLoader.cpp.
Referenced by create_parquet_integral_encoder_with_omnisci_type().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_string_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
const Chunk_NS::Chunk & | chunk, | ||
StringDictionary * | string_dictionary, | ||
std::list< std::unique_ptr< ChunkMetadata >> & | chunk_metadata | ||
) |
Definition at line 702 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, Chunk_NS::Chunk::getBuffer(), Chunk_NS::Chunk::getIndexBuf(), SQLTypeInfo::is_string(), is_valid_parquet_string(), kENCODING_DICT, kENCODING_NONE, and UNREACHABLE.
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_time_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer, | ||
const bool | is_metadata_scan | ||
) |
Definition at line 589 of file LazyParquetChunkLoader.cpp.
References CHECK, ColumnDescriptor::columnType, kENCODING_FIXED, kENCODING_NONE, and UNREACHABLE.
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_time_encoder_with_types | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer | ||
) |
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_timestamp_encoder | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer, | ||
const bool | is_metadata_scan | ||
) |
Definition at line 505 of file LazyParquetChunkLoader.cpp.
References CHECK, ColumnDescriptor::columnType, SQLTypeInfo::get_precision(), kENCODING_FIXED, kENCODING_NONE, and UNREACHABLE.
Referenced by create_parquet_encoder().
std::shared_ptr<ParquetEncoder> foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_timestamp_encoder_with_types | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
AbstractBuffer * | buffer | ||
) |
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_microsecond_precision | ( | const ColumnDescriptor * | omnisci_column | ) |
Definition at line 1115 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, and SQLTypeInfo::get_dimension().
Referenced by validate_timestamp_mapping().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_microsecond_precision | ( | const parquet::TimestampLogicalType * | timestamp_logical_column | ) |
Definition at line 1119 of file LazyParquetChunkLoader.cpp.
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_millisecond_precision | ( | const ColumnDescriptor * | omnisci_column | ) |
Definition at line 1124 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, and SQLTypeInfo::get_dimension().
Referenced by validate_timestamp_mapping().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_millisecond_precision | ( | const parquet::TimestampLogicalType * | timestamp_logical_column | ) |
Definition at line 1128 of file LazyParquetChunkLoader.cpp.
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_nanosecond_precision | ( | const ColumnDescriptor * | omnisci_column | ) |
Definition at line 1106 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, and SQLTypeInfo::get_dimension().
Referenced by validate_timestamp_mapping().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_nanosecond_precision | ( | const parquet::TimestampLogicalType * | timestamp_logical_column | ) |
Definition at line 1110 of file LazyParquetChunkLoader.cpp.
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_valid_parquet_list_column | ( | const parquet::ColumnDescriptor * | parquet_column | ) |
Detect a valid list parquet column.
parquet_column | - the parquet column descriptor of the column to detect |
Note: the notion of a valid parquet list column is adapted from the parquet schema specification for logical type definitions:
<list-repetition> group <name> (LIST) { repeated group list { <element-repetition> <element-type> element; } }
Testing has shown that there are small deviations from this specification in at least one library (pyarrow), where the innermost schema node is named "item" as opposed to "element".
The following is also true of the schema definition.
FSI further restricts lists to be defined only at the top level, meaning directly below the root schema node.
Definition at line 87 of file LazyParquetChunkLoader.cpp.
Referenced by create_parquet_array_encoder(), validate_array_mapping(), and validate_max_repetition_and_definition_level().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_valid_parquet_string | ( | const parquet::ColumnDescriptor * | parquet_column | ) |
Definition at line 45 of file LazyParquetChunkLoader.cpp.
Referenced by create_parquet_geospatial_encoder(), create_parquet_string_encoder(), validate_geospatial_mapping(), and validate_string_mapping().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::metadata_scan_rowgroup_interval | ( | const std::map< int, std::shared_ptr< ParquetEncoder >> & | encoder_map, |
const RowGroupInterval & | row_group_interval, | ||
const std::unique_ptr< parquet::arrow::FileReader > & | reader, | ||
const ForeignTableSchema & | schema, | ||
std::list< RowGroupMetadata > & | row_group_metadata | ||
) |
Definition at line 1382 of file LazyParquetChunkLoader.cpp.
References CHECK, ColumnDescriptor::columnId, foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, foreign_storage::ForeignTableSchema::getColumnDescriptor(), foreign_storage::ForeignTableSchema::getLogicalAndPhysicalColumns(), foreign_storage::ForeignTableSchema::getLogicalColumn(), foreign_storage::ForeignTableSchema::getParquetColumnIndex(), and foreign_storage::RowGroupInterval::start_index.
Referenced by foreign_storage::LazyParquetChunkLoader::metadataScan().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::populate_encoder_map | ( | std::map< int, std::shared_ptr< ParquetEncoder >> & | encoder_map, |
const Interval< ColumnType > & | column_interval, | ||
const ForeignTableSchema & | schema, | ||
const std::unique_ptr< parquet::arrow::FileReader > & | reader | ||
) |
Definition at line 1424 of file LazyParquetChunkLoader.cpp.
References create_parquet_encoder(), foreign_storage::Interval< T >::end, foreign_storage::ForeignTableSchema::getColumnDescriptor(), foreign_storage::ForeignTableSchema::getParquetColumnIndex(), and foreign_storage::Interval< T >::start.
Referenced by foreign_storage::LazyParquetChunkLoader::metadataScan().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::resize_values_buffer | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column, | ||
std::vector< int8_t > & | values | ||
) |
Definition at line 954 of file LazyParquetChunkLoader.cpp.
References foreign_storage::LazyParquetChunkLoader::batch_reader_num_elements, ColumnDescriptor::columnType, and SQLTypeInfo::get_size().
Referenced by append_row_groups().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::throw_missing_metadata_error | ( | const int | row_group_index, |
const int | column_index, | ||
const std::string & | file_path | ||
) |
Definition at line 1300 of file LazyParquetChunkLoader.cpp.
References to_string().
Referenced by validate_column_mapping_and_row_group_metadata().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::throw_row_group_larger_than_fragment_size_error | ( | const int | row_group_index, |
const int64_t | max_row_group_size, | ||
const int | fragment_size, | ||
const std::string & | file_path | ||
) |
Definition at line 1310 of file LazyParquetChunkLoader.cpp.
References to_string().
Referenced by validate_column_mapping_and_row_group_metadata().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_allowed_mapping | ( | const parquet::ColumnDescriptor * | parquet_column, |
const ColumnDescriptor * | omnisci_column | ||
) |
Definition at line 1259 of file LazyParquetChunkLoader.cpp.
References CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_type_name(), foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported(), LOG, run_benchmark_import::type, and logger::WARNING.
Referenced by validate_column_mapping_and_row_group_metadata().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_array_mapping | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column | ||
) |
Definition at line 1216 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, foreign_storage::get_sub_type_column_descriptor(), SQLTypeInfo::is_array(), is_valid_parquet_list_column(), and foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().
Referenced by foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_column_mapping_and_row_group_metadata | ( | const std::shared_ptr< parquet::FileMetaData > & | file_metadata, |
const std::string & | file_path, | ||
const ForeignTableSchema & | schema | ||
) |
Definition at line 1323 of file LazyParquetChunkLoader.cpp.
References foreign_storage::ForeignTableSchema::getForeignTable(), foreign_storage::ForeignTableSchema::getLogicalColumns(), TableDescriptor::maxFragRows, throw_missing_metadata_error(), throw_row_group_larger_than_fragment_size_error(), and validate_allowed_mapping().
Referenced by validate_parquet_metadata().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_date_mapping | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column | ||
) |
Definition at line 1191 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, SQLTypeInfo::get_comp_param(), SQLTypeInfo::get_compression(), SQLTypeInfo::get_type(), kDATE, kENCODING_DATE_IN_DAYS, and kENCODING_NONE.
Referenced by foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_decimal_mapping | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column | ||
) |
Definition at line 1049 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, SQLTypeInfo::get_compression(), SQLTypeInfo::get_precision(), SQLTypeInfo::get_scale(), SQLTypeInfo::is_decimal(), kENCODING_FIXED, and kENCODING_NONE.
Referenced by foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema | ( | const parquet::arrow::FileReader * | reference_file_reader, |
const parquet::arrow::FileReader * | new_file_reader, | ||
const std::string & | reference_file_path, | ||
const std::string & | new_file_path | ||
) |
Definition at line 1233 of file LazyParquetChunkLoader.cpp.
References foreign_storage::get_column_descriptor(), to_string(), and foreign_storage::validate_equal_column_descriptor().
Referenced by foreign_storage::LazyParquetChunkLoader::metadataScan().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_floating_point_mapping | ( | const ColumnDescriptor * | omnisci_column, |
const parquet::ColumnDescriptor * | parquet_column | ||
) |
Definition at line 1063 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, SQLTypeInfo::get_compression(), SQLTypeInfo::get_type(), SQLTypeInfo::is_fp(), kENCODING_NONE, and kFLOAT.
Referenced by foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_geospatial_mapping (
    const ColumnDescriptor *omnisci_column,
    const parquet::ColumnDescriptor *parquet_column)
Definition at line 1227 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, SQLTypeInfo::is_geometry(), and is_valid_parquet_string().
Referenced by foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_integral_mapping (
    const ColumnDescriptor *omnisci_column,
    const parquet::ColumnDescriptor *parquet_column)
Definition at line 1079 of file LazyParquetChunkLoader.cpp.
References CHECK, ColumnDescriptor::columnType, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::is_integer(), kENCODING_FIXED, and kENCODING_NONE.
Referenced by foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_max_repetition_and_definition_level (
    const ColumnDescriptor *omnisci_column_descriptor,
    const parquet::ColumnDescriptor *parquet_column_descriptor)
Definition at line 915 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::is_array(), is_valid_parquet_list_column(), and to_string().
Referenced by append_row_groups().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_none_type_mapping (
    const ColumnDescriptor *omnisci_column,
    const parquet::ColumnDescriptor *parquet_column)
Definition at line 1133 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, SQLTypeInfo::get_compression(), SQLTypeInfo::get_type(), kBOOLEAN, and kENCODING_NONE.
Referenced by foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_number_of_columns (
    const std::shared_ptr< parquet::FileMetaData > &file_metadata,
    const std::string &file_path,
    const ForeignTableSchema &schema)
Definition at line 1290 of file LazyParquetChunkLoader.cpp.
References foreign_storage::ForeignTableSchema::numLogicalColumns(), and foreign_storage::throw_number_of_columns_mismatch_error().
Referenced by validate_parquet_metadata().
void foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_parquet_metadata (
    const std::shared_ptr< parquet::FileMetaData > &file_metadata,
    const std::string &file_path,
    const ForeignTableSchema &schema)
Definition at line 1373 of file LazyParquetChunkLoader.cpp.
References validate_column_mapping_and_row_group_metadata(), and validate_number_of_columns().
Referenced by foreign_storage::LazyParquetChunkLoader::metadataScan().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_string_mapping (
    const ColumnDescriptor *omnisci_column,
    const parquet::ColumnDescriptor *parquet_column)
Definition at line 1208 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, SQLTypeInfo::get_compression(), SQLTypeInfo::is_string(), is_valid_parquet_string(), kENCODING_DICT, and kENCODING_NONE.
Referenced by foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_time_mapping (
    const ColumnDescriptor *omnisci_column,
    const parquet::ColumnDescriptor *parquet_column)
Definition at line 1177 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, SQLTypeInfo::get_comp_param(), SQLTypeInfo::get_compression(), SQLTypeInfo::get_type(), kENCODING_FIXED, kENCODING_NONE, and kTIME.
Referenced by foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().
bool foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_timestamp_mapping (
    const ColumnDescriptor *omnisci_column,
    const parquet::ColumnDescriptor *parquet_column)
Definition at line 1142 of file LazyParquetChunkLoader.cpp.
References ColumnDescriptor::columnType, SQLTypeInfo::get_comp_param(), SQLTypeInfo::get_compression(), SQLTypeInfo::get_dimension(), SQLTypeInfo::get_type(), is_microsecond_precision(), is_millisecond_precision(), is_nanosecond_precision(), kENCODING_FIXED, kENCODING_NONE, and kTIMESTAMP.
Referenced by foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported().