24 #include <parquet/metadata.h> 33 virtual void appendData(
const int16_t* def_levels,
34 const int16_t* rep_levels,
35 const int64_t values_read,
36 const int64_t levels_read,
37 const bool is_last_batch,
41 const parquet::RowGroupMetaData* group_metadata,
42 const int parquet_column_index,
45 auto column_metadata = group_metadata->ColumnChunk(parquet_column_index);
49 auto null_count = stats->null_count();
53 metadata->chunkStats.has_nulls = null_count > 0;
56 metadata->numElements = group_metadata->num_rows();
64 auto metadata = std::make_shared<ChunkMetadata>();
68 auto encoder = buffer.getEncoder();
69 encoder->getMetadata(metadata);
70 metadata->sqlType = column_type;
75 std::stringstream error_message;
76 error_message <<
"A null value was detected in Parquet column '" 77 << parquet_column_name <<
"' but OmniSci column is set to not null";
78 throw std::runtime_error(error_message.str());
84 bool has_nulls = null_count > 0;
/**
 * Pure-virtual hook that concrete encoders must implement.
 *
 * @param omnisci_data_bytes Writable byte pointer; the name indicates it
 *        addresses OmniSci-side storage.
 *
 * NOTE(review): presumably writes the column type's null sentinel at the given
 * location -- no implementation is visible here, confirm against subclasses.
 */
95 virtual void setNull(int8_t* omnisci_data_bytes) = 0;
/**
 * Pure-virtual hook that concrete encoders must implement.
 *
 * @param omnisci_data_bytes_source      Read-only byte pointer to the source.
 * @param omnisci_data_bytes_destination Writable byte pointer to the target.
 *
 * NOTE(review): both parameters are named as OmniSci-side data, so this
 * presumably moves one already-encoded element from source to destination;
 * the element width is not visible here -- confirm against subclasses.
 */
96 virtual void copy(
const int8_t* omnisci_data_bytes_source,
97 int8_t* omnisci_data_bytes_destination) = 0;
/**
 * Pure-virtual hook that concrete encoders must implement.
 *
 * @param parquet_data_bytes Read-only byte pointer; named as Parquet-side input.
 * @param omnisci_data_bytes Writable byte pointer; named as OmniSci-side output.
 *
 * NOTE(review): presumably converts a single Parquet value to the OmniSci
 * representation and stores it at the output pointer -- the conversion itself
 * is defined by subclasses not visible here; confirm.
 */
98 virtual void encodeAndCopy(
const int8_t* parquet_data_bytes,
99 int8_t* omnisci_data_bytes) = 0;
/**
 * Pure-virtual hook that concrete encoders must implement.
 *
 * @param parquet_data_bytes Read-only byte pointer; named as Parquet-side input.
 * @param omnisci_data_bytes Writable byte pointer; named as OmniSci-side output.
 * @param num_elements       Element count supplied by the caller.
 *
 * NOTE(review): presumably the bulk counterpart of encodeAndCopy, processing
 * num_elements back-to-back values from the input into the output -- layout
 * and per-element widths are not visible here; confirm against subclasses.
 */
101 virtual void encodeAndCopyContiguous(
const int8_t* parquet_data_bytes,
102 int8_t* omnisci_data_bytes,
103 const size_t num_elements) = 0;
ParquetEncoder(Data_Namespace::AbstractBuffer *buffer)
virtual ~ParquetEncoder()=default
std::shared_ptr< parquet::Statistics > validate_and_get_column_metadata_statistics(const parquet::ColumnChunkMetaData *column_metadata)
static void throwNotNullViolation(const std::string &parquet_column_name)
void initEncoder(const SQLTypeInfo &tmp_sql_type)
virtual void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, const bool is_last_batch, int8_t *values)=0
DEVICE auto copy(ARGS &&... args)
HOST DEVICE bool get_notnull() const
static void validateNullCount(const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
An AbstractBuffer is a unit of data management for a data manager.
static std::shared_ptr< ChunkMetadata > createMetadata(const SQLTypeInfo &column_type)
virtual std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
SQLTypeInfo get_elem_type() const
ParquetScalarEncoder(Data_Namespace::AbstractBuffer *buffer)
Data_Namespace::AbstractBuffer * buffer_