OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetStringNoneEncoder Class Reference

#include <ParquetStringNoneEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetStringNoneEncoder:
+ Collaboration diagram for foreign_storage::ParquetStringNoneEncoder:

Public Member Functions

 ParquetStringNoneEncoder (Data_Namespace::AbstractBuffer *buffer, Data_Namespace::AbstractBuffer *index_buffer)
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
virtual std::shared_ptr< ChunkMetadata > getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
 
RejectedRowIndices getRejectedRowIndices () const
 
virtual void disableMetadataStatsValidation ()
 
virtual void initializeErrorTracking ()
 
virtual void initializeColumnType (const SQLTypeInfo &column_type)
 

Private Member Functions

void writeInitialOffsetIfApplicable ()
 

Private Attributes

Data_Namespace::AbstractBuffer * index_buffer_
 
std::vector< int8_t > encode_buffer_
 

Additional Inherited Members

- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr< ChunkMetadata > createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 

Detailed Description

Definition at line 27 of file ParquetStringNoneEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetStringNoneEncoder::ParquetStringNoneEncoder ( Data_Namespace::AbstractBuffer *  buffer,
Data_Namespace::AbstractBuffer *  index_buffer 
)
inline

Definition at line 29 of file ParquetStringNoneEncoder.h.

31  : ParquetEncoder(buffer)
32  , index_buffer_(index_buffer)
34  sizeof(StringOffsetT)) {}
ParquetEncoder(Data_Namespace::AbstractBuffer *buffer)
Data_Namespace::AbstractBuffer * index_buffer_
int32_t StringOffsetT
Definition: sqltypes.h:1493

Member Function Documentation

void foreign_storage::ParquetStringNoneEncoder::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

Implements foreign_storage::ParquetEncoder.

Definition at line 36 of file ParquetStringNoneEncoder.h.

References Data_Namespace::AbstractBuffer::append(), foreign_storage::ParquetEncoder::buffer_, CHECK, foreign_storage::ParquetEncoder::column_type_, foreign_storage::ParquetEncoder::current_chunk_offset_, encode_buffer_, SQLTypeInfo::get_max_strlen(), index_buffer_, foreign_storage::ParquetEncoder::invalid_indices_, foreign_storage::ParquetEncoder::is_error_tracking_enabled_, Data_Namespace::AbstractBuffer::reserve(), Data_Namespace::AbstractBuffer::size(), and writeInitialOffsetIfApplicable().

Referenced by appendDataTrackErrors().

40  {
41  CHECK(levels_read > 0);
44 
45  auto parquet_data_ptr = reinterpret_cast<const parquet::ByteArray*>(values);
46  auto offsets = reinterpret_cast<StringOffsetT*>(encode_buffer_.data());
47  auto last_offset = buffer_->size();
48 
49  size_t total_len = 0;
50  for (int64_t i = 0, j = 0; i < levels_read; ++i) {
51  if (def_levels[i]) {
52  CHECK(j < values_read);
53  auto& byte_array = parquet_data_ptr[j++];
55  byte_array.len > ParquetEncoder::column_type_.get_max_strlen()) {
56  // no-op, or effectively inserting a null: total_len += 0;
57  } else {
58  total_len += byte_array.len;
59  }
60  }
61  offsets[i] = last_offset + total_len;
62  }
63  index_buffer_->append(encode_buffer_.data(), levels_read * sizeof(StringOffsetT));
64 
65  encode_buffer_.resize(std::max<size_t>(total_len, encode_buffer_.size()));
66  buffer_->reserve(buffer_->size() + total_len);
67  total_len = 0;
68  for (int64_t i = 0, j = 0; i < levels_read; ++i) {
69  if (def_levels[i]) {
70  CHECK(j < values_read);
71  auto& byte_array = parquet_data_ptr[j++];
73  byte_array.len > ParquetEncoder::column_type_.get_max_strlen()) {
75  i);
76  } else {
77  memcpy(encode_buffer_.data() + total_len, byte_array.ptr, byte_array.len);
78  total_len += byte_array.len;
79  }
80  } else if (is_error_tracking_enabled_ &&
82  .get_notnull()) { // item is null for NOT NULL column
84  i);
85  }
86  }
89  }
90  buffer_->append(encode_buffer_.data(), total_len);
91  }
Data_Namespace::AbstractBuffer * index_buffer_
RejectedRowIndices invalid_indices_
int32_t StringOffsetT
Definition: sqltypes.h:1493
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
#define CHECK(condition)
Definition: Logger.h:291
HOST DEVICE size_t get_max_strlen() const
Definition: sqltypes.h:405
Data_Namespace::AbstractBuffer * buffer_
virtual void reserve(size_t num_bytes)=0

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetStringNoneEncoder::appendDataTrackErrors ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

Implements foreign_storage::ParquetEncoder.

Definition at line 93 of file ParquetStringNoneEncoder.h.

References appendData(), CHECK, and foreign_storage::ParquetEncoder::is_error_tracking_enabled_.

97  {
99  appendData(def_levels, rep_levels, values_read, levels_read, values);
100  }
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void foreign_storage::ParquetStringNoneEncoder::writeInitialOffsetIfApplicable ( )
inline private

Definition at line 103 of file ParquetStringNoneEncoder.h.

References Data_Namespace::AbstractBuffer::append(), index_buffer_, and Data_Namespace::AbstractBuffer::size().

Referenced by appendData().

103  {
104  if (!index_buffer_->size()) {
105  // write the initial starting offset
106  StringOffsetT zero = 0;
107  index_buffer_->append(reinterpret_cast<int8_t*>(&zero), sizeof(StringOffsetT));
108  }
109  }
Data_Namespace::AbstractBuffer * index_buffer_
int32_t StringOffsetT
Definition: sqltypes.h:1493
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

std::vector<int8_t> foreign_storage::ParquetStringNoneEncoder::encode_buffer_
private

Definition at line 112 of file ParquetStringNoneEncoder.h.

Referenced by appendData().

Data_Namespace::AbstractBuffer* foreign_storage::ParquetStringNoneEncoder::index_buffer_
private

Definition at line 111 of file ParquetStringNoneEncoder.h.

Referenced by appendData(), and writeInitialOffsetIfApplicable().


The documentation for this class was generated from the following file: