OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetFixedLengthArrayEncoder Class Reference

#include <ParquetFixedLengthArrayEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetFixedLengthArrayEncoder:
+ Collaboration diagram for foreign_storage::ParquetFixedLengthArrayEncoder:

Public Member Functions

 ParquetFixedLengthArrayEncoder (Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
 
std::shared_ptr< ChunkMetadata > getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
 
- Public Member Functions inherited from foreign_storage::ParquetArrayEncoder
 ParquetArrayEncoder (Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
 
void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void finalizeRowGroup ()
 
virtual void disableMetadataStatsValidation () override
 
virtual void initializeErrorTracking () override
 
virtual void initializeColumnType (const SQLTypeInfo &column_type) override
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
RejectedRowIndices getRejectedRowIndices () const
 

Protected Member Functions

void processLastArray () override
 
- Protected Member Functions inherited from foreign_storage::ParquetArrayEncoder
virtual void appendArraysToBuffer ()
 
bool isLastArrayNull () const
 
bool isLastArrayEmpty () const
 
size_t sizeOfLastArray () const
 
int8_t * resizeArrayDataBytes (const size_t additional_num_elements)
 
virtual void resetLastArrayMetadata ()
 
bool isNewArray (const int16_t rep_level) const
 
int8_t * encodedDataAtIndex (const size_t index)
 
void updateMetadataForAppendedArrayItem (const int64_t encoded_index)
 
virtual void appendArrayItem (const int64_t encoded_index)
 
virtual void encodeAllValues (const int8_t *values, const int64_t values_read)
 

Private Member Functions

void appendNullFixedLengthArray ()
 
void setNullFixedLengthArraySentinel (int8_t *omnisci_data_bytes)
 
void appendNullArrayOrCheckArraySize ()
 
void throwEmptyArrayException (const size_t array_element_count, const std::string &omnisci_column_name)
 
void throwWrongSizeArray (const size_t size_of_last_array, const size_t array_element_count, const std::string &omnisci_column_name)
 

Private Attributes

const ColumnDescriptor column_desciptor_
 
size_t array_element_count_
 

Additional Inherited Members

- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr< ChunkMetadata > createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 
- Protected Attributes inherited from foreign_storage::ParquetArrayEncoder
size_t omnisci_data_type_byte_size_
 
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_
 
std::vector< int8_t > data_buffer_bytes_
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 
- Static Protected Attributes inherited from foreign_storage::ParquetArrayEncoder
static const int16_t non_null_def_level = 3
 
static const int16_t item_null_def_level = 2
 
static const int16_t empty_list_def_level = 1
 
static const int16_t list_null_def_level = 0
 

Detailed Description

Definition at line 28 of file ParquetFixedLengthArrayEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetFixedLengthArrayEncoder::ParquetFixedLengthArrayEncoder ( Data_Namespace::AbstractBuffer * data_buffer,
std::shared_ptr< ParquetScalarEncoder > scalar_encoder,
const ColumnDescriptor * column_desciptor 
)
inline

Definition at line 30 of file ParquetFixedLengthArrayEncoder.h.

References CHECK, ColumnDescriptor::columnType, SQLTypeInfo::get_size(), and foreign_storage::ParquetArrayEncoder::omnisci_data_type_byte_size_.

33  : ParquetArrayEncoder(data_buffer, scalar_encoder, column_desciptor)
34  , column_desciptor_(*column_desciptor)
35  , array_element_count_(column_desciptor->columnType.get_size() /
37  CHECK(column_desciptor->columnType.get_size() % omnisci_data_type_byte_size_ == 0);
38  }
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
ParquetArrayEncoder(Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
#define CHECK(condition)
Definition: Logger.h:291
SQLTypeInfo columnType

+ Here is the call graph for this function:

Member Function Documentation

void foreign_storage::ParquetFixedLengthArrayEncoder::appendNullArrayOrCheckArraySize ( )
inline, private

Definition at line 121 of file ParquetFixedLengthArrayEncoder.h.

References appendNullFixedLengthArray(), array_element_count_, CHECK, column_desciptor_, ColumnDescriptor::columnName, foreign_storage::ParquetArrayEncoder::isLastArrayEmpty(), foreign_storage::ParquetArrayEncoder::isLastArrayNull(), foreign_storage::ParquetArrayEncoder::sizeOfLastArray(), throwEmptyArrayException(), and throwWrongSizeArray().

Referenced by processLastArray().

121  {
122  auto size_of_last_array = sizeOfLastArray();
123  if (isLastArrayNull()) {
124  // append a null array sentinel
125  CHECK(size_of_last_array == 0);
127  } else if (isLastArrayEmpty()) {
129  } else {
130  if (size_of_last_array != array_element_count_) {
132  size_of_last_array, array_element_count_, column_desciptor_.columnName);
133  }
134  }
135  }
void throwWrongSizeArray(const size_t size_of_last_array, const size_t array_element_count, const std::string &omnisci_column_name)
void throwEmptyArrayException(const size_t array_element_count, const std::string &omnisci_column_name)
#define CHECK(condition)
Definition: Logger.h:291
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetFixedLengthArrayEncoder::appendNullFixedLengthArray ( )
inline, private

Definition at line 58 of file ParquetFixedLengthArrayEncoder.h.

References array_element_count_, foreign_storage::ParquetArrayEncoder::omnisci_data_type_byte_size_, foreign_storage::ParquetArrayEncoder::resizeArrayDataBytes(), foreign_storage::ParquetArrayEncoder::scalar_encoder_, and setNullFixedLengthArraySentinel().

Referenced by appendNullArrayOrCheckArraySize().

58  {
59  auto omnisci_data_ptr = resizeArrayDataBytes(array_element_count_);
60  setNullFixedLengthArraySentinel(omnisci_data_ptr);
61  for (size_t i = 1; i < array_element_count_; ++i) {
62  scalar_encoder_->setNull(omnisci_data_ptr + i * omnisci_data_type_byte_size_);
63  }
64  }
int8_t * resizeArrayDataBytes(const size_t additional_num_elements)
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr<ChunkMetadata> foreign_storage::ParquetFixedLengthArrayEncoder::getRowGroupMetadata ( const parquet::RowGroupMetaData *  group_metadata,
const int  parquet_column_index,
const SQLTypeInfo &  column_type 
)
inline, override, virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 40 of file ParquetFixedLengthArrayEncoder.h.

References array_element_count_, foreign_storage::ParquetArrayEncoder::getRowGroupMetadata(), and foreign_storage::ParquetArrayEncoder::omnisci_data_type_byte_size_.

43  {
45  group_metadata, parquet_column_index, column_type);
46  metadata->numBytes =
47  omnisci_data_type_byte_size_ * group_metadata->num_rows() * array_element_count_;
48  return metadata;
49  }
std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override

+ Here is the call graph for this function:

void foreign_storage::ParquetFixedLengthArrayEncoder::processLastArray ( )
inline, override, protected, virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 52 of file ParquetFixedLengthArrayEncoder.h.

References appendNullArrayOrCheckArraySize(), and foreign_storage::ParquetArrayEncoder::processLastArray().

+ Here is the call graph for this function:

void foreign_storage::ParquetFixedLengthArrayEncoder::setNullFixedLengthArraySentinel ( int8_t *  omnisci_data_bytes)
inline, private

Definition at line 66 of file ParquetFixedLengthArrayEncoder.h.

References column_desciptor_, ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), inline_fixed_encoding_null_array_val(), kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, string_dict_to_int_type(), run_benchmark_import::type, and UNREACHABLE.

Referenced by appendNullFixedLengthArray().

66  {
68  SQLTypes type;
69  if (ti.is_dict_encoded_string()) {
70  type = string_dict_to_int_type(ti);
71  } else {
72  type = ti.get_type();
73  }
74  switch (type) {
75  case kBOOLEAN:
76  reinterpret_cast<bool*>(omnisci_data_bytes)[0] =
78  break;
79  case kBIGINT:
80  case kNUMERIC:
81  case kDECIMAL:
82  reinterpret_cast<int64_t*>(omnisci_data_bytes)[0] =
84  break;
85  case kINT:
86  reinterpret_cast<int32_t*>(omnisci_data_bytes)[0] =
88  break;
89  case kSMALLINT:
90  reinterpret_cast<int16_t*>(omnisci_data_bytes)[0] =
92  break;
93  case kTINYINT:
94  reinterpret_cast<int8_t*>(omnisci_data_bytes)[0] =
96  break;
97  case kFLOAT:
98  reinterpret_cast<float*>(omnisci_data_bytes)[0] = NULL_ARRAY_FLOAT;
99  break;
100  case kDOUBLE:
101  reinterpret_cast<double*>(omnisci_data_bytes)[0] = NULL_ARRAY_DOUBLE;
102  break;
103  case kTIME:
104  case kTIMESTAMP:
105  case kDATE:
106  reinterpret_cast<int64_t*>(omnisci_data_bytes)[0] =
108  break;
109  case kTEXT:
110  case kVARCHAR:
111  case kCHAR:
112  case kPOINT:
113  case kLINESTRING:
114  case kPOLYGON:
115  case kMULTIPOLYGON:
116  default:
117  UNREACHABLE();
118  }
119  }
Definition: sqltypes.h:76
SQLTypes
Definition: sqltypes.h:65
#define UNREACHABLE()
Definition: Logger.h:338
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:79
Definition: sqltypes.h:80
Definition: sqltypes.h:68
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:72
SQLTypeInfo columnType
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:975
SQLTypes string_dict_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:565

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetFixedLengthArrayEncoder::throwEmptyArrayException ( const size_t  array_element_count,
const std::string &  omnisci_column_name 
)
inline, private

Definition at line 137 of file ParquetFixedLengthArrayEncoder.h.

References to_string().

Referenced by appendNullArrayOrCheckArraySize().

138  {
139  throw ForeignStorageException(
140  "Detected an empty array"
141  " being loaded into"
142  " HeavyDB column '" +
143  omnisci_column_name +
144  "' which has a fixed length array type,"
145  " expecting " +
146  std::to_string(array_element_count) + " elements.");
147  }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetFixedLengthArrayEncoder::throwWrongSizeArray ( const size_t  size_of_last_array,
const size_t  array_element_count,
const std::string &  omnisci_column_name 
)
inline, private

Definition at line 149 of file ParquetFixedLengthArrayEncoder.h.

References to_string().

Referenced by appendNullArrayOrCheckArraySize().

151  {
152  throw ForeignStorageException("Detected a row with " +
153  std::to_string(size_of_last_array) +
154  " elements being loaded into"
155  " HeavyDB column '" +
156  omnisci_column_name +
157  "' which has a fixed length array type,"
158  " expecting " +
159  std::to_string(array_element_count) + " elements.");
160  }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

size_t foreign_storage::ParquetFixedLengthArrayEncoder::array_element_count_
private
const ColumnDescriptor foreign_storage::ParquetFixedLengthArrayEncoder::column_desciptor_
private

The documentation for this class was generated from the following file: