OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NoneEncoder< T > Class Template Reference

#include <NoneEncoder.h>

+ Inheritance diagram for NoneEncoder< T >:
+ Collaboration diagram for NoneEncoder< T >:

Public Member Functions

 NoneEncoder (Data_Namespace::AbstractBuffer *buffer)
 
size_t getNumElemsForBytesEncodedDataAtIndices (const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit) override
 
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices (const int8_t *, int8_t *data, const std::vector< size_t > &selected_idx) override
 
std::shared_ptr< ChunkMetadata > appendEncodedData (const int8_t *, int8_t *data, const size_t start_idx, const size_t num_elements) override
 
std::shared_ptr< ChunkMetadata > appendData (int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &, const bool replicating=false, const int64_t offset=-1) override
 
void getMetadata (const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
 
std::shared_ptr< ChunkMetadata > getMetadata (const SQLTypeInfo &ti) override
 
void updateStats (const int64_t val, const bool is_null) override
 
void updateStats (const double val, const bool is_null) override
 
void updateStats (const int8_t *const src_data, const size_t num_elements) override
 
void updateStatsEncoded (const int8_t *const dst_data, const size_t num_elements) override
 
void updateStats (const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
 
void updateStats (const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
 
void reduceStats (const Encoder &that) override
 
void writeMetadata (FILE *f) override
 
void readMetadata (FILE *f) override
 
bool resetChunkStats (const ChunkStats &stats) override
 Reset chunk level stats (min, max, nulls) using new values from the argument. More...
 
void copyMetadata (const Encoder *copyFromEncoder) override
 
void resetChunkStats () override
 
- Public Member Functions inherited from Encoder
 Encoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~Encoder ()
 
size_t getNumElems () const
 
void setNumElems (const size_t num_elems)
 

Public Attributes

T dataMin
 
T dataMax
 
bool has_nulls
 

Private Member Functions

T validateDataAndUpdateStats (const T &unencoded_data)
 

Additional Inherited Members

- Static Public Member Functions inherited from Encoder
static Encoder * Create (Data_Namespace::AbstractBuffer *buffer, const SQLTypeInfo sqlType)
 
- Protected Attributes inherited from Encoder
size_t num_elems_
 
Data_Namespace::AbstractBuffer * buffer_
 
DecimalOverflowValidator decimal_overflow_validator_
 
DateDaysOverflowValidator date_days_overflow_validator_
 

Detailed Description

template<typename T>
class NoneEncoder< T >

Definition at line 37 of file NoneEncoder.h.

Constructor & Destructor Documentation

template<typename T>
NoneEncoder< T >::NoneEncoder ( Data_Namespace::AbstractBuffer *  buffer)
inline

Definition at line 39 of file NoneEncoder.h.

References NoneEncoder< T >::resetChunkStats().

39  : Encoder(buffer) {
40  resetChunkStats();
41  }
void resetChunkStats() override
Definition: NoneEncoder.h:247
Encoder(Data_Namespace::AbstractBuffer *buffer)
Definition: Encoder.cpp:221

+ Here is the call graph for this function:

Member Function Documentation

template<typename T>
std::shared_ptr<ChunkMetadata> NoneEncoder< T >::appendData ( int8_t *&  src_data,
const size_t  num_elems_to_append,
const SQLTypeInfo &  ti,
const bool  replicating = false,
const int64_t  offset = -1 
)
inline override virtual

Append data to the chunk buffer backing this encoder.

Parameters
src_data - Source data for the append
num_elems_to_append - Number of elements to append
ti - SQL Type Info for the column TODO(adb): used?
replicating - Pass one value and fill the chunk with it
offset - Write data starting at a given offset. Default is -1, which indicates an append; an offset of 0 rewrites the chunk up to num_elems_to_append.

Implements Encoder.

Definition at line 78 of file NoneEncoder.h.

References Data_Namespace::AbstractBuffer::append(), Encoder::buffer_, CHECK, CHECK_GE, gpu_enabled::fill(), NoneEncoder< T >::getMetadata(), Encoder::num_elems_, NoneEncoder< T >::resetChunkStats(), heavydb.dtypes::T, NoneEncoder< T >::updateStats(), NoneEncoder< T >::validateDataAndUpdateStats(), and Data_Namespace::AbstractBuffer::write().

Referenced by NoneEncoder< T >::appendEncodedData(), and NoneEncoder< T >::appendEncodedDataAtIndices().

82  {
83  if (offset == 0 && num_elems_to_append >= num_elems_) {
84  resetChunkStats();
85  }
86  T* unencodedData = reinterpret_cast<T*>(src_data);
87  std::vector<T> encoded_data;
88  if (replicating) {
89  if (num_elems_to_append > 0) {
90  encoded_data.resize(num_elems_to_append);
91  T data = validateDataAndUpdateStats(unencodedData[0]);
92  std::fill(encoded_data.begin(), encoded_data.end(), data);
93  }
94  } else {
95  updateStats(src_data, num_elems_to_append);
96  }
97  if (offset == -1) {
98  num_elems_ += num_elems_to_append;
99  buffer_->append(
100  replicating ? reinterpret_cast<int8_t*>(encoded_data.data()) : src_data,
101  num_elems_to_append * sizeof(T));
102  if (!replicating) {
103  src_data += num_elems_to_append * sizeof(T);
104  }
105  } else {
106  num_elems_ = offset + num_elems_to_append;
107  CHECK(!replicating);
108  CHECK_GE(offset, 0);
109  buffer_->write(
110  src_data, num_elems_to_append * sizeof(T), static_cast<size_t>(offset));
111  }
112  auto chunk_metadata = std::make_shared<ChunkMetadata>();
113  getMetadata(chunk_metadata);
114  return chunk_metadata;
115  }
size_t num_elems_
Definition: Encoder.h:288
#define CHECK_GE(x, y)
Definition: Logger.h:235
void updateStats(const int64_t val, const bool is_null) override
Definition: NoneEncoder.h:130
void resetChunkStats() override
Definition: NoneEncoder.h:247
DEVICE void fill(ARGS &&...args)
Definition: gpu_enabled.h:60
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:290
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
Definition: NoneEncoder.h:117
virtual void write(int8_t *src, const size_t num_bytes, const size_t offset=0, const MemoryLevel src_buffer_type=CPU_LEVEL, const int src_device_id=-1)=0
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
#define CHECK(condition)
Definition: Logger.h:222
T validateDataAndUpdateStats(const T &unencoded_data)
Definition: NoneEncoder.h:258

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T>
std::shared_ptr<ChunkMetadata> NoneEncoder< T >::appendEncodedData ( const int8_t *  index_data,
int8_t *  data,
const size_t  start_idx,
const size_t  num_elements 
)
inline override virtual

Append encoded data to the chunk buffer backing this encoder.

Parameters
index_data- (optional) the index data of data to append
data- the data to append
start_idx- the position to start encoding from in the data array
num_elements- the number of elements to encode from the data array
Returns
updated chunk metadata for the chunk buffer backing this encoder

NOTE: index_data must be non-null for varlen encoder types.

Implements Encoder.

Definition at line 70 of file NoneEncoder.h.

References NoneEncoder< T >::appendData(), and heavydb.dtypes::T.

73  {
74  auto current_data = data + sizeof(T) * start_idx;
75  return appendData(current_data, num_elements, SQLTypeInfo{}, false);
76  }
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &, const bool replicating=false, const int64_t offset=-1) override
Definition: NoneEncoder.h:78

+ Here is the call graph for this function:

template<typename T>
std::shared_ptr<ChunkMetadata> NoneEncoder< T >::appendEncodedDataAtIndices ( const int8_t *  index_data,
int8_t *  data,
const std::vector< size_t > &  selected_idx 
)
inline override virtual

Append selected encoded data to the chunk buffer backing this encoder.

Parameters
index_data- (optional) the index data of data to append
data- the data to append
selected_idx- which indices in the encoded data to append
Returns
updated chunk metadata for the chunk buffer backing this encoder

NOTE: index_data must be non-null for varlen encoder types.

Implements Encoder.

Definition at line 52 of file NoneEncoder.h.

References NoneEncoder< T >::appendData(), shared::execute_over_contiguous_indices(), and heavydb.dtypes::T.

55  {
56  std::shared_ptr<ChunkMetadata> chunk_metadata;
57  // NOTE: the use of `execute_over_contiguous_indices` is an optimization;
58  // it prevents having to copy or move the indexed data and instead performs
59  // an append over contiguous sections of indices.
60  shared::execute_over_contiguous_indices(
61  selected_idx, [&](const size_t start_pos, const size_t end_pos) {
62  size_t elem_count = end_pos - start_pos;
63  auto data_ptr = data + sizeof(T) * selected_idx[start_pos];
64  chunk_metadata = appendData(data_ptr, elem_count, SQLTypeInfo{}, false);
65  });
66 
67  return chunk_metadata;
68  }
void execute_over_contiguous_indices(const std::vector< size_t > &indices, std::function< void(const size_t, const size_t)> to_execute)
Definition: Iteration.h:22
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &, const bool replicating=false, const int64_t offset=-1) override
Definition: NoneEncoder.h:78

+ Here is the call graph for this function:

template<typename T>
void NoneEncoder< T >::copyMetadata ( const Encoder *  copyFromEncoder)
inline override virtual

Implements Encoder.

Definition at line 239 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, Encoder::getNumElems(), NoneEncoder< T >::has_nulls, and Encoder::num_elems_.

239  {
240  num_elems_ = copyFromEncoder->getNumElems();
241  auto castedEncoder = reinterpret_cast<const NoneEncoder<T>*>(copyFromEncoder);
242  dataMin = castedEncoder->dataMin;
243  dataMax = castedEncoder->dataMax;
244  has_nulls = castedEncoder->has_nulls;
245  }
size_t num_elems_
Definition: Encoder.h:288
size_t getNumElems() const
Definition: Encoder.h:284
bool has_nulls
Definition: NoneEncoder.h:255

+ Here is the call graph for this function:

template<typename T>
void NoneEncoder< T >::getMetadata ( const std::shared_ptr< ChunkMetadata > &  chunkMetadata)
inline override virtual

Reimplemented from Encoder.

Definition at line 117 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, Encoder::getMetadata(), and NoneEncoder< T >::has_nulls.

Referenced by NoneEncoder< T >::appendData().

117  {
118  Encoder::getMetadata(chunkMetadata); // call on parent class
119  chunkMetadata->fillChunkStats(dataMin, dataMax, has_nulls);
120  }
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
bool has_nulls
Definition: NoneEncoder.h:255

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T>
std::shared_ptr<ChunkMetadata> NoneEncoder< T >::getMetadata ( const SQLTypeInfo &  ti)
inline override virtual

Implements Encoder.

Definition at line 123 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, and NoneEncoder< T >::has_nulls.

123  {
124  auto chunk_metadata = std::make_shared<ChunkMetadata>(ti, 0, 0, ChunkStats{});
125  chunk_metadata->fillChunkStats(dataMin, dataMax, has_nulls);
126  return chunk_metadata;
127  }
bool has_nulls
Definition: NoneEncoder.h:255
template<typename T>
size_t NoneEncoder< T >::getNumElemsForBytesEncodedDataAtIndices ( const int8_t *  index_data,
const std::vector< size_t > &  selected_idx,
const size_t  byte_limit 
)
inline override virtual

Compute the maximum number of variable-length encoded elements given a byte limit.

Parameters
index_data- (optional) index data for the encoded type
selected_idx- which indices in the encoded data to consider
byte_limit- byte limit that must be respected
Returns
the number of elements

NOTE: optional parameters above may be ignored by the implementation, but may or may not be required depending on the encoder type backing the implementation.

Implements Encoder.

Definition at line 43 of file NoneEncoder.h.

References UNREACHABLE.

45  {
46  UNREACHABLE()
47  << "getNumElemsForBytesEncodedDataAtIndices unexpectedly called for non varlen"
48  " encoder";
49  return {};
50  }
#define UNREACHABLE()
Definition: Logger.h:266
template<typename T>
void NoneEncoder< T >::readMetadata ( FILE *  f)
inline override virtual

Implements Encoder.

Definition at line 217 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, NoneEncoder< T >::has_nulls, Encoder::num_elems_, and heavydb.dtypes::T.

217  {
218  // assumes pointer is already in right place
219  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
220  fread((int8_t*)&dataMin, sizeof(T), 1, f);
221  fread((int8_t*)&dataMax, sizeof(T), 1, f);
222  fread((int8_t*)&has_nulls, sizeof(bool), 1, f);
223  }
size_t num_elems_
Definition: Encoder.h:288
constexpr double f
Definition: Utm.h:31
bool has_nulls
Definition: NoneEncoder.h:255
template<typename T>
void NoneEncoder< T >::reduceStats ( const Encoder &  that)
inline override virtual

Implements Encoder.

Definition at line 200 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, and NoneEncoder< T >::has_nulls.

200  {
201  const auto that_typed = static_cast<const NoneEncoder&>(that);
202  if (that_typed.has_nulls) {
203  has_nulls = true;
204  }
205  dataMin = std::min(dataMin, that_typed.dataMin);
206  dataMax = std::max(dataMax, that_typed.dataMax);
207  }
bool has_nulls
Definition: NoneEncoder.h:255
template<typename T>
bool NoneEncoder< T >::resetChunkStats ( const ChunkStats &  stats)
inline override virtual

Reset chunk level stats (min, max, nulls) using new values from the argument.

Returns
True if an update occurred and the chunk needs to be flushed, false otherwise. Defaults to false if metadata update is unsupported. Chunk stats are only reset if the incoming stats differ from the current stats.

Reimplemented from Encoder.

Definition at line 225 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, ChunkStats::has_nulls, NoneEncoder< T >::has_nulls, ChunkStats::max, and ChunkStats::min.

225  {
226  const auto new_min = DatumFetcher::getDatumVal<T>(stats.min);
227  const auto new_max = DatumFetcher::getDatumVal<T>(stats.max);
228 
229  if (dataMin == new_min && dataMax == new_max && has_nulls == stats.has_nulls) {
230  return false;
231  }
232 
233  dataMin = new_min;
234  dataMax = new_max;
235  has_nulls = stats.has_nulls;
236  return true;
237  }
bool has_nulls
Definition: NoneEncoder.h:255
template<typename T>
void NoneEncoder< T >::resetChunkStats ( )
inline override virtual

Resets chunk metadata stats to their default values.

Implements Encoder.

Definition at line 247 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, and NoneEncoder< T >::has_nulls.

Referenced by NoneEncoder< T >::appendData(), and NoneEncoder< T >::NoneEncoder().

247  {
248  dataMin = std::numeric_limits<T>::max();
249  dataMax = std::numeric_limits<T>::lowest();
250  has_nulls = false;
251  }
bool has_nulls
Definition: NoneEncoder.h:255

+ Here is the caller graph for this function:

template<typename T>
void NoneEncoder< T >::updateStats ( const int64_t  val,
const bool  is_null 
)
inline override virtual

Implements Encoder.

Definition at line 130 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, NoneEncoder< T >::has_nulls, and heavydb.dtypes::T.

Referenced by NoneEncoder< T >::appendData().

130  {
131  if (is_null) {
132  has_nulls = true;
133  } else {
134  const auto data = static_cast<T>(val);
135  dataMin = std::min(dataMin, data);
136  dataMax = std::max(dataMax, data);
137  }
138  }
CONSTEXPR DEVICE bool is_null(const T &value)
bool has_nulls
Definition: NoneEncoder.h:255

+ Here is the caller graph for this function:

template<typename T>
void NoneEncoder< T >::updateStats ( const double  val,
const bool  is_null 
)
inline override virtual

Implements Encoder.

Definition at line 141 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, NoneEncoder< T >::has_nulls, and heavydb.dtypes::T.

141  {
142  if (is_null) {
143  has_nulls = true;
144  } else {
145  const auto data = static_cast<T>(val);
146  dataMin = std::min(dataMin, data);
147  dataMax = std::max(dataMax, data);
148  }
149  }
CONSTEXPR DEVICE bool is_null(const T &value)
bool has_nulls
Definition: NoneEncoder.h:255
template<typename T>
void NoneEncoder< T >::updateStats ( const int8_t *const  src_data,
const size_t  num_elements 
)
inline override virtual

Update statistics for data without appending.

Parameters
src_data- the data with which to update statistics
num_elements- the number of elements to scan in the data

Implements Encoder.

Definition at line 151 of file NoneEncoder.h.

References heavydb.dtypes::T, and NoneEncoder< T >::validateDataAndUpdateStats().

151  {
152  const T* unencoded_data = reinterpret_cast<const T*>(src_data);
153  for (size_t i = 0; i < num_elements; ++i) {
154  validateDataAndUpdateStats(unencoded_data[i]);
155  }
156  }
T validateDataAndUpdateStats(const T &unencoded_data)
Definition: NoneEncoder.h:258

+ Here is the call graph for this function:

template<typename T>
void NoneEncoder< T >::updateStats ( const std::vector< std::string > *const  src_data,
const size_t  start_idx,
const size_t  num_elements 
)
inline override virtual

Update statistics for string data without appending.

Parameters
src_data- the string data with which to update statistics
start_idx- the offset into src_data to start the update
num_elements- the number of elements to scan in the string data

Implements Encoder.

Definition at line 187 of file NoneEncoder.h.

References UNREACHABLE.

189  {
190  UNREACHABLE();
191  }
#define UNREACHABLE()
Definition: Logger.h:266
template<typename T>
void NoneEncoder< T >::updateStats ( const std::vector< ArrayDatum > *const  src_data,
const size_t  start_idx,
const size_t  num_elements 
)
inline override virtual

Update statistics for array data without appending.

Parameters
src_data- the array data with which to update statistics
start_idx- the offset into src_data to start the update
num_elements- the number of elements to scan in the array data

Implements Encoder.

Definition at line 193 of file NoneEncoder.h.

References UNREACHABLE.

195  {
196  UNREACHABLE();
197  }
#define UNREACHABLE()
Definition: Logger.h:266
template<typename T>
void NoneEncoder< T >::updateStatsEncoded ( const int8_t *const  dst_data,
const size_t  num_elements 
)
inline override virtual

Update statistics for encoded data without appending.

Parameters
dst_data- the data with which to update statistics
num_elements- the number of elements to scan in the data

Reimplemented from Encoder.

Definition at line 158 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, Encoder::decimal_overflow_validator_, NoneEncoder< T >::has_nulls, logger::init(), threading_serial::parallel_reduce(), heavydb.dtypes::T, and DecimalOverflowValidator::validate().

159  {
160  const T* data = reinterpret_cast<const T*>(dst_data);
161 
162  std::tie(dataMin, dataMax, has_nulls) = tbb::parallel_reduce(
163  tbb::blocked_range(size_t(0), num_elements),
164  std::tuple(dataMin, dataMax, has_nulls),
165  [&](const auto& range, auto init) {
166  auto [min, max, nulls] = init;
167  for (size_t i = range.begin(); i < range.end(); i++) {
168  if (data[i] != none_encoded_null_value<T>()) {
169  decimal_overflow_validator_.validate(data[i]);
170  min = std::min(min, data[i]);
171  max = std::max(max, data[i]);
172  } else {
173  nulls = true;
174  }
175  }
176  return std::tuple(min, max, nulls);
177  },
178  [&](auto lhs, auto rhs) {
179  const auto [lhs_min, lhs_max, lhs_nulls] = lhs;
180  const auto [rhs_min, rhs_max, rhs_nulls] = rhs;
181  return std::tuple(std::min(lhs_min, rhs_min),
182  std::max(lhs_max, rhs_max),
183  lhs_nulls || rhs_nulls);
184  });
185  }
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:292
void init(LogOptions const &log_opts)
Definition: Logger.cpp:308
Value parallel_reduce(const blocked_range< Int > &range, const Value &identity, const RealBody &real_body, const Reduction &reduction, const Partitioner &p=Partitioner())
Parallel iteration with reduction.
bool has_nulls
Definition: NoneEncoder.h:255
void validate(T value) const
Definition: Encoder.h:54

+ Here is the call graph for this function:

template<typename T>
T NoneEncoder< T >::validateDataAndUpdateStats ( const T &  unencoded_data)
inline private

Definition at line 258 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, Encoder::decimal_overflow_validator_, NoneEncoder< T >::has_nulls, and DecimalOverflowValidator::validate().

Referenced by NoneEncoder< T >::appendData(), and NoneEncoder< T >::updateStats().

258  {
259  if (unencoded_data == none_encoded_null_value<T>()) {
260  has_nulls = true;
261  } else {
262  decimal_overflow_validator_.validate(unencoded_data);
263  dataMin = std::min(dataMin, unencoded_data);
264  dataMax = std::max(dataMax, unencoded_data);
265  }
266  return unencoded_data;
267  }
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:292
bool has_nulls
Definition: NoneEncoder.h:255
void validate(T value) const
Definition: Encoder.h:54

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T>
void NoneEncoder< T >::writeMetadata ( FILE *  f)
inline override virtual

Implements Encoder.

Definition at line 209 of file NoneEncoder.h.

References NoneEncoder< T >::dataMax, NoneEncoder< T >::dataMin, NoneEncoder< T >::has_nulls, Encoder::num_elems_, and heavydb.dtypes::T.

209  {
210  // assumes pointer is already in right place
211  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
212  fwrite((int8_t*)&dataMin, sizeof(T), 1, f);
213  fwrite((int8_t*)&dataMax, sizeof(T), 1, f);
214  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
215  }
size_t num_elems_
Definition: Encoder.h:288
constexpr double f
Definition: Utm.h:31
bool has_nulls
Definition: NoneEncoder.h:255

Member Data Documentation


The documentation for this class was generated from the following file: