OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringNoneEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #ifndef STRING_NONE_ENCODER_H
24 #define STRING_NONE_ENCODER_H
25 #include "Logger/Logger.h"
26 
27 #include <cassert>
28 #include <string>
29 #include <vector>
30 #include "AbstractBuffer.h"
31 #include "ChunkMetadata.h"
32 #include "Encoder.h"
33 
35 
36 class StringNoneEncoder : public Encoder {
37  public:
39  : Encoder(buffer), index_buf(nullptr), last_offset(-1), has_nulls(false) {}
40 
41  size_t getNumElemsForBytesInsertData(const std::vector<std::string>* srcData,
42  const int start_idx,
43  const size_t numAppendElems,
44  const size_t byteLimit,
45  const bool replicating = false);
46 
47  size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t* index_data,
48  const std::vector<size_t>& selected_idx,
49  const size_t byte_limit) override;
50 
51  std::shared_ptr<ChunkMetadata> appendData(int8_t*& src_data,
52  const size_t num_elems_to_append,
53  const SQLTypeInfo& ti,
54  const bool replicating = false,
55  const int64_t offset = -1) override {
56  UNREACHABLE(); // should never be called for strings
57  return nullptr;
58  }
59 
60  std::shared_ptr<ChunkMetadata> appendEncodedDataAtIndices(
61  const int8_t* index_data,
62  int8_t* data,
63  const std::vector<size_t>& selected_idx) override;
64 
65  std::shared_ptr<ChunkMetadata> appendEncodedData(const int8_t* index_data,
66  int8_t* data,
67  const size_t start_idx,
68  const size_t num_elements) override;
69 
70  template <typename StringType>
71  std::shared_ptr<ChunkMetadata> appendData(const std::vector<StringType>* srcData,
72  const int start_idx,
73  const size_t numAppendElems,
74  const bool replicating = false);
75 
76  void getMetadata(const std::shared_ptr<ChunkMetadata>& chunkMetadata) override;
77 
78  // Only called from the executor for synthesized meta-information.
79  std::shared_ptr<ChunkMetadata> getMetadata(const SQLTypeInfo& ti) override;
80 
81  void updateStats(const int64_t, const bool) override { CHECK(false); }
82 
83  void updateStats(const double, const bool) override { CHECK(false); }
84 
85  void updateStats(const int8_t* const src_data, const size_t num_elements) override {
86  UNREACHABLE();
87  }
88 
89  void updateStats(const std::vector<std::string>* const src_data,
90  const size_t start_idx,
91  const size_t num_elements) override;
92 
93  void updateStats(const std::vector<ArrayDatum>* const src_data,
94  const size_t start_idx,
95  const size_t num_elements) override {
96  UNREACHABLE();
97  }
98 
99  void reduceStats(const Encoder&) override { CHECK(false); }
100 
101  void writeMetadata(FILE* f) override {
102  // assumes pointer is already in right place
103  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
104  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
105  }
106 
107  void readMetadata(FILE* f) override {
108  // assumes pointer is already in right place
109  CHECK_NE(fread((int8_t*)&num_elems_, sizeof(size_t), size_t(1), f), size_t(0));
110  CHECK_NE(fread((int8_t*)&has_nulls, sizeof(bool), size_t(1), f), size_t(0));
111  }
112 
113  void copyMetadata(const Encoder* copyFromEncoder) override {
114  num_elems_ = copyFromEncoder->getNumElems();
115  has_nulls = static_cast<const StringNoneEncoder*>(copyFromEncoder)->has_nulls;
116  }
117 
118  AbstractBuffer* getIndexBuf() const { return index_buf; }
120 
121  bool resetChunkStats(const ChunkStats& stats) override {
122  if (has_nulls == stats.has_nulls) {
123  return false;
124  }
125  has_nulls = stats.has_nulls;
126  return true;
127  }
128 
129  void resetChunkStats() override { has_nulls = false; }
130 
131  private:
132  std::pair<StringOffsetT, StringOffsetT> getStringOffsets(const int8_t* index_data,
133  size_t index);
134 
135  size_t getStringSizeAtIndex(const int8_t* index_data, size_t index);
136  std::string_view getStringAtIndex(const int8_t* index_data,
137  const int8_t* data,
138  size_t index);
139 
142  bool has_nulls;
143 
144  template <typename StringType>
145  void update_elem_stats(const StringType& elem);
146 
147 }; // class StringNoneEncoder
148 
149 #endif // STRING_NONE_ENCODER_H
std::string_view getStringAtIndex(const int8_t *index_data, const int8_t *data, size_t index)
void copyMetadata(const Encoder *copyFromEncoder) override
size_t num_elems_
Definition: Encoder.h:288
void updateStats(const int64_t, const bool) override
void resetChunkStats() override
void writeMetadata(FILE *f) override
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
#define UNREACHABLE()
Definition: Logger.h:266
bool has_nulls
Definition: ChunkMetadata.h:30
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *index_data, int8_t *data, const std::vector< size_t > &selected_idx) override
bool resetChunkStats(const ChunkStats &stats) override
: Reset chunk level stats (min, max, nulls) using new values from the argument.
constexpr double f
Definition: Utm.h:31
size_t getNumElemsForBytesInsertData(const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
int32_t StringOffsetT
Definition: sqltypes.h:1113
AbstractBuffer * index_buf
size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit) override
#define CHECK_NE(x, y)
Definition: Logger.h:231
void reduceStats(const Encoder &) override
size_t getNumElems() const
Definition: Encoder.h:284
StringNoneEncoder(AbstractBuffer *buffer)
An AbstractBuffer is a unit of data management for a data manager.
size_t getStringSizeAtIndex(const int8_t *index_data, size_t index)
void update_elem_stats(const StringType &elem)
StringOffsetT last_offset
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
void updateStats(const int8_t *const src_data, const size_t num_elements) override
std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *index_data, int8_t *data, const size_t start_idx, const size_t num_elements) override
AbstractBuffer * getIndexBuf() const
void updateStats(const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
bool g_enable_watchdog false
Definition: Execute.cpp:79
#define CHECK(condition)
Definition: Logger.h:222
void setIndexBuffer(AbstractBuffer *buf)
void readMetadata(FILE *f) override
std::pair< StringOffsetT, StringOffsetT > getStringOffsets(const int8_t *index_data, size_t index)
void updateStats(const double, const bool) override