OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringNoneEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #ifndef STRING_NONE_ENCODER_H
24 #define STRING_NONE_ENCODER_H
25 #include "Logger/Logger.h"
26 
27 #include <cassert>
28 #include <string>
29 #include <vector>
30 #include "AbstractBuffer.h"
31 #include "ChunkMetadata.h"
32 #include "Encoder.h"
33 
35 
36 class StringNoneEncoder : public Encoder {
37  public:
39  : Encoder(buffer), index_buf(nullptr), last_offset(-1), has_nulls(false) {}
40 
41  size_t getNumElemsForBytesInsertData(const std::vector<std::string>* srcData,
42  const int start_idx,
43  const size_t numAppendElems,
44  const size_t byteLimit,
45  const bool replicating = false);
46 
47  size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t* index_data,
48  const std::vector<size_t>& selected_idx,
49  const size_t byte_limit) override;
50 
51  std::shared_ptr<ChunkMetadata> appendData(int8_t*& src_data,
52  const size_t num_elems_to_append,
53  const SQLTypeInfo& ti,
54  const bool replicating = false,
55  const int64_t offset = -1) override {
56  UNREACHABLE(); // should never be called for strings
57  return nullptr;
58  }
59 
60  std::shared_ptr<ChunkMetadata> appendEncodedDataAtIndices(
61  const int8_t* index_data,
62  int8_t* data,
63  const std::vector<size_t>& selected_idx) override;
64 
65  std::shared_ptr<ChunkMetadata> appendEncodedData(const int8_t* index_data,
66  int8_t* data,
67  const size_t start_idx,
68  const size_t num_elements) override;
69 
70  template <typename StringType>
71  std::shared_ptr<ChunkMetadata> appendData(const StringType* srcData,
72  const int start_idx,
73  const size_t numAppendElems,
74  const bool replicating = false);
75 
76  template <typename StringType>
77  std::shared_ptr<ChunkMetadata> appendData(const std::vector<StringType>* srcData,
78  const int start_idx,
79  const size_t numAppendElems,
80  const bool replicating = false);
81 
82  void getMetadata(const std::shared_ptr<ChunkMetadata>& chunkMetadata) override;
83 
84  // Only called from the executor for synthesized meta-information.
85  std::shared_ptr<ChunkMetadata> getMetadata(const SQLTypeInfo& ti) override;
86 
87  void updateStats(const int64_t, const bool) override { CHECK(false); }
88 
89  void updateStats(const double, const bool) override { CHECK(false); }
90 
91  void updateStats(const int8_t* const src_data, const size_t num_elements) override {
92  UNREACHABLE();
93  }
94 
95  void updateStats(const std::vector<std::string>* const src_data,
96  const size_t start_idx,
97  const size_t num_elements) override;
98 
99  void updateStats(const std::vector<ArrayDatum>* const src_data,
100  const size_t start_idx,
101  const size_t num_elements) override {
102  UNREACHABLE();
103  }
104 
105  void reduceStats(const Encoder&) override { CHECK(false); }
106 
107  void writeMetadata(FILE* f) override {
108  // assumes pointer is already in right place
109  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
110  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
111  }
112 
113  void readMetadata(FILE* f) override {
114  // assumes pointer is already in right place
115  CHECK_NE(fread((int8_t*)&num_elems_, sizeof(size_t), size_t(1), f), size_t(0));
116  CHECK_NE(fread((int8_t*)&has_nulls, sizeof(bool), size_t(1), f), size_t(0));
117  }
118 
119  void copyMetadata(const Encoder* copyFromEncoder) override {
120  num_elems_ = copyFromEncoder->getNumElems();
121  has_nulls = static_cast<const StringNoneEncoder*>(copyFromEncoder)->has_nulls;
122  }
123 
124  AbstractBuffer* getIndexBuf() const { return index_buf; }
126 
127  bool resetChunkStats(const ChunkStats& stats) override {
128  if (has_nulls == stats.has_nulls) {
129  return false;
130  }
131  has_nulls = stats.has_nulls;
132  return true;
133  }
134 
135  void resetChunkStats() override { has_nulls = false; }
136 
137  static std::string_view getStringAtIndex(const int8_t* index_data,
138  const int8_t* data,
139  size_t index);
140 
141  private:
142  static std::pair<StringOffsetT, StringOffsetT> getStringOffsets(
143  const int8_t* index_data,
144  size_t index);
145 
146  static size_t getStringSizeAtIndex(const int8_t* index_data, size_t index);
147 
150  bool has_nulls;
151 
152  template <typename StringType>
153  void update_elem_stats(const StringType& elem);
154 
155 }; // class StringNoneEncoder
156 
157 #endif // STRING_NONE_ENCODER_H
static std::string_view getStringAtIndex(const int8_t *index_data, const int8_t *data, size_t index)
void copyMetadata(const Encoder *copyFromEncoder) override
size_t num_elems_
Definition: Encoder.h:288
void updateStats(const int64_t, const bool) override
void resetChunkStats() override
void writeMetadata(FILE *f) override
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
#define UNREACHABLE()
Definition: Logger.h:338
bool has_nulls
Definition: ChunkMetadata.h:30
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *index_data, int8_t *data, const std::vector< size_t > &selected_idx) override
dictionary stats
Definition: report.py:116
bool resetChunkStats(const ChunkStats &stats) override
Reset chunk level stats (min, max, nulls) using new values from the argument.
size_t getNumElemsForBytesInsertData(const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
int32_t StringOffsetT
Definition: sqltypes.h:1493
AbstractBuffer * index_buf
size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit) override
#define CHECK_NE(x, y)
Definition: Logger.h:302
void reduceStats(const Encoder &) override
size_t getNumElems() const
Definition: Encoder.h:284
StringNoneEncoder(AbstractBuffer *buffer)
An AbstractBuffer is a unit of data management for a data manager.
static size_t getStringSizeAtIndex(const int8_t *index_data, size_t index)
void update_elem_stats(const StringType &elem)
StringOffsetT last_offset
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
void updateStats(const int8_t *const src_data, const size_t num_elements) override
std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *index_data, int8_t *data, const size_t start_idx, const size_t num_elements) override
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
AbstractBuffer * getIndexBuf() const
void updateStats(const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
bool g_enable_watchdog false
Definition: Execute.cpp:80
#define CHECK(condition)
Definition: Logger.h:291
void setIndexBuffer(AbstractBuffer *buf)
void readMetadata(FILE *f) override
static std::pair< StringOffsetT, StringOffsetT > getStringOffsets(const int8_t *index_data, size_t index)
void updateStats(const double, const bool) override