OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ConversionFactory.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "StringViewSource.h"
21 #include "StringViewToGeoEncoder.h"
25 
26 namespace data_conversion {
27 
30  Chunk_NS::Chunk scalar_temp_chunk; // used by array encoders
31  std::list<Chunk_NS::Chunk> geo_chunks; // used by geo encoder
32  std::list<std::unique_ptr<ChunkMetadata>> geo_chunk_metadata; // used by geo encoder
33  std::optional<Chunk_NS::Chunk> delete_chunk;
34  int db_id;
35 };
36 
// Factory that picks and constructs the encoder used to convert string-view
// input into the destination column's storage type.
//
// Parameters (as visible in this listing):
//   param                  - holds the destination chunk(s) handed to the encoder
//                            (dst_chunk, scalar_temp_chunk, geo_chunks,
//                            geo_chunk_metadata are read below).
//                            NOTE(review): its declaration (listing line 38) is
//                            not visible in this listing.
//   error_tracking_enabled - forwarded unchanged to every encoder constructor.
//   geo_validate_geometry  - forwarded only to StringViewToGeoEncoder.
//
// Returns the newly created encoder. Every type/encoding combination that has
// no matching case is flagged with UNREACHABLE().
37 std::unique_ptr<BaseConvertEncoder> create_string_view_encoder(
39  const bool error_tracking_enabled,
40  const bool geo_validate_geometry) {
// When geo chunks are supplied, the destination type is read from the first
// geo chunk's column descriptor.
41  auto dst_type_info = param.geo_chunks.size()
42  ? param.geo_chunks.begin()->getColumnDesc()->columnType
// NOTE(review): the ':' arm of the conditional above (listing line 43) is not
// visible in this listing.
// Dictionary-encoded strings: the index type is chosen from get_size() (1, 2 or 4).
44  if (dst_type_info.is_dict_encoded_string()) {
45  switch (dst_type_info.get_size()) {
46  case 1:
47  return std::make_unique<StringViewToStringDictEncoder<uint8_t>>(
48  param.dst_chunk, error_tracking_enabled);
49  case 2:
50  return std::make_unique<StringViewToStringDictEncoder<uint16_t>>(
51  param.dst_chunk, error_tracking_enabled);
52  case 4:
53  return std::make_unique<StringViewToStringDictEncoder<int32_t>>(
54  param.dst_chunk, error_tracking_enabled);
55  default:
56  UNREACHABLE();
57  }
58  } else if (dst_type_info.is_none_encoded_string()) {
59  return std::make_unique<StringViewToStringNoneEncoder>(param.dst_chunk,
60  error_tracking_enabled);
// Date-in-days columns: comp_param 0 and 32 share the <int32_t, int64_t>
// encoder, 16 uses <int16_t, int64_t>.
// NOTE(review): presumably the template arguments are <stored type, converted
// type> - confirm against StringViewToScalarEncoder.
61  } else if (dst_type_info.is_date_in_days()) {
62  switch (dst_type_info.get_comp_param()) {
63  case 0:
64  case 32:
65  return std::make_unique<StringViewToScalarEncoder<int32_t, int64_t>>(
66  param.dst_chunk, error_tracking_enabled);
67  case 16:
68  return std::make_unique<StringViewToScalarEncoder<int16_t, int64_t>>(
69  param.dst_chunk, error_tracking_enabled);
70  default:
71  UNREACHABLE();
72  }
// Remaining scalar types: integer, boolean, floating point, decimal, time/date.
73  } else if (dst_type_info.is_integer() || dst_type_info.is_boolean() ||
74  dst_type_info.is_fp() || dst_type_info.is_decimal() ||
75  dst_type_info.is_time_or_date()) {
// No compression: one encoder instantiation per SQL type.
76  if (dst_type_info.get_compression() == kENCODING_NONE) {
77  switch (dst_type_info.get_type()) {
// kBOOLEAN and kTINYINT both use the int8_t encoder.
78  case kBOOLEAN:
79  case kTINYINT:
80  return std::make_unique<StringViewToScalarEncoder<int8_t>>(
81  param.dst_chunk, error_tracking_enabled);
82  case kSMALLINT:
83  return std::make_unique<StringViewToScalarEncoder<int16_t>>(
84  param.dst_chunk, error_tracking_enabled);
85  case kINT:
86  return std::make_unique<StringViewToScalarEncoder<int32_t>>(
87  param.dst_chunk, error_tracking_enabled);
// All of the following types use the int64_t encoder.
88  case kBIGINT:
89  case kNUMERIC:
90  case kDECIMAL:
91  case kTIME:
92  case kTIMESTAMP:
93  case kDATE:
94  return std::make_unique<StringViewToScalarEncoder<int64_t>>(
95  param.dst_chunk, error_tracking_enabled);
96  case kFLOAT:
97  return std::make_unique<StringViewToScalarEncoder<float>>(
98  param.dst_chunk, error_tracking_enabled);
99  case kDOUBLE:
100  return std::make_unique<StringViewToScalarEncoder<double>>(
101  param.dst_chunk, error_tracking_enabled);
102  default:
103  UNREACHABLE();
104  }
// Fixed encoding: the first template argument is selected from get_comp_param()
// (8, 16 or 32), the second from the SQL type.
105  } else if (dst_type_info.get_compression() == kENCODING_FIXED) {
106  switch (dst_type_info.get_type()) {
107  case kSMALLINT: {
108  switch (dst_type_info.get_comp_param()) {
109  case 8:
110  return std::make_unique<StringViewToScalarEncoder<int8_t, int16_t>>(
111  param.dst_chunk, error_tracking_enabled);
112  default:
113  UNREACHABLE();
114  }
115  } break;
116  case kINT: {
117  switch (dst_type_info.get_comp_param()) {
118  case 8:
119  return std::make_unique<StringViewToScalarEncoder<int8_t, int32_t>>(
120  param.dst_chunk, error_tracking_enabled);
121  case 16:
122  return std::make_unique<StringViewToScalarEncoder<int16_t, int32_t>>(
123  param.dst_chunk, error_tracking_enabled);
124  default:
125  UNREACHABLE();
126  }
127  } break;
128  case kBIGINT:
129  case kNUMERIC:
130  case kDECIMAL: {
131  switch (dst_type_info.get_comp_param()) {
132  case 8:
133  return std::make_unique<StringViewToScalarEncoder<int8_t, int64_t>>(
134  param.dst_chunk, error_tracking_enabled);
135  case 16:
136  return std::make_unique<StringViewToScalarEncoder<int16_t, int64_t>>(
137  param.dst_chunk, error_tracking_enabled);
138  case 32:
139  return std::make_unique<StringViewToScalarEncoder<int32_t, int64_t>>(
140  param.dst_chunk, error_tracking_enabled);
141  default:
142  UNREACHABLE();
143  }
144  } break;
// Fixed-encoded time/timestamp/date: get_comp_param() is not consulted here;
// the <int32_t, int64_t> encoder is always used.
145  case kTIME:
146  case kTIMESTAMP:
147  case kDATE:
148  return std::make_unique<StringViewToScalarEncoder<int32_t, int64_t>>(
149  param.dst_chunk, error_tracking_enabled);
150  default:
151  UNREACHABLE();
152  }
153  } else {
154  UNREACHABLE() << "unknown encoding type";
155  }
// Array columns: dispatch on the element type. Array encoders receive both
// param.scalar_temp_chunk and param.dst_chunk.
// NOTE(review): the template arguments of each std::make_unique< in this branch
// (listing lines 162, 174, 189, ...) are not visible in this listing.
156  } else if (dst_type_info.is_array()) {
157  auto dst_sub_type_info = dst_type_info.get_elem_type();
158  if (dst_sub_type_info.is_dict_encoded_string()) {
// Only dictionary-encoded elements with get_size() == 4 are handled.
159  switch (dst_sub_type_info.get_size()) {
160  case 4:
161  return std::make_unique<
163  param.scalar_temp_chunk, param.dst_chunk, error_tracking_enabled);
164  default:
165  UNREACHABLE();
166  }
// Arrays of none-encoded strings are not handled.
167  } else if (dst_sub_type_info.is_none_encoded_string()) {
168  UNREACHABLE();
// Date-in-days elements: only comp_param 0 and 32 are handled (16 hits the default).
169  } else if (dst_sub_type_info.is_date_in_days()) {
170  switch (dst_sub_type_info.get_comp_param()) {
171  case 0:
172  case 32:
173  return std::make_unique<
175  param.scalar_temp_chunk,
176  param.dst_chunk,
177 
178  error_tracking_enabled);
179  default:
180  UNREACHABLE();
181  }
182  } else if (dst_sub_type_info.is_integer() || dst_sub_type_info.is_boolean() ||
183  dst_sub_type_info.is_fp() || dst_sub_type_info.is_decimal() ||
184  dst_sub_type_info.is_time_or_date()) {
// Uncompressed scalar elements: one case per SQL type. Unlike the scalar branch
// above, kBOOLEAN and kTINYINT have separate cases here.
185  if (dst_sub_type_info.get_compression() == kENCODING_NONE) {
186  switch (dst_sub_type_info.get_type()) {
187  case kBOOLEAN:
188  return std::make_unique<
190  param.scalar_temp_chunk,
191  param.dst_chunk,
192 
193  error_tracking_enabled);
194  case kTINYINT:
195  return std::make_unique<
197  param.scalar_temp_chunk,
198  param.dst_chunk,
199 
200  error_tracking_enabled);
201  case kSMALLINT:
202  return std::make_unique<
204  param.scalar_temp_chunk,
205  param.dst_chunk,
206 
207  error_tracking_enabled);
208  case kINT:
209  return std::make_unique<
211  param.scalar_temp_chunk,
212  param.dst_chunk,
213 
214  error_tracking_enabled);
215  case kBIGINT:
216  case kNUMERIC:
217  case kDECIMAL:
218  case kTIME:
219  case kTIMESTAMP:
220  case kDATE:
221  return std::make_unique<
223  param.scalar_temp_chunk,
224  param.dst_chunk,
225 
226  error_tracking_enabled);
227  case kFLOAT:
228  return std::make_unique<
230  param.scalar_temp_chunk,
231  param.dst_chunk,
232 
233  error_tracking_enabled);
234  case kDOUBLE:
235  return std::make_unique<
237  param.scalar_temp_chunk,
238  param.dst_chunk,
239 
240  error_tracking_enabled);
241  default:
242  UNREACHABLE();
243  }
// Fixed-encoded array elements are not handled.
244  } else if (dst_sub_type_info.get_compression() == kENCODING_FIXED) {
245  UNREACHABLE();
246  } else {
247  UNREACHABLE() << "unknown encoding type";
248  }
249  }
// Geometry columns: the geo encoder takes the geo chunk list and its metadata
// list instead of param.dst_chunk.
250  } else if (dst_type_info.is_geometry()) {
251  return std::make_unique<StringViewToGeoEncoder>(param.geo_chunks,
252  param.geo_chunk_metadata,
253  error_tracking_enabled,
254  geo_validate_geometry);
255  }
256 
// Reached when no branch above returned, e.g. an unmatched destination type or
// an array element type that matched none of the inner conditions.
257  UNREACHABLE() << "could not find appropriate encoder to create, conversion use case is "
258  "unsupported";
259 
// Empty result after UNREACHABLE(); presumably never executed at runtime and
// kept so every path returns a value - confirm against Logger.h.
260  return {};
261 }
262 
// Creates the source object that reads values from `input` for conversion.
//
// Parameters:
//   input - chunk to read from; its column type must satisfy is_string()
//           (enforced by the CHECK below).
//   db_id - not referenced anywhere in this function body.
//
// Returns a StringViewSource built from `input` for both dictionary-encoded
// and none-encoded string columns.
263 std::unique_ptr<BaseSource> create_source(const Chunk_NS::Chunk& input, const int db_id) {
264  auto src_type_info = input.getColumnDesc()->columnType;
265  CHECK(src_type_info.is_string()) << "Only string source types currently implemented.";
266 
// Both string encodings are served by the same source type.
267  if (src_type_info.is_dict_encoded_string() || src_type_info.is_none_encoded_string()) {
268  return std::make_unique<StringViewSource>(input);
269  } else {
270  UNREACHABLE() << "unknown string type, not supported";
271  }
272 
// Only reachable after the else-branch above has already hit UNREACHABLE();
// presumably kept so every path returns a value.
273  UNREACHABLE();
274  return {};
275 }
276 
277 } // namespace data_conversion
Definition: sqltypes.h:76
#define UNREACHABLE()
Definition: Logger.h:338
std::unique_ptr< BaseConvertEncoder > create_string_view_encoder(ConversionFactoryParam &param, const bool error_tracking_enabled, const bool geo_validate_geometry)
const ColumnDescriptor * getColumnDesc() const
Definition: Chunk.h:65
Definition: sqltypes.h:80
std::optional< Chunk_NS::Chunk > delete_chunk
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqltypes.h:72
SQLTypeInfo columnType
std::list< Chunk_NS::Chunk > geo_chunks
std::unique_ptr< BaseSource > create_source(const Chunk_NS::Chunk &input, const int db_id)
std::list< std::unique_ptr< ChunkMetadata > > geo_chunk_metadata