OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
GeospatialEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "DataMgr/Chunk/Chunk.h"
20 #include "Geospatial/Compression.h"
21 #include "Geospatial/Types.h"
22 
27 #include "ImportExport/Importer.h"
28 
29 namespace foreign_storage {
30 
31 template <typename T>
32 inline ArrayDatum encode_as_array_datum(const std::vector<T>& data) {
33  const size_t num_bytes = data.size() * sizeof(T);
34  std::shared_ptr<int8_t> buffer(new int8_t[num_bytes], std::default_delete<int8_t[]>());
35  memcpy(buffer.get(), data.data(), num_bytes);
36  return ArrayDatum(num_bytes, buffer, false);
37 }
38 
40  public:
41  virtual ~GeospatialEncoder() = default;
42 
44 
45  GeospatialEncoder(std::list<Chunk_NS::Chunk>& chunks)
46  : geo_column_descriptor_(chunks.begin()->getColumnDesc())
47  , base_column_encoder_(nullptr)
48  , coords_column_encoder_(nullptr)
49  , bounds_column_encoder_(nullptr)
53  , base_column_metadata_(nullptr)
54  , coords_column_metadata_(nullptr)
55  , bounds_column_metadata_(nullptr)
60  validateChunksSizing(chunks);
61  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
62 
63  // initialize coords column
64  coords_column_descriptor_ = getColumnDescriptor(chunks, geo_column_type, COORDS);
65 
66  // initialize bounds column
67  if (hasBoundsColumn()) {
68  bounds_column_descriptor_ = getColumnDescriptor(chunks, geo_column_type, BOUNDS);
69  }
70 
71  // initialize ring sizes column & render group column
72  if (hasRingSizesColumn()) {
74  getColumnDescriptor(chunks, geo_column_type, RING_SIZES);
75  }
76  if (hasRenderGroupColumn()) {
78  getColumnDescriptor(chunks, geo_column_type, RENDER_GROUP);
79  }
80 
81  // initialize poly rings column
82  if (hasPolyRingsColumn()) {
84  getColumnDescriptor(chunks, geo_column_type, POLY_RINGS);
85  }
86  }
87 
88  GeospatialEncoder(std::list<Chunk_NS::Chunk>& chunks,
89  std::list<std::unique_ptr<ChunkMetadata>>& chunk_metadata)
90  : geo_column_descriptor_(chunks.begin()->getColumnDesc())
91  , base_column_encoder_(nullptr)
92  , coords_column_encoder_(nullptr)
93  , bounds_column_encoder_(nullptr)
97  , base_column_metadata_(nullptr)
98  , coords_column_metadata_(nullptr)
99  , bounds_column_metadata_(nullptr)
100  , ring_sizes_column_metadata_(nullptr)
101  , poly_rings_column_metadata_(nullptr)
102  , render_group_column_metadata_(nullptr) {
104 
105  validateChunksSizing(chunks);
106  validateMetadataSizing(chunk_metadata);
107 
108  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
109 
110  // initialize base column encoder
111  auto base_chunk = chunks.begin();
112  base_chunk->initEncoder();
114  dynamic_cast<StringNoneEncoder*>(base_chunk->getBuffer()->getEncoder());
115  base_column_metadata_ = chunk_metadata.begin()->get();
117 
118  // initialize coords column
121  chunks, chunk_metadata, geo_column_type, COORDS);
122 
123  // initialize bounds column
124  if (hasBoundsColumn()) {
125  std::tie(
128  chunks, chunk_metadata, geo_column_type, BOUNDS);
129  }
130 
131  // initialize ring sizes column & render group column
132  if (hasRingSizesColumn()) {
137  chunks, chunk_metadata, geo_column_type, RING_SIZES);
138  }
139  if (hasRenderGroupColumn()) {
144  chunks, chunk_metadata, geo_column_type, RENDER_GROUP);
145  }
146 
147  // initialize poly rings column
148  if (hasPolyRingsColumn()) {
153  chunks, chunk_metadata, geo_column_type, POLY_RINGS);
154  }
155  }
156 
157  protected:
158  void appendBaseAndRenderGroupDataAndUpdateMetadata(const int64_t row_count) {
159  // add nulls to base column & zeros to render group (if applicable)
160  render_group_values_.resize(row_count, 0);
161  base_values_.resize(row_count);
163  *base_column_encoder_->appendData(&base_values_, 0, row_count);
164  if (hasRenderGroupColumn()) {
165  auto data_ptr = reinterpret_cast<int8_t*>(render_group_values_.data());
167  data_ptr, row_count, render_group_column_descriptor_->columnType);
168  }
169  }
170 
171  void validateChunksSizing(std::list<Chunk_NS::Chunk>& chunks) const {
172  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
173  if (geo_column_type == kPOINT) {
174  CHECK(chunks.size() == 2);
175  } else if (geo_column_type == kLINESTRING) {
176  CHECK(chunks.size() == 3);
177  } else if (geo_column_type == kPOLYGON) {
178  CHECK(chunks.size() == 5);
179  } else if (geo_column_type == kMULTIPOLYGON) {
180  CHECK(chunks.size() == 6);
181  }
182  }
183 
185  std::list<std::unique_ptr<ChunkMetadata>>& chunk_metadata) const {
186  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
187  if (geo_column_type == kPOINT) {
188  CHECK(chunk_metadata.size() == 2);
189  } else if (geo_column_type == kLINESTRING) {
190  CHECK(chunk_metadata.size() == 3);
191  } else if (geo_column_type == kPOLYGON) {
192  CHECK(chunk_metadata.size() == 5);
193  } else if (geo_column_type == kMULTIPOLYGON) {
194  CHECK(chunk_metadata.size() == 6);
195  }
196  }
197 
209  }
210 
212  const std::vector<ArrayDatum>& datum_parse_buffer,
213  Encoder* encoder,
214  ChunkMetadata* chunk_metadata) const {
215  if (!encoder) {
216  CHECK(!chunk_metadata);
217  return;
218  }
219  if (auto fixed_len_array_encoder =
220  dynamic_cast<FixedLengthArrayNoneEncoder*>(encoder)) {
221  auto new_chunk_metadata = fixed_len_array_encoder->appendData(
222  &datum_parse_buffer, 0, datum_parse_buffer.size());
223  *chunk_metadata = *new_chunk_metadata;
224  } else if (auto array_encoder = dynamic_cast<ArrayNoneEncoder*>(encoder)) {
225  auto new_chunk_metadata = array_encoder->appendData(
226  &datum_parse_buffer, 0, datum_parse_buffer.size(), false);
227  *chunk_metadata = *new_chunk_metadata;
228  } else {
229  UNREACHABLE();
230  }
231  }
232 
233  void processGeoElement(std::string_view geo_string_view) {
235  if (!Geospatial::GeoTypesFactory::getGeoColumns(std::string(geo_string_view),
236  import_ti,
243  }
244 
245  // validate types
246  if (geo_column_descriptor_->columnType.get_type() != import_ti.get_type()) {
248  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
251  }
252  }
253 
254  // append coords
255  std::vector<uint8_t> compressed_coords = Geospatial::compress_coords(
257  coords_datum_buffer_.emplace_back(encode_as_array_datum(compressed_coords));
258 
259  // append bounds
260  if (hasBoundsColumn()) {
262  }
263 
264  // append ring sizes
265  if (hasRingSizesColumn()) {
266  ring_sizes_datum_buffer_.emplace_back(
268  }
269 
270  // append poly rings
271  if (hasPolyRingsColumn()) {
272  poly_rings_datum_buffer_.emplace_back(
274  }
275  }
276 
285  // POINT columns are represented using fixed length arrays and need
286  // special treatment of nulls
288  std::vector<uint8_t> compressed_coords = Geospatial::compress_coords(
290  coords_datum_buffer_.emplace_back(encode_as_array_datum(compressed_coords));
291  } else {
294  }
295  if (hasBoundsColumn()) {
298  }
299  if (hasRingSizesColumn()) {
300  ring_sizes_datum_buffer_.emplace_back(
303  }
304  if (hasPolyRingsColumn()) {
305  poly_rings_datum_buffer_.emplace_back(
308  }
309  }
310 
312  coords_parse_buffer_.clear();
313  bounds_parse_buffer_.clear();
314  ring_sizes_parse_buffer_.clear();
315  poly_rings_parse_buffer_.clear();
316  }
317 
319  coords_datum_buffer_.clear();
320  bounds_datum_buffer_.clear();
321  ring_sizes_datum_buffer_.clear();
322  poly_rings_datum_buffer_.clear();
323  }
324 
326 
327  template <typename T>
328  typename std::list<T>::iterator getIteratorForGeoColumnType(
329  std::list<T>& list,
330  const SQLTypes column_type,
331  const GeoColumnType geo_column) {
332  auto list_iter = list.begin();
333  list_iter++; // skip base column
334  switch (column_type) {
335  case kPOINT: {
336  if (geo_column == COORDS) {
337  return list_iter;
338  }
339  UNREACHABLE();
340  }
341  case kLINESTRING: {
342  if (geo_column == COORDS) {
343  return list_iter;
344  }
345  list_iter++;
346  if (geo_column == BOUNDS) {
347  return list_iter;
348  }
349  UNREACHABLE();
350  }
351  case kPOLYGON: {
352  if (geo_column == COORDS) {
353  return list_iter;
354  }
355  list_iter++;
356  if (geo_column == RING_SIZES) {
357  return list_iter;
358  }
359  list_iter++;
360  if (geo_column == BOUNDS) {
361  return list_iter;
362  }
363  list_iter++;
364  if (geo_column == RENDER_GROUP) {
365  return list_iter;
366  }
367  UNREACHABLE();
368  }
369  case kMULTIPOLYGON: {
370  if (geo_column == COORDS) {
371  return list_iter;
372  }
373  list_iter++;
374  if (geo_column == RING_SIZES) {
375  return list_iter;
376  }
377  list_iter++;
378  if (geo_column == POLY_RINGS) {
379  return list_iter;
380  }
381  list_iter++;
382  if (geo_column == BOUNDS) {
383  return list_iter;
384  }
385  list_iter++;
386  if (geo_column == RENDER_GROUP) {
387  return list_iter;
388  }
389  UNREACHABLE();
390  }
391  default:
392  UNREACHABLE();
393  }
394  return {};
395  }
396 
397  std::tuple<Encoder*, ChunkMetadata*, const ColumnDescriptor*>
399  std::list<Chunk_NS::Chunk>& chunks,
400  std::list<std::unique_ptr<ChunkMetadata>>& chunk_metadata,
401  const SQLTypes sql_type,
402  GeoColumnType geo_column_type) {
403  auto chunk = getIteratorForGeoColumnType(chunks, sql_type, geo_column_type);
404  chunk->initEncoder();
405  auto encoder = chunk->getBuffer()->getEncoder();
406  auto metadata =
407  getIteratorForGeoColumnType(chunk_metadata, sql_type, geo_column_type)->get();
408  auto column_descriptor = chunk->getColumnDesc();
409  return {encoder, metadata, column_descriptor};
410  }
411 
412  const ColumnDescriptor* getColumnDescriptor(std::list<Chunk_NS::Chunk>& chunks,
413  const SQLTypes sql_type,
414  GeoColumnType geo_column_type) {
415  auto chunk = getIteratorForGeoColumnType(chunks, sql_type, geo_column_type);
416  auto column_descriptor = chunk->getColumnDesc();
417  return column_descriptor;
418  }
419 
420  static void throwMalformedGeoElement(const std::string& omnisci_column_name) {
421  std::string error_message = "Failed to extract valid geometry in OmniSci column '" +
422  omnisci_column_name + "'.";
423  throw foreign_storage::ForeignStorageException(error_message);
424  }
425 
426  static void throwMismatchedGeoElement(const std::string& omnisci_column_name) {
428  "Imported geometry"
429  " doesn't match the geospatial type of OmniSci column '" +
430  omnisci_column_name + "'.");
431  }
432 
433  bool hasBoundsColumn() const {
434  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
435  return geo_column_type == kLINESTRING || geo_column_type == kPOLYGON ||
436  geo_column_type == kMULTIPOLYGON;
437  }
438 
439  bool hasRingSizesColumn() const {
440  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
441  return geo_column_type == kPOLYGON || geo_column_type == kMULTIPOLYGON;
442  }
443 
444  bool hasRenderGroupColumn() const {
445  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
446  return geo_column_type == kPOLYGON || geo_column_type == kMULTIPOLYGON;
447  }
448 
449  bool hasPolyRingsColumn() const {
450  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
451  return geo_column_type == kMULTIPOLYGON;
452  }
453 
455 
456  constexpr static bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true;
457 
464 
471 
477 
478  std::vector<int32_t> render_group_values_;
479  std::vector<std::string> base_values_;
480 
481  // Used repeatedly in parsing geo types, declared as members to prevent
482  // deallocation/reallocation costs
483  std::vector<double> coords_parse_buffer_;
484  std::vector<double> bounds_parse_buffer_;
485  std::vector<int> ring_sizes_parse_buffer_;
486  std::vector<int> poly_rings_parse_buffer_;
487 
488  // Used to buffer array appends in memory for a batch
489  std::vector<ArrayDatum> coords_datum_buffer_;
490  std::vector<ArrayDatum> bounds_datum_buffer_;
491  std::vector<ArrayDatum> ring_sizes_datum_buffer_;
492  std::vector<ArrayDatum> poly_rings_datum_buffer_;
493 };
494 
495 } // namespace foreign_storage
static void throwMismatchedGeoElement(const std::string &omnisci_column_name)
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
SQLTypes
Definition: sqltypes.h:38
const ColumnDescriptor * poly_rings_column_descriptor_
const ColumnDescriptor * render_group_column_descriptor_
GeospatialEncoder(std::list< Chunk_NS::Chunk > &chunks)
std::vector< ArrayDatum > coords_datum_buffer_
std::vector< std::string > base_values_
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:410
GeospatialEncoder(std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata)
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
#define UNREACHABLE()
Definition: Logger.h:253
void validateMetadataSizing(std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata) const
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1144
std::vector< ArrayDatum > bounds_datum_buffer_
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
void appendToArrayEncoderAndUpdateMetadata(const std::vector< ArrayDatum > &datum_parse_buffer, Encoder *encoder, ChunkMetadata *chunk_metadata) const
static void throwMalformedGeoElement(const std::string &omnisci_column_name)
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< uint8_t > compress_coords(const std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
void validateChunksSizing(std::list< Chunk_NS::Chunk > &chunks) const
const ColumnDescriptor * ring_sizes_column_descriptor_
specifies the content in-memory of a row in the column metadata table
void processGeoElement(std::string_view geo_string_view)
const ColumnDescriptor * getColumnDescriptor(std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
const ColumnDescriptor * coords_column_descriptor_
std::vector< ArrayDatum > ring_sizes_datum_buffer_
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:937
std::vector< double > bounds_parse_buffer_
std::vector< double > coords_parse_buffer_
unencoded fixed length array encoder
void appendBaseAndRenderGroupDataAndUpdateMetadata(const int64_t row_count)
ArrayDatum encode_as_array_datum(const std::vector< T > &data)
#define CHECK(condition)
Definition: Logger.h:209
bool is_geometry() const
Definition: sqltypes.h:521
For unencoded strings.
const ColumnDescriptor * geo_column_descriptor_
SQLTypeInfo columnType
std::tuple< Encoder *, ChunkMetadata *, const ColumnDescriptor * > initEncoderAndGetEncoderAndMetadata(std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const SQLTypes sql_type, GeoColumnType geo_column_type)
unencoded array encoder
std::vector< ArrayDatum > poly_rings_datum_buffer_
std::string columnName
std::list< T >::iterator getIteratorForGeoColumnType(std::list< T > &list, const SQLTypes column_type, const GeoColumnType geo_column)
virtual ~GeospatialEncoder()=default
std::vector< int32_t > render_group_values_
const ColumnDescriptor * bounds_column_descriptor_
virtual std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1)=0