OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
GeospatialEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "DataMgr/Chunk/Chunk.h"
20 #include "Geospatial/Compression.h"
21 #include "Geospatial/Types.h"
22 
27 #include "ImportExport/Importer.h"
29 
30 namespace foreign_storage {
31 
32 template <typename T>
33 inline ArrayDatum encode_as_array_datum(const std::vector<T>& data) {
34  const size_t num_bytes = data.size() * sizeof(T);
35  std::shared_ptr<int8_t> buffer(new int8_t[num_bytes], std::default_delete<int8_t[]>());
36  memcpy(buffer.get(), data.data(), num_bytes);
37  return ArrayDatum(num_bytes, buffer, false);
38 }
39 
41  public:
// Defaulted virtual destructor: lets an object of a derived class be destroyed through a
// GeospatialEncoder pointer.
42  virtual ~GeospatialEncoder() = default;
43 
44  GeospatialEncoder(const RenderGroupAnalyzerMap* render_group_analyzer_map)
45  : render_group_analyzer_map_{render_group_analyzer_map} {}
46 
47  GeospatialEncoder(std::list<Chunk_NS::Chunk>& chunks,
48  const RenderGroupAnalyzerMap* render_group_analyzer_map)
49  : geo_column_descriptor_(chunks.begin()->getColumnDesc())
50  , base_column_encoder_(nullptr)
51  , coords_column_encoder_(nullptr)
52  , bounds_column_encoder_(nullptr)
56  , base_column_metadata_(nullptr)
57  , coords_column_metadata_(nullptr)
58  , bounds_column_metadata_(nullptr)
62  , render_group_analyzer_map_{render_group_analyzer_map} {
64  validateChunksSizing(chunks);
65  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
66 
67  // initialize coords column
68  coords_column_descriptor_ = getColumnDescriptor(chunks, geo_column_type, COORDS);
69 
70  // initialize bounds column
71  if (hasBoundsColumn()) {
72  bounds_column_descriptor_ = getColumnDescriptor(chunks, geo_column_type, BOUNDS);
73  }
74 
75  // initialize ring sizes column & render group column
76  if (hasRingSizesColumn()) {
78  getColumnDescriptor(chunks, geo_column_type, RING_SIZES);
79  }
80  if (hasRenderGroupColumn()) {
82  getColumnDescriptor(chunks, geo_column_type, RENDER_GROUP);
83  }
84 
85  // initialize poly rings column
86  if (hasPolyRingsColumn()) {
88  getColumnDescriptor(chunks, geo_column_type, POLY_RINGS);
89  }
90  }
91 
92  GeospatialEncoder(std::list<Chunk_NS::Chunk>& chunks,
93  std::list<std::unique_ptr<ChunkMetadata>>& chunk_metadata,
94  const RenderGroupAnalyzerMap* render_group_analyzer_map)
95  : geo_column_descriptor_(chunks.begin()->getColumnDesc())
96  , base_column_encoder_(nullptr)
97  , coords_column_encoder_(nullptr)
98  , bounds_column_encoder_(nullptr)
100  , poly_rings_column_encoder_(nullptr)
102  , base_column_metadata_(nullptr)
103  , coords_column_metadata_(nullptr)
104  , bounds_column_metadata_(nullptr)
105  , ring_sizes_column_metadata_(nullptr)
106  , poly_rings_column_metadata_(nullptr)
108  , render_group_analyzer_map_{render_group_analyzer_map} {
110 
111  validateChunksSizing(chunks);
112  validateMetadataSizing(chunk_metadata);
113 
114  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
115 
116  // initialize base column encoder
117  auto base_chunk = chunks.begin();
118  base_chunk->initEncoder();
120  dynamic_cast<StringNoneEncoder*>(base_chunk->getBuffer()->getEncoder());
121  base_column_metadata_ = chunk_metadata.begin()->get();
123 
124  // initialize coords column
127  chunks, chunk_metadata, geo_column_type, COORDS);
128 
129  // initialize bounds column
130  if (hasBoundsColumn()) {
131  std::tie(
134  chunks, chunk_metadata, geo_column_type, BOUNDS);
135  }
136 
137  // initialize ring sizes column & render group column
138  if (hasRingSizesColumn()) {
143  chunks, chunk_metadata, geo_column_type, RING_SIZES);
144  }
145  if (hasRenderGroupColumn()) {
150  chunks, chunk_metadata, geo_column_type, RENDER_GROUP);
151  }
152 
153  // initialize poly rings column
154  if (hasPolyRingsColumn()) {
159  chunks, chunk_metadata, geo_column_type, POLY_RINGS);
160  }
161  }
162 
163  protected:
164  void appendBaseAndRenderGroupDataAndUpdateMetadata(const int64_t row_count) {
165  base_values_.resize(row_count);
167  *base_column_encoder_->appendData(&base_values_, 0, row_count);
168  if (hasRenderGroupColumn()) {
169  CHECK_EQ(render_group_value_buffer_.size(), static_cast<size_t>(row_count))
170  << "Render Group Values not generated correctly!";
171  auto data_ptr = reinterpret_cast<int8_t*>(render_group_value_buffer_.data());
173  data_ptr, row_count, render_group_column_descriptor_->columnType);
174  }
175  }
176 
177  void validateChunksSizing(std::list<Chunk_NS::Chunk>& chunks) const {
178  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
179  if (geo_column_type == kPOINT) {
180  CHECK(chunks.size() == 2);
181  } else if (geo_column_type == kLINESTRING) {
182  CHECK(chunks.size() == 3);
183  } else if (geo_column_type == kPOLYGON) {
184  CHECK(chunks.size() == 5);
185  } else if (geo_column_type == kMULTIPOLYGON) {
186  CHECK(chunks.size() == 6);
187  }
188  }
189 
191  std::list<std::unique_ptr<ChunkMetadata>>& chunk_metadata) const {
192  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
193  if (geo_column_type == kPOINT) {
194  CHECK(chunk_metadata.size() == 2);
195  } else if (geo_column_type == kLINESTRING) {
196  CHECK(chunk_metadata.size() == 3);
197  } else if (geo_column_type == kPOLYGON) {
198  CHECK(chunk_metadata.size() == 5);
199  } else if (geo_column_type == kMULTIPOLYGON) {
200  CHECK(chunk_metadata.size() == 6);
201  }
202  }
203 
215  }
216 
218  const std::vector<ArrayDatum>& datum_parse_buffer,
219  Encoder* encoder,
220  ChunkMetadata* chunk_metadata) const {
221  if (!encoder) {
222  CHECK(!chunk_metadata);
223  return;
224  }
225  if (auto fixed_len_array_encoder =
226  dynamic_cast<FixedLengthArrayNoneEncoder*>(encoder)) {
227  auto new_chunk_metadata = fixed_len_array_encoder->appendData(
228  &datum_parse_buffer, 0, datum_parse_buffer.size());
229  *chunk_metadata = *new_chunk_metadata;
230  } else if (auto array_encoder = dynamic_cast<ArrayNoneEncoder*>(encoder)) {
231  auto new_chunk_metadata = array_encoder->appendData(
232  &datum_parse_buffer, 0, datum_parse_buffer.size(), false);
233  *chunk_metadata = *new_chunk_metadata;
234  } else {
235  UNREACHABLE();
236  }
237  }
238 
239  void processGeoElement(std::string_view geo_string_view) {
241  if (!Geospatial::GeoTypesFactory::getGeoColumns(std::string(geo_string_view),
242  import_ti,
249  }
250 
251  // validate types
252  if (geo_column_descriptor_->columnType.get_type() != import_ti.get_type()) {
254  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
257  }
258  }
259 
260  // append coords
261  std::vector<uint8_t> compressed_coords = Geospatial::compress_coords(
263  coords_datum_buffer_.emplace_back(encode_as_array_datum(compressed_coords));
264 
265  // append bounds
266  if (hasBoundsColumn()) {
268  }
269 
270  // append ring sizes
271  if (hasRingSizesColumn()) {
272  ring_sizes_datum_buffer_.emplace_back(
274  }
275 
276  // append poly rings
277  if (hasPolyRingsColumn()) {
278  poly_rings_datum_buffer_.emplace_back(
280  }
281 
282  if (hasRenderGroupColumn()) {
283  if (IS_GEO_POLY(import_ti.get_type()) && render_group_analyzer_map_ &&
284  render_group_analyzer_map_->size()) {
285  CHECK_EQ(bounds_parse_buffer_.size(), 4u);
286  auto const itr =
288  int render_group{0};
289  if (itr != render_group_analyzer_map_->end()) {
290  auto& render_group_analyzer = *itr->second;
291  render_group = render_group_analyzer.insertBoundsAndReturnRenderGroup(
293  }
294  render_group_value_buffer_.emplace_back(render_group);
295  } else {
296  render_group_value_buffer_.emplace_back(0);
297  }
298  }
299  }
300 
309  // POINT columns are represented using fixed length arrays and need
310  // special treatment of nulls
312  std::vector<uint8_t> compressed_coords = Geospatial::compress_coords(
314  coords_datum_buffer_.emplace_back(encode_as_array_datum(compressed_coords));
315  } else {
318  }
319  if (hasBoundsColumn()) {
322  }
323  if (hasRingSizesColumn()) {
324  ring_sizes_datum_buffer_.emplace_back(
327  }
328  if (hasPolyRingsColumn()) {
329  poly_rings_datum_buffer_.emplace_back(
332  }
333  if (hasRenderGroupColumn()) {
334  static constexpr int32_t kNullRenderGroupValue = -1;
335  render_group_value_buffer_.emplace_back(kNullRenderGroupValue);
336  }
337  }
338 
340  coords_parse_buffer_.clear();
341  bounds_parse_buffer_.clear();
342  ring_sizes_parse_buffer_.clear();
343  poly_rings_parse_buffer_.clear();
344  }
345 
347  coords_datum_buffer_.clear();
348  bounds_datum_buffer_.clear();
349  ring_sizes_datum_buffer_.clear();
350  poly_rings_datum_buffer_.clear();
352  }
353 
355 
356  template <typename T>
357  typename std::list<T>::iterator getIteratorForGeoColumnType(
358  std::list<T>& list,
359  const SQLTypes column_type,
360  const GeoColumnType geo_column) {
361  auto list_iter = list.begin();
362  list_iter++; // skip base column
363  switch (column_type) {
364  case kPOINT: {
365  if (geo_column == COORDS) {
366  return list_iter;
367  }
368  UNREACHABLE();
369  }
370  case kLINESTRING: {
371  if (geo_column == COORDS) {
372  return list_iter;
373  }
374  list_iter++;
375  if (geo_column == BOUNDS) {
376  return list_iter;
377  }
378  UNREACHABLE();
379  }
380  case kPOLYGON: {
381  if (geo_column == COORDS) {
382  return list_iter;
383  }
384  list_iter++;
385  if (geo_column == RING_SIZES) {
386  return list_iter;
387  }
388  list_iter++;
389  if (geo_column == BOUNDS) {
390  return list_iter;
391  }
392  list_iter++;
393  if (geo_column == RENDER_GROUP) {
394  return list_iter;
395  }
396  UNREACHABLE();
397  }
398  case kMULTIPOLYGON: {
399  if (geo_column == COORDS) {
400  return list_iter;
401  }
402  list_iter++;
403  if (geo_column == RING_SIZES) {
404  return list_iter;
405  }
406  list_iter++;
407  if (geo_column == POLY_RINGS) {
408  return list_iter;
409  }
410  list_iter++;
411  if (geo_column == BOUNDS) {
412  return list_iter;
413  }
414  list_iter++;
415  if (geo_column == RENDER_GROUP) {
416  return list_iter;
417  }
418  UNREACHABLE();
419  }
420  default:
421  UNREACHABLE();
422  }
423  return {};
424  }
425 
426  std::tuple<Encoder*, ChunkMetadata*, const ColumnDescriptor*>
428  std::list<Chunk_NS::Chunk>& chunks,
429  std::list<std::unique_ptr<ChunkMetadata>>& chunk_metadata,
430  const SQLTypes sql_type,
431  GeoColumnType geo_column_type) {
432  auto chunk = getIteratorForGeoColumnType(chunks, sql_type, geo_column_type);
433  chunk->initEncoder();
434  auto encoder = chunk->getBuffer()->getEncoder();
435  auto metadata =
436  getIteratorForGeoColumnType(chunk_metadata, sql_type, geo_column_type)->get();
437  auto column_descriptor = chunk->getColumnDesc();
438  return {encoder, metadata, column_descriptor};
439  }
440 
441  const ColumnDescriptor* getColumnDescriptor(std::list<Chunk_NS::Chunk>& chunks,
442  const SQLTypes sql_type,
443  GeoColumnType geo_column_type) {
444  auto chunk = getIteratorForGeoColumnType(chunks, sql_type, geo_column_type);
445  auto column_descriptor = chunk->getColumnDesc();
446  return column_descriptor;
447  }
448 
449  static void throwMalformedGeoElement(const std::string& omnisci_column_name) {
450  std::string error_message = "Failed to extract valid geometry in HeavyDB column '" +
451  omnisci_column_name + "'.";
452  throw foreign_storage::ForeignStorageException(error_message);
453  }
454 
455  static void throwMismatchedGeoElement(const std::string& omnisci_column_name) {
457  "Imported geometry"
458  " doesn't match the geospatial type of HeavyDB column '" +
459  omnisci_column_name + "'.");
460  }
461 
462  bool hasBoundsColumn() const {
463  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
464  return geo_column_type == kLINESTRING || geo_column_type == kPOLYGON ||
465  geo_column_type == kMULTIPOLYGON;
466  }
467 
468  bool hasRingSizesColumn() const {
469  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
470  return geo_column_type == kPOLYGON || geo_column_type == kMULTIPOLYGON;
471  }
472 
473  bool hasRenderGroupColumn() const {
474  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
475  return geo_column_type == kPOLYGON || geo_column_type == kMULTIPOLYGON;
476  }
477 
478  bool hasPolyRingsColumn() const {
479  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
480  return geo_column_type == kMULTIPOLYGON;
481  }
482 
484 
485  constexpr static bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true;
486 
493 
500 
506 
// Resized to the batch row count and then handed to base_column_encoder_->appendData()
// in appendBaseAndRenderGroupDataAndUpdateMetadata().
507  std::vector<std::string> base_values_;
508 
509  // Used repeatedly in parsing geo types, declared as members to prevent
510  // deallocation/reallocation costs
511  std::vector<double> coords_parse_buffer_;
512  std::vector<double> bounds_parse_buffer_;
513  std::vector<int> ring_sizes_parse_buffer_;
514  std::vector<int> poly_rings_parse_buffer_;
515 
516  // Used to buffer array appends in memory for a batch
517  std::vector<ArrayDatum> coords_datum_buffer_;
518  std::vector<ArrayDatum> bounds_datum_buffer_;
519  std::vector<ArrayDatum> ring_sizes_datum_buffer_;
520  std::vector<ArrayDatum> poly_rings_datum_buffer_;
// Render group values buffered for columns that have a render group column:
// processGeoElement() appends either the value returned by the render group analyzer
// or 0, and the null-element path appends -1 (kNullRenderGroupValue). Its size is
// checked against the row count before the values are appended.
521  std::vector<int32_t> render_group_value_buffer_;
522 
524 };
525 
526 } // namespace foreign_storage
static void throwMismatchedGeoElement(const std::string &omnisci_column_name)
#define CHECK_EQ(x, y)
Definition: Logger.h:231
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
SQLTypes
Definition: sqltypes.h:38
const ColumnDescriptor * poly_rings_column_descriptor_
const ColumnDescriptor * render_group_column_descriptor_
std::vector< ArrayDatum > coords_datum_buffer_
std::vector< std::string > base_values_
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:434
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
#define UNREACHABLE()
Definition: Logger.h:267
void validateMetadataSizing(std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata) const
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1144
std::vector< ArrayDatum > bounds_datum_buffer_
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
void appendToArrayEncoderAndUpdateMetadata(const std::vector< ArrayDatum > &datum_parse_buffer, Encoder *encoder, ChunkMetadata *chunk_metadata) const
static void throwMalformedGeoElement(const std::string &omnisci_column_name)
std::vector< int32_t > render_group_value_buffer_
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< uint8_t > compress_coords(const std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
GeospatialEncoder(const RenderGroupAnalyzerMap *render_group_analyzer_map)
GeospatialEncoder(std::list< Chunk_NS::Chunk > &chunks, const RenderGroupAnalyzerMap *render_group_analyzer_map)
void validateChunksSizing(std::list< Chunk_NS::Chunk > &chunks) const
const ColumnDescriptor * ring_sizes_column_descriptor_
specifies the content in-memory of a row in the column metadata table
void processGeoElement(std::string_view geo_string_view)
const ColumnDescriptor * getColumnDescriptor(std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
const RenderGroupAnalyzerMap * render_group_analyzer_map_
const ColumnDescriptor * coords_column_descriptor_
std::vector< ArrayDatum > ring_sizes_datum_buffer_
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:937
std::vector< double > bounds_parse_buffer_
std::vector< double > coords_parse_buffer_
unencoded fixed length array encoder
void appendBaseAndRenderGroupDataAndUpdateMetadata(const int64_t row_count)
ArrayDatum encode_as_array_datum(const std::vector< T > &data)
#define CHECK(condition)
Definition: Logger.h:223
bool is_geometry() const
Definition: sqltypes.h:522
For unencoded strings.
const ColumnDescriptor * geo_column_descriptor_
SQLTypeInfo columnType
std::map< int, std::unique_ptr< import_export::RenderGroupAnalyzer >> RenderGroupAnalyzerMap
std::tuple< Encoder *, ChunkMetadata *, const ColumnDescriptor * > initEncoderAndGetEncoderAndMetadata(std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const SQLTypes sql_type, GeoColumnType geo_column_type)
unencoded array encoder
std::vector< ArrayDatum > poly_rings_datum_buffer_
std::string columnName
GeospatialEncoder(std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const RenderGroupAnalyzerMap *render_group_analyzer_map)
std::list< T >::iterator getIteratorForGeoColumnType(std::list< T > &list, const SQLTypes column_type, const GeoColumnType geo_column)
virtual ~GeospatialEncoder()=default
const ColumnDescriptor * bounds_column_descriptor_
virtual std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1)=0
#define IS_GEO_POLY(T)
Definition: sqltypes.h:255