OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Compression.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "Geospatial/Compression.h"
18 #include "Geospatial/Types.h"
20 
21 namespace Geospatial {
22 
23 int32_t get_compression_scheme(const SQLTypeInfo& ti) {
24  if (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32) {
25  return COMPRESSION_GEOINT32;
26  }
27  if (ti.get_compression() != kENCODING_NONE) {
28  throw std::runtime_error("Invalid compression");
29  }
30  return COMPRESSION_NONE;
31 }
32 
33 uint64_t compress_coord(double coord, const SQLTypeInfo& ti, bool x) {
34  if (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32) {
37  }
38  return *reinterpret_cast<uint64_t*>(may_alias_ptr(&coord));
39 }
40 
41 uint64_t compress_null_point(const SQLTypeInfo& ti, bool x) {
42  if (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32) {
45  }
46  double n = x ? NULL_ARRAY_DOUBLE : NULL_DOUBLE;
47  auto u = *reinterpret_cast<uint64_t*>(may_alias_ptr(&n));
48  return u;
49 }
50 
51 // Compress non-NULL geo coords; and also NULL POINT coords (special case)
52 std::vector<uint8_t> compress_coords(const std::vector<double>& coords,
53  const SQLTypeInfo& ti) {
54  CHECK(!coords.empty()) << "Coord compression received no data";
55  bool is_null_point = false;
56  if (!ti.get_notnull()) {
57  is_null_point = (ti.get_type() == kPOINT && coords[0] == NULL_ARRAY_DOUBLE);
58  }
59 
60  bool x = true;
61  bool is_geoint32 =
62  (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32);
63  size_t coord_data_size = (is_geoint32) ? (ti.get_comp_param() / 8) : sizeof(double);
64  std::vector<uint8_t> compressed_coords;
65  compressed_coords.reserve(coords.size() * coord_data_size);
66  for (auto coord : coords) {
67  uint64_t coord_data;
68  if (is_null_point) {
69  coord_data = compress_null_point(ti, x);
70  } else {
71  if (ti.get_output_srid() == 4326) {
72  if (x) {
73  if (coord < -180.0 || coord > 180.0) {
74  throw std::runtime_error("WGS84 longitude " + std::to_string(coord) +
75  " is out of bounds");
76  }
77  } else {
78  if (coord < -90.0 || coord > 90.0) {
79  throw std::runtime_error("WGS84 latitude " + std::to_string(coord) +
80  " is out of bounds");
81  }
82  }
83  }
84  if (is_geoint32) {
85  coord_data = compress_coord(coord, ti, x);
86  } else {
87  auto coord_data_ptr = reinterpret_cast<uint64_t*>(&coord);
88  coord_data = *coord_data_ptr;
89  }
90  }
91  for (size_t i = 0; i < coord_data_size; i++) {
92  compressed_coords.push_back(coord_data & 0xFF);
93  coord_data >>= 8;
94  }
95  x = !x;
96  }
97  return compressed_coords;
98 }
99 
100 template <typename T>
101 void unpack_geo_vector(std::vector<T>& output, const int8_t* input_ptr, const size_t sz) {
102  if (sz == 0) {
103  return;
104  }
105  auto elems = reinterpret_cast<const T*>(input_ptr);
106  CHECK_EQ(size_t(0), sz % sizeof(T));
107  const size_t num_elems = sz / sizeof(T);
108  output.resize(num_elems);
109  for (size_t i = 0; i < num_elems; i++) {
110  output[i] = elems[i];
111  }
112 }
113 
114 template <>
115 void unpack_geo_vector<int32_t>(std::vector<int32_t>& output,
116  const int8_t* input_ptr,
117  const size_t sz) {
118  if (sz == 0) {
119  return;
120  }
121  auto elems = reinterpret_cast<const int32_t*>(input_ptr);
122  CHECK_EQ(size_t(0), sz % sizeof(int32_t));
123  const size_t num_elems = sz / sizeof(int32_t);
124  output.resize(num_elems);
125  for (size_t i = 0; i < num_elems; i++) {
126  output[i] = elems[i];
127  }
128 }
129 
130 template <typename T>
131 void decompress_geo_coords_geoint32(std::vector<T>& dec,
132  const int8_t* enc,
133  const size_t sz) {
134  if (sz == 0) {
135  return;
136  }
137  const auto compressed_coords = reinterpret_cast<const int32_t*>(enc);
138  const auto num_coords = sz / sizeof(int32_t);
139  dec.resize(num_coords);
140  for (size_t i = 0; i < num_coords; i += 2) {
141  dec[i] = Geospatial::decompress_longitude_coord_geoint32(compressed_coords[i]);
142  dec[i + 1] = Geospatial::decompress_latitude_coord_geoint32(compressed_coords[i + 1]);
143  }
144 }
145 
146 template <>
147 std::shared_ptr<std::vector<double>> decompress_coords<double, SQLTypeInfo>(
148  const SQLTypeInfo& geo_ti,
149  const int8_t* coords,
150  const size_t coords_sz) {
151  auto decompressed_coords_ptr = std::make_shared<std::vector<double>>();
152  if (geo_ti.get_compression() == kENCODING_GEOINT) {
153  if (geo_ti.get_comp_param() == 32) {
154  decompress_geo_coords_geoint32(*decompressed_coords_ptr, coords, coords_sz);
155  }
156  } else {
157  CHECK_EQ(geo_ti.get_compression(), kENCODING_NONE);
158  unpack_geo_vector(*decompressed_coords_ptr, coords, coords_sz);
159  }
160  return decompressed_coords_ptr;
161 }
162 
163 template <>
164 std::shared_ptr<std::vector<double>> decompress_coords<double, int32_t>(
165  const int32_t& ic,
166  const int8_t* coords,
167  const size_t coords_sz) {
168  auto decompressed_coords_ptr = std::make_shared<std::vector<double>>();
169  if (ic == COMPRESSION_GEOINT32) {
170  decompress_geo_coords_geoint32(*decompressed_coords_ptr, coords, coords_sz);
171  } else {
173  unpack_geo_vector(*decompressed_coords_ptr, coords, coords_sz);
174  }
175  return decompressed_coords_ptr;
176 }
177 
178 bool is_null_point(const SQLTypeInfo& geo_ti,
179  const int8_t* coords,
180  const size_t coords_sz) {
181  if (geo_ti.get_type() == kPOINT && !geo_ti.get_notnull()) {
182  if (geo_ti.get_compression() == kENCODING_GEOINT) {
183  if (geo_ti.get_comp_param() == 32) {
184  return Geospatial::is_null_point_longitude_geoint32(*((int32_t*)coords));
185  }
186  } else {
188  auto coords_ptr = (double*)coords;
189  if (!coords_ptr) {
190  return true;
191  }
192  return *coords_ptr == NULL_ARRAY_DOUBLE;
193  }
194  }
195  return false;
196 }
197 
198 } // namespace Geospatial
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define COMPRESSION_NONE
#define NULL_DOUBLE
DEVICE double decompress_latitude_coord_geoint32(const int32_t compressed)
void decompress_geo_coords_geoint32(std::vector< T > &dec, const int8_t *enc, const size_t sz)
DEVICE uint64_t compress_longitude_coord_geoint32(const double coord)
int32_t get_compression_scheme(const SQLTypeInfo &ti)
Definition: Compression.cpp:23
bool is_null_point(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
DEVICE ALWAYS_INLINE Point2D coord(const int8_t *data, const int32_t x_index, const int32_t ic, const int32_t isr, const int32_t osr)
std::string to_string(char const *&&v)
DEVICE bool is_null_point_longitude_geoint32(const int32_t compressed)
std::shared_ptr< std::vector< double > > decompress_coords< double, SQLTypeInfo >(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
std::vector< uint8_t > compress_coords(const std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
DEVICE uint64_t compress_latitude_coord_geoint32(const double coord)
DEVICE constexpr uint64_t compress_null_point_latitude_geoint32()
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
DEVICE double decompress_longitude_coord_geoint32(const int32_t compressed)
std::shared_ptr< std::vector< double > > decompress_coords< double, int32_t >(const int32_t &ic, const int8_t *coords, const size_t coords_sz)
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:402
#define NULL_ARRAY_DOUBLE
void unpack_geo_vector< int32_t >(std::vector< int32_t > &output, const int8_t *input_ptr, const size_t sz)
#define CHECK(condition)
Definition: Logger.h:291
#define COMPRESSION_GEOINT32
constexpr double n
Definition: Utm.h:38
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
DEVICE constexpr uint64_t compress_null_point_longitude_geoint32()
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:397
uint64_t compress_coord(double coord, const SQLTypeInfo &ti, bool x)
Definition: Compression.cpp:33
uint64_t compress_null_point(const SQLTypeInfo &ti, bool x)
Definition: Compression.cpp:41
void unpack_geo_vector(std::vector< T > &output, const int8_t *input_ptr, const size_t sz)