OmniSciDB  2e3a973ef4
Compression.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "Geospatial/Compression.h"
18 #include "Geospatial/Types.h"
20 
21 namespace Geospatial {
22 
23 int32_t get_compression_scheme(const SQLTypeInfo& ti) {
24  if (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32) {
25  return COMPRESSION_GEOINT32;
26  }
27  if (ti.get_compression() != kENCODING_NONE) {
28  throw std::runtime_error("Invalid compression");
29  }
30  return COMPRESSION_NONE;
31 }
32 
33 uint64_t compress_coord(double coord, const SQLTypeInfo& ti, bool x) {
34  if (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32) {
37  }
38  return *reinterpret_cast<uint64_t*>(may_alias_ptr(&coord));
39 }
40 
41 uint64_t compress_null_point(const SQLTypeInfo& ti, bool x) {
42  if (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32) {
45  }
46  double n = x ? NULL_ARRAY_DOUBLE : NULL_DOUBLE;
47  auto u = *reinterpret_cast<uint64_t*>(may_alias_ptr(&n));
48  return u;
49 }
50 
51 // Compress non-NULL geo coords; and also NULL POINT coords (special case)
52 std::vector<uint8_t> compress_coords(std::vector<double>& coords, const SQLTypeInfo& ti) {
53  CHECK(!coords.empty()) << "Coord compression received no data";
54  bool is_null_point = false;
55  if (!ti.get_notnull()) {
56  is_null_point = (ti.get_type() == kPOINT && coords[0] == NULL_ARRAY_DOUBLE);
57  }
58 
59  bool x = true;
60  bool is_geoint32 =
61  (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32);
62  size_t coord_data_size = (is_geoint32) ? (ti.get_comp_param() / 8) : sizeof(double);
63  std::vector<uint8_t> compressed_coords;
64  compressed_coords.reserve(coords.size() * coord_data_size);
65  for (auto coord : coords) {
66  uint64_t coord_data;
67  if (is_null_point) {
68  coord_data = compress_null_point(ti, x);
69  } else {
70  if (ti.get_output_srid() == 4326) {
71  if (x) {
72  if (coord < -180.0 || coord > 180.0) {
73  throw std::runtime_error("WGS84 longitude " + std::to_string(coord) +
74  " is out of bounds");
75  }
76  } else {
77  if (coord < -90.0 || coord > 90.0) {
78  throw std::runtime_error("WGS84 latitude " + std::to_string(coord) +
79  " is out of bounds");
80  }
81  }
82  }
83  if (is_geoint32) {
84  coord_data = compress_coord(coord, ti, x);
85  } else {
86  auto coord_data_ptr = reinterpret_cast<uint64_t*>(&coord);
87  coord_data = *coord_data_ptr;
88  }
89  }
90  for (size_t i = 0; i < coord_data_size; i++) {
91  compressed_coords.push_back(coord_data & 0xFF);
92  coord_data >>= 8;
93  }
94  x = !x;
95  }
96  return compressed_coords;
97 }
98 
99 template <typename T>
100 void unpack_geo_vector(std::vector<T>& output, const int8_t* input_ptr, const size_t sz) {
101  if (sz == 0) {
102  return;
103  }
104  auto elems = reinterpret_cast<const T*>(input_ptr);
105  CHECK_EQ(size_t(0), sz % sizeof(T));
106  const size_t num_elems = sz / sizeof(T);
107  output.resize(num_elems);
108  for (size_t i = 0; i < num_elems; i++) {
109  output[i] = elems[i];
110  }
111 }
112 
113 template <>
114 void unpack_geo_vector<int32_t>(std::vector<int32_t>& output,
115  const int8_t* input_ptr,
116  const size_t sz) {
117  if (sz == 0) {
118  return;
119  }
120  auto elems = reinterpret_cast<const int32_t*>(input_ptr);
121  CHECK_EQ(size_t(0), sz % sizeof(int32_t));
122  const size_t num_elems = sz / sizeof(int32_t);
123  output.resize(num_elems);
124  for (size_t i = 0; i < num_elems; i++) {
125  output[i] = elems[i];
126  }
127 }
128 
129 template <typename T>
130 void decompress_geo_coords_geoint32(std::vector<T>& dec,
131  const int8_t* enc,
132  const size_t sz) {
133  if (sz == 0) {
134  return;
135  }
136  const auto compressed_coords = reinterpret_cast<const int32_t*>(enc);
137  const auto num_coords = sz / sizeof(int32_t);
138  dec.resize(num_coords);
139  for (size_t i = 0; i < num_coords; i += 2) {
140  dec[i] = Geospatial::decompress_longitude_coord_geoint32(compressed_coords[i]);
141  dec[i + 1] =
142  Geospatial::decompress_lattitude_coord_geoint32(compressed_coords[i + 1]);
143  }
144 }
145 
146 template <>
147 std::shared_ptr<std::vector<double>> decompress_coords<double, SQLTypeInfo>(
148  const SQLTypeInfo& geo_ti,
149  const int8_t* coords,
150  const size_t coords_sz) {
151  auto decompressed_coords_ptr = std::make_shared<std::vector<double>>();
152  if (geo_ti.get_compression() == kENCODING_GEOINT) {
153  if (geo_ti.get_comp_param() == 32) {
154  decompress_geo_coords_geoint32(*decompressed_coords_ptr, coords, coords_sz);
155  }
156  } else {
157  CHECK_EQ(geo_ti.get_compression(), kENCODING_NONE);
158  unpack_geo_vector(*decompressed_coords_ptr, coords, coords_sz);
159  }
160  return decompressed_coords_ptr;
161 }
162 
163 template <>
164 std::shared_ptr<std::vector<double>> decompress_coords<double, int32_t>(
165  const int32_t& ic,
166  const int8_t* coords,
167  const size_t coords_sz) {
168  auto decompressed_coords_ptr = std::make_shared<std::vector<double>>();
169  if (ic == COMPRESSION_GEOINT32) {
170  decompress_geo_coords_geoint32(*decompressed_coords_ptr, coords, coords_sz);
171  } else {
173  unpack_geo_vector(*decompressed_coords_ptr, coords, coords_sz);
174  }
175  return decompressed_coords_ptr;
176 }
177 
178 bool is_null_point(const SQLTypeInfo& geo_ti,
179  const int8_t* coords,
180  const size_t coords_sz) {
181  if (geo_ti.get_type() == kPOINT && !geo_ti.get_notnull()) {
182  if (geo_ti.get_compression() == kENCODING_GEOINT) {
183  if (geo_ti.get_comp_param() == 32) {
184  return Geospatial::is_null_point_longitude_geoint32(*((int32_t*)coords));
185  }
186  } else {
188  return *((double*)coords) == NULL_ARRAY_DOUBLE;
189  }
190  }
191  return false;
192 }
193 
194 } // namespace Geospatial
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define COMPRESSION_NONE
#define NULL_DOUBLE
Definition: sqltypes.h:186
void decompress_geo_coords_geoint32(std::vector< T > &dec, const int8_t *enc, const size_t sz)
DEVICE uint64_t compress_longitude_coord_geoint32(const double coord)
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:194
int32_t get_compression_scheme(const SQLTypeInfo &ti)
Definition: Compression.cpp:23
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:268
DEVICE double decompress_lattitude_coord_geoint32(const int32_t compressed)
int64_t const int32_t sz
bool is_null_point(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
std::string to_string(char const *&&v)
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:266
DEVICE bool is_null_point_longitude_geoint32(const int32_t compressed)
std::shared_ptr< std::vector< double > > decompress_coords< double, SQLTypeInfo >(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:265
DEVICE constexpr uint64_t compress_null_point_lattitude_geoint32()
DEVICE double decompress_longitude_coord_geoint32(const int32_t compressed)
std::shared_ptr< std::vector< double > > decompress_coords< double, int32_t >(const int32_t &ic, const int8_t *coords, const size_t coords_sz)
void unpack_geo_vector< int32_t >(std::vector< int32_t > &output, const int8_t *input_ptr, const size_t sz)
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
#define COMPRESSION_GEOINT32
DEVICE constexpr uint64_t compress_null_point_longitude_geoint32()
uint64_t compress_coord(double coord, const SQLTypeInfo &ti, bool x)
Definition: Compression.cpp:33
DEVICE uint64_t compress_lattitude_coord_geoint32(const double coord)
uint64_t compress_null_point(const SQLTypeInfo &ti, bool x)
Definition: Compression.cpp:41
void unpack_geo_vector(std::vector< T > &output, const int8_t *input_ptr, const size_t sz)