OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TargetValueConvertersImpl.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018, OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef TARGET_VALUE_CONVERTERS_IMPL_H_
18 #define TARGET_VALUE_CONVERTERS_IMPL_H_
19 
21 #include "Shared/geo_compression.h"
23 
24 #include <atomic>
25 #include <future>
26 #include <thread>
27 
28 template <typename T>
30  if (std::is_floating_point<T>::value) {
31  return static_cast<T>(inline_fp_null_array_value<T>());
32  } else {
33  return static_cast<T>(inline_int_null_array_value<T>());
34  }
35 }
36 
37 template <typename SOURCE_TYPE, typename RETURN_TYPE, typename CHECKED_CAST_TYPE>
38 RETURN_TYPE checked_cast(SOURCE_TYPE val, bool check_null, RETURN_TYPE null_value) {
39  if (!std::is_same<SOURCE_TYPE, CHECKED_CAST_TYPE>::value) {
40  // do an overflow check
41  try {
42  CHECKED_CAST_TYPE castedVal = boost::numeric_cast<CHECKED_CAST_TYPE>(val);
43  if (check_null && castedVal == null_value) {
44  throw std::runtime_error("Overflow or underflow");
45  }
46  } catch (...) {
47  throw std::runtime_error("Overflow or underflow");
48  }
49  }
50 
51  return static_cast<RETURN_TYPE>(val);
52 }
53 
54 template <typename SOURCE_TYPE, typename TARGET_TYPE>
56  using ColumnDataPtr = std::unique_ptr<TARGET_TYPE, CheckedMallocDeleter<TARGET_TYPE>>;
58  using CasterFunc = std::function<TARGET_TYPE(SOURCE_TYPE, bool, TARGET_TYPE)>;
59 
61  TARGET_TYPE null_value_;
62  SOURCE_TYPE null_check_value_;
66 
68 
70  size_t num_rows,
71  TARGET_TYPE nullValue,
72  SOURCE_TYPE nullCheckValue,
73  bool doNullCheck)
75  , null_value_(nullValue)
76  , null_check_value_(nullCheckValue)
77  , do_null_check_(doNullCheck) {
78  fixed_array_null_value_ = get_fixed_array_null_value<TARGET_TYPE>();
79  if (num_rows) {
80  allocateColumnarData(num_rows);
81  }
82  }
83 
84  ~NumericValueConverter() override {}
85 
86  void setValueCaster(CasterFunc caster) { checked_caster_ = caster; }
87 
// Always true for this converter; ArrayValueConverter consults this before
// accepting a NULL value for a fixed-length array column.
bool allowFixedNullArray() {
  return true;
}
89 
91  allocateColumnarData(num_rows);
94  }
95 
96  void allocateColumnarData(size_t num_rows) override {
97  CHECK(num_rows > 0);
99  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
100  }
101 
103  CHECK(num_rows > 0);
105  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
106  }
107 
109  size_t row,
110  typename ElementsBufferColumnPtr::pointer columnData,
111  const ScalarTargetValue* scalarValue) {
112  auto db_p = checked_get<SOURCE_TYPE>(row, scalarValue, SOURCE_TYPE_ACCESSOR);
113  auto val = *db_p;
114 
115  if (do_null_check_ && null_check_value_ == val) {
116  columnData[row] = null_value_;
117  } else {
118  if (checked_caster_) {
119  columnData[row] = checked_caster_(val, do_null_check_, null_value_);
120  } else {
121  columnData[row] = static_cast<TARGET_TYPE>(val);
122  }
123  }
124  }
125 
126  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
127  convertElementToColumnarFormat(row, column_data_.get(), scalarValue);
128  }
129 
130  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
131  auto scalarValue =
132  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
133  convertToColumnarFormat(row, scalarValue);
134  }
135 
137  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
138  array_buffer,
139  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
140  for (size_t row = 0; row < array_buffer->size(); row++) {
141  auto& element = (array_buffer->at(row));
142  bool is_null = false;
143  if (element.second) {
144  ColumnDataPtr& data = element.second;
145  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
146  (*arrayData)[row] =
147  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
148  }
149  }
150  }
151 
153  DataBlockPtr dataBlock;
154  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(column_data_.get());
155  insertData.data.push_back(dataBlock);
156  insertData.columnIds.push_back(column_descriptor_->columnId);
157  }
158 };
159 
160 template <typename TARGET_TYPE>
161 struct DictionaryValueConverter : public NumericValueConverter<int64_t, TARGET_TYPE> {
162  using ElementsDataColumnPtr =
164 
165  using ElementsBufferColumnPtr = std::unique_ptr<std::vector<int32_t>>;
166 
168 
171 
173 
174  const int32_t buffer_null_sentinal_ = std::numeric_limits<int32_t>::min();
175 
177 
178  std::unordered_map<int32_t, int32_t> literals_lookup_;
180 
182  int32_t sourceDictId,
183  const ColumnDescriptor* targetDescriptor,
184  size_t num_rows,
185  TARGET_TYPE nullValue,
186  int64_t nullCheckValue,
187  bool doNullCheck,
188  StringDictionaryProxy* literals_dict,
189  StringDictionaryProxy* source_dict_proxy)
190  : NumericValueConverter<int64_t, TARGET_TYPE>(targetDescriptor,
191  num_rows,
192  nullValue,
193  nullCheckValue,
194  doNullCheck) {
195  literals_dict_ = literals_dict;
197  cat.getMetadataForDict(targetDescriptor->columnType.get_comp_param(), true);
198 
199  source_dict_desc_ = nullptr;
200  source_dict_proxy_ = source_dict_proxy;
201 
202  use_literals_ = 0 == sourceDictId;
203  if (!use_literals_) {
204  source_dict_desc_ = cat.getMetadataForDict(std::abs(sourceDictId), true);
206  } else {
207  if (literals_dict) {
208  for (auto& entry : literals_dict->getTransientMapping()) {
209  auto newId = target_dict_desc_->stringDict->getOrAdd(entry.second);
210  literals_lookup_[entry.first] = newId;
211  }
212  }
213 
215  }
216 
218 
219  if (num_rows) {
221  }
222  }
223 
225 
// Always false for the dictionary converter; ArrayValueConverter rejects a
// NULL fixed-length array when the element converter returns false here.
bool allowFixedNullArray() {
  return false;
}
227 
229  CHECK(num_rows > 0);
230  return std::make_unique<std::vector<int32_t>>(num_rows);
231  }
232 
234  size_t row,
235  typename ElementsBufferColumnPtr::pointer columnBuffer,
236  const ScalarTargetValue* scalarValue) {
237  auto db_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
238  auto val = *db_p;
239 
240  if (this->do_null_check_ && this->null_check_value_ == val) {
241  (*columnBuffer)[row] = this->buffer_null_sentinal_;
242  } else {
243  (*columnBuffer)[row] = (int32_t)val;
244  }
245  }
246 
247  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
248  convertElementToColumnarFormat(row, this->column_buffer_.get(), scalarValue);
249  }
250 
251  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
252  auto scalarValue =
253  checked_get<ScalarTargetValue>(row, value, this->SCALAR_TARGET_VALUE_ACCESSOR);
254 
255  convertToColumnarFormat(row, scalarValue);
256  }
257 
258  inline int32_t convertTransientStringIdToPermanentId(int32_t& transient_string_id) {
259  if (source_dict_proxy_) {
260  auto str = source_dict_proxy_->getString(transient_string_id);
261  return source_dict_proxy_->getOrAdd(str);
262  } else {
263  throw std::runtime_error("Unexpected negative source ID");
264  }
265  }
266 
268  ElementsBufferColumnPtr buffer) {
271  reinterpret_cast<TARGET_TYPE*>(
272  checked_malloc(buffer->size() * sizeof(TARGET_TYPE))));
273 
274  std::vector<int32_t>* bufferPtr =
275  reinterpret_cast<std::vector<int32_t>*>(buffer.get());
276  TARGET_TYPE* columnDataPtr = reinterpret_cast<TARGET_TYPE*>(data.get());
277  if (use_literals_) {
278  for (size_t i = 0; i < bufferPtr->size(); i++) {
279  auto id = literals_lookup_[(*bufferPtr)[i]];
280  if (id == buffer_null_sentinal_) {
281  columnDataPtr[i] = this->null_value_;
282  } else {
283  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
284  }
285  }
286  } else {
288  // special case, where source and target dict are the same
289  // mostly in update
290  for (size_t i = 0; i < bufferPtr->size(); i++) {
291  auto src_id = (*bufferPtr)[i];
292  if (src_id == buffer_null_sentinal_) {
293  columnDataPtr[i] = this->null_value_;
294  } else if (src_id < 0) {
295  columnDataPtr[i] = convertTransientStringIdToPermanentId(src_id);
296  } else {
297  columnDataPtr[i] = static_cast<TARGET_TYPE>(src_id);
298  }
299  }
300 
301  } else {
302  std::vector<int32_t> dest_ids;
303  dest_ids.resize(bufferPtr->size());
304 
305  if (source_dict_proxy_) {
307  dest_ids,
309  *bufferPtr,
312  } else {
315  *bufferPtr,
317  }
318 
319  // fixup NULL sentinel
320  for (size_t i = 0; i < dest_ids.size(); i++) {
321  auto id = dest_ids[i];
322  if (id == buffer_null_sentinal_) {
323  columnDataPtr[i] = this->null_value_;
324  } else {
325  if (std::is_signed<TARGET_TYPE>::value) {
326  if (id < 0) {
327  throw std::runtime_error(
328  "Maximum number of unique strings (" +
329  std::to_string(std::numeric_limits<TARGET_TYPE>::max()) +
330  ") reached in target dictionary");
331  }
332  } else {
333  if (id >= std::numeric_limits<TARGET_TYPE>::max()) {
334  throw std::runtime_error(
335  "Maximum number of unique strings (" +
336  std::to_string(std::numeric_limits<TARGET_TYPE>::max()) +
337  ") reached in target column's dict encoding");
338  }
339  }
340  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
341  }
342  }
343  }
344  }
345 
346  return data;
347  }
348 
350  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
351  array_buffer,
352  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
353  if (use_literals_) {
354  for (size_t row = 0; row < array_buffer->size(); row++) {
355  auto& element = (array_buffer->at(row));
356  bool is_null = false;
357  if (element.second) {
359  processBuffer(std::move(element.second));
360  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
361  (*arrayData)[row] =
362  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
363  }
364  }
365  } else {
366  std::vector<std::vector<int32_t>> srcArrayIds(array_buffer->size());
367  std::vector<std::vector<int32_t>> destArrayIds(0);
368 
369  for (size_t row = 0; row < array_buffer->size(); row++) {
370  auto& element = (array_buffer->at(row));
371  if (element.second) {
372  srcArrayIds[row] = *(element.second.get());
373  }
374  }
375 
378  srcArrayIds,
380 
381  for (size_t row = 0; row < array_buffer->size(); row++) {
382  auto& element = (array_buffer->at(row));
383  bool is_null = false;
384  if (element.second) {
385  *(element.second.get()) = destArrayIds[row];
386  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(&(element.second->at(0)));
387  (*arrayData)[row] = ArrayDatum(element.first * sizeof(TARGET_TYPE),
388  arrayDataPtr,
389  is_null,
390  DoNothingDeleter());
391  }
392  }
393  }
394  }
395 
397  if (column_buffer_) {
398  this->column_data_ = processBuffer(std::move(column_buffer_));
399  column_buffer_ = nullptr;
400  }
401  }
402 
405  DataBlockPtr dataBlock;
406  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(this->column_data_.get());
407  insertData.data.push_back(dataBlock);
408  insertData.columnIds.push_back(this->column_descriptor_->columnId);
409  }
410 };
411 
413  std::unique_ptr<std::vector<std::string>> column_data_;
414 
416 
420 
422  const ColumnDescriptor* cd,
423  size_t num_rows,
424  bool dictEncoded,
425  int32_t sourceDictId,
426  StringDictionaryProxy* literals_dict)
427  : TargetValueConverter(cd) {
428  source_dict_ = nullptr;
429  literals_source_dict_ = nullptr;
430  dict_encoded_ = dictEncoded;
431  if (dictEncoded) {
432  if (0 != sourceDictId) {
433  auto source_dict_desc = cat.getMetadataForDict(std::abs(sourceDictId), true);
434  CHECK(source_dict_desc);
435  source_dict_ = source_dict_desc->stringDict.get();
437  } else {
438  literals_source_dict_ = literals_dict;
439  }
440  }
441  if (num_rows) {
442  allocateColumnarData(num_rows);
443  }
444  }
445 
446  ~StringValueConverter() override {}
447 
448  void allocateColumnarData(size_t num_rows) override {
449  CHECK(num_rows > 0);
450  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
451  }
452 
453  void convertToColumnarFormatFromDict(size_t row, const TargetValue* value) {
454  auto scalarValue =
455  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
456  auto db_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
457  auto val = *db_p;
458 
459  if (std::numeric_limits<int32_t>::min() == val) {
460  (*column_data_)[row] = std::string("");
461  } else {
462  if (source_dict_) {
463  std::string strVal = source_dict_->getString(val);
464  (*column_data_)[row] = strVal;
465  } else if (literals_source_dict_) {
466  std::string strVal = literals_source_dict_->getString(val);
467  (*column_data_)[row] = strVal;
468  } else {
469  CHECK_EQ(val, inline_int_null_value<int32_t>());
470  std::string nullStr = "";
471  (*column_data_)[row] = nullStr;
472  }
473  }
474  }
475 
476  void convertToColumnarFormatFromString(size_t row, const TargetValue* value) {
477  auto scalarValue =
478  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
479  auto db_p = checked_get<NullableString>(row, scalarValue, NULLABLE_STRING_ACCESSOR);
480  const auto db_str_p = checked_get<std::string>(row, db_p, STRING_ACCESSOR);
481 
482  if (nullptr != db_str_p) {
483  (*column_data_)[row] = *db_str_p;
484  } else {
485  (*column_data_)[row] = std::string("");
486  }
487  }
488 
489  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
490  if (dict_encoded_) {
492  } else {
494  }
495  }
496 
498  DataBlockPtr dataBlock;
499  dataBlock.stringsPtr = column_data_.get();
500  insertData.data.push_back(dataBlock);
501  insertData.columnIds.push_back(column_descriptor_->columnId);
502  }
503 };
504 
505 template <typename ELEMENT_CONVERTER>
507  std::unique_ptr<
508  std::vector<std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>
510  std::unique_ptr<std::vector<ArrayDatum>> column_data_;
511  std::unique_ptr<ELEMENT_CONVERTER> element_converter_;
514  bool data_finalized_ = false;
518 
520 
522  size_t num_rows,
523  std::unique_ptr<ELEMENT_CONVERTER> element_converter,
524  bool do_check_null)
526  , element_converter_(std::move(element_converter))
527  , element_type_info_(cd->columnType.get_elem_type())
528  , do_check_null_(do_check_null) {
529  if (num_rows) {
530  allocateColumnarData(num_rows);
531  }
532 
533  if (cd->columnType.get_size() > 0) {
536  fixed_array_size_ / sizeof(ELEMENT_CONVERTER::fixed_array_null_value_);
537  element_converter_->populateFixedArrayNullSentinel(fixed_array_elements_count_);
539  reinterpret_cast<int8_t*>(element_converter_->column_data_.get());
540  } else {
541  fixed_array_size_ = 0;
543  fixed_array_null_sentinel_ = nullptr;
544  }
545  }
546 
547  ~ArrayValueConverter() override {}
548 
549  void allocateColumnarData(size_t num_rows) override {
550  CHECK(num_rows > 0);
551  column_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
552  column_buffer_ = std::make_unique<std::vector<
553  std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>(
554  num_rows);
555  }
556 
557  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
558  const auto arrayValue =
559  checked_get<ArrayTargetValue>(row, value, ARRAY_VALUE_ACCESSOR);
560  CHECK(arrayValue);
561  if (arrayValue->is_initialized()) {
562  const auto& vec = arrayValue->get();
563  bool is_null = false;
564 
566  if (fixed_array_elements_count_ != vec.size()) {
567  throw std::runtime_error(
568  "Incorrect number of array elements for fixed length array column");
569  }
570  }
571 
572  if (vec.size()) {
573  typename ELEMENT_CONVERTER::ElementsBufferColumnPtr elementBuffer =
574  element_converter_->allocateColumnarBuffer(vec.size());
575 
576  int elementIndex = 0;
577  for (const auto& scalarValue : vec) {
578  element_converter_->convertElementToColumnarFormat(
579  elementIndex++, elementBuffer.get(), &scalarValue);
580  }
581 
582  column_buffer_->at(row) = {vec.size(), std::move(elementBuffer)};
583 
584  } else {
585  // Empty, not NULL
586  (*column_data_)[row] = ArrayDatum(0, nullptr, is_null, DoNothingDeleter());
587  }
588  } else {
589  if (!do_check_null_) {
590  throw std::runtime_error("NULL assignment of non null column not allowed");
591  }
592 
593  if (fixed_array_elements_count_ && !element_converter_->allowFixedNullArray()) {
594  throw std::runtime_error("NULL assignment of fixed length array not allowed");
595  }
596 
597  bool is_null = true; // do_check_null_;
598  (*column_data_)[row] = ArrayDatum(
600  (*column_data_)[row].is_null = is_null;
601  }
602  }
603 
605  if (!data_finalized_) {
606  element_converter_->processArrayBuffer(column_buffer_, column_data_.get());
607  data_finalized_ = true;
608  }
609  }
610 
613  DataBlockPtr dataBlock;
614  dataBlock.arraysPtr = column_data_.get();
615  insertData.data.push_back(dataBlock);
616  insertData.columnIds.push_back(column_descriptor_->columnId);
617  }
618 };
619 
622 
623  std::unique_ptr<std::vector<std::string>> column_data_;
624  std::unique_ptr<std::vector<ArrayDatum>> signed_compressed_coords_data_;
625 
627  size_t num_rows,
628  const ColumnDescriptor* logicalColumnDescriptor)
629  : TargetValueConverter(logicalColumnDescriptor) {
633 
634  if (num_rows) {
635  allocateColumnarData(num_rows);
636  }
637  }
638 
640 
641  void allocateColumnarData(size_t num_rows) override {
642  CHECK(num_rows > 0);
643  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
644  signed_compressed_coords_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
645  }
646 
649 
651  const std::shared_ptr<std::vector<double>>& coords) {
652  const auto compressed_coords_vector =
654 
655  uint8_t* compressed_coords_array = reinterpret_cast<uint8_t*>(
656  checked_malloc(sizeof(uint8_t) * compressed_coords_vector.size()));
657  memcpy(compressed_coords_array,
658  &compressed_coords_vector[0],
659  compressed_coords_vector.size());
660 
661  return ArrayDatum((int)compressed_coords_vector.size(),
662  reinterpret_cast<int8_t*>(compressed_coords_array),
663  false);
664  }
665 
666  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
667  const auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_VALUE_ACCESSOR);
668  CHECK(geoValue);
669  if (geoValue->is_initialized()) {
670  const auto geo = geoValue->get();
671  const auto geoPoint =
672  checked_get<GeoPointTargetValue>(row, &geo, GEO_POINT_VALUE_ACCESSOR);
673  CHECK(geoPoint);
674  (*column_data_)[row] = "";
675  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoint->coords);
676  } else {
677  // NULL point
678  (*column_data_)[row] = "";
679  auto coords = std::make_shared<std::vector<double>>(NULL_ARRAY_DOUBLE, NULL_DOUBLE);
680  auto coords_datum = toCompressedCoords(coords);
681  coords_datum.is_null = true;
682  (*signed_compressed_coords_data_)[row] = coords_datum;
683  }
684  }
685 
687  DataBlockPtr logical, coords;
688 
689  logical.stringsPtr = column_data_.get();
691 
692  insertData.data.emplace_back(logical);
693  insertData.columnIds.emplace_back(column_descriptor_->columnId);
694 
695  insertData.data.emplace_back(coords);
696  insertData.columnIds.emplace_back(coords_column_descriptor_->columnId);
697  }
698 };
699 
/**
 * Computes the axis-aligned bounding box of a flat, interleaved coordinate
 * list (x0, y0, x1, y1, ...).
 *
 * @param coords  shared vector of interleaved x/y values. A null pointer or an
 *                empty vector yields the initial bounds; a trailing unpaired
 *                value (odd element count) is ignored rather than read past
 *                the end of the vector.
 * @return four doubles {min_x, min_y, max_x, max_y}. With no complete pair the
 *         minimums stay at numeric_limits<double>::max() and the maximums at
 *         numeric_limits<double>::lowest().
 */
inline std::vector<double> compute_bounds_of_coords(
    const std::shared_ptr<std::vector<double>>& coords) {
  constexpr auto DOUBLE_MAX = std::numeric_limits<double>::max();
  constexpr auto DOUBLE_MIN = std::numeric_limits<double>::lowest();

  std::vector<double> bounds(4);
  bounds[0] = DOUBLE_MAX;
  bounds[1] = DOUBLE_MAX;
  bounds[2] = DOUBLE_MIN;
  bounds[3] = DOUBLE_MIN;

  if (!coords) {
    return bounds;
  }

  const auto size_coords = coords->size();
  // `i + 1 < size_coords` guarantees both members of the pair exist.
  for (size_t i = 0; i + 1 < size_coords; i += 2) {
    const double x = (*coords)[i];
    const double y = (*coords)[i + 1];

    bounds[0] = std::min(bounds[0], x);
    bounds[1] = std::min(bounds[1], y);
    bounds[2] = std::max(bounds[2], x);
    bounds[3] = std::max(bounds[3], y);
  }
  return bounds;
}
722 
723 template <typename ELEM_TYPE>
724 inline ArrayDatum to_array_datum(const std::vector<ELEM_TYPE>& vector) {
725  ELEM_TYPE* array =
726  reinterpret_cast<ELEM_TYPE*>(checked_malloc(sizeof(ELEM_TYPE) * vector.size()));
727  memcpy(array, vector.data(), vector.size() * sizeof(ELEM_TYPE));
728 
729  return ArrayDatum(
730  (int)(vector.size() * sizeof(ELEM_TYPE)), reinterpret_cast<int8_t*>(array), false);
731 }
732 
733 template <typename ELEM_TYPE>
734 inline ArrayDatum to_array_datum(const std::shared_ptr<std::vector<ELEM_TYPE>>& vector) {
735  return to_array_datum(*vector.get());
736 }
737 
740 
741  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
742 
744  size_t num_rows,
745  const ColumnDescriptor* logicalColumnDescriptor)
746  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
750 
751  if (num_rows) {
752  allocateColumnarData(num_rows);
753  }
754  }
755 
757 
758  void allocateColumnarData(size_t num_rows) override {
759  CHECK(num_rows > 0);
761  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
762  }
763 
765 
766  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
767  const auto geoValue =
768  checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
769  CHECK(geoValue);
770  if (geoValue->is_initialized()) {
771  const auto geo = geoValue->get();
772  const auto geoLinestring =
773  checked_get<GeoLineStringTargetValue>(row, &geo, GEO_LINESTRING_VALUE_ACCESSOR);
774 
775  (*column_data_)[row] = "";
776  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoLinestring->coords);
777  auto bounds = compute_bounds_of_coords(geoLinestring->coords);
778  (*bounds_data_)[row] = to_array_datum(bounds);
779  } else {
780  // NULL Linestring
781  (*column_data_)[row] = "";
782  (*signed_compressed_coords_data_)[row] = ArrayDatum(0, nullptr, true);
783  std::vector<double> bounds = {
784  NULL_ARRAY_DOUBLE, NULL_DOUBLE, NULL_DOUBLE, NULL_DOUBLE};
785  auto bounds_datum = to_array_datum(bounds);
786  bounds_datum.is_null = true;
787  (*bounds_data_)[row] = bounds_datum;
788  }
789  }
790 
793 
794  DataBlockPtr bounds;
795 
796  bounds.arraysPtr = bounds_data_.get();
797 
798  insertData.data.emplace_back(bounds);
799  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
800  }
801 };
802 
808 
809  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
810  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
811  std::unique_ptr<int32_t[]> render_group_data_;
812 
814  size_t num_rows,
815  const ColumnDescriptor* logicalColumnDescriptor)
816  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
826 
827  if (num_rows) {
828  allocateColumnarData(num_rows);
829  }
830  }
831 
833 
834  void allocateColumnarData(size_t num_rows) override {
836  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
837  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
838  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
839  }
840 
842 
843  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
844  const auto geoValue =
845  checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
846  CHECK(geoValue);
847  if (geoValue->is_initialized()) {
848  const auto geo = geoValue->get();
849  const auto geoPoly =
850  checked_get<GeoPolyTargetValue>(row, &geo, GEO_POLY_VALUE_ACCESSOR);
851 
852  (*column_data_)[row] = "";
853  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoly->coords);
854  (*ring_sizes_data_)[row] = to_array_datum(geoPoly->ring_sizes);
855  auto bounds = compute_bounds_of_coords(geoPoly->coords);
856  (*bounds_data_)[row] = to_array_datum(bounds);
857  render_group_data_[row] =
859  } else {
860  // NULL Polygon
861  (*column_data_)[row] = "";
862  (*signed_compressed_coords_data_)[row] = ArrayDatum(0, nullptr, true);
863  (*ring_sizes_data_)[row] = ArrayDatum(0, nullptr, true);
864  std::vector<double> bounds = {
865  NULL_ARRAY_DOUBLE, NULL_DOUBLE, NULL_DOUBLE, NULL_DOUBLE};
866  auto bounds_datum = to_array_datum(bounds);
867  bounds_datum.is_null = true;
868  (*bounds_data_)[row] = bounds_datum;
870  }
871  }
872 
875 
876  DataBlockPtr ringSizes, bounds, renderGroup;
877 
878  ringSizes.arraysPtr = ring_sizes_data_.get();
879  bounds.arraysPtr = bounds_data_.get();
880  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
881 
882  insertData.data.emplace_back(ringSizes);
883  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
884 
885  insertData.data.emplace_back(bounds);
886  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
887 
888  insertData.data.emplace_back(renderGroup);
889  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
890  }
891 };
892 
899 
900  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
901  std::unique_ptr<std::vector<ArrayDatum>> poly_rings_data_;
902  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
903  std::unique_ptr<int32_t[]> render_group_data_;
904 
906  size_t num_rows,
907  const ColumnDescriptor* logicalColumnDescriptor)
908  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
921 
922  if (num_rows) {
923  allocateColumnarData(num_rows);
924  }
925  }
926 
928 
929  void allocateColumnarData(size_t num_rows) override {
931  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
932  poly_rings_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
933  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
934  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
935  }
936 
938 
939  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
940  const auto geoValue =
941  checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
942  CHECK(geoValue);
943  if (geoValue->is_initialized()) {
944  const auto geo = geoValue->get();
945  const auto geoMultiPoly =
946  checked_get<GeoMultiPolyTargetValue>(row, &geo, GEO_MULTI_POLY_VALUE_ACCESSOR);
947 
948  (*column_data_)[row] = "";
949  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoMultiPoly->coords);
950  (*ring_sizes_data_)[row] = to_array_datum(geoMultiPoly->ring_sizes);
951  (*poly_rings_data_)[row] = to_array_datum(geoMultiPoly->poly_rings);
952  auto bounds = compute_bounds_of_coords(geoMultiPoly->coords);
953  (*bounds_data_)[row] = to_array_datum(bounds);
954  render_group_data_[row] =
956  } else {
957  // NULL MultiPolygon
958  (*column_data_)[row] = "";
959  (*signed_compressed_coords_data_)[row] = ArrayDatum(0, nullptr, true);
960  (*ring_sizes_data_)[row] = ArrayDatum(0, nullptr, true);
961  (*poly_rings_data_)[row] = ArrayDatum(0, nullptr, true);
962  std::vector<double> bounds = {
963  NULL_ARRAY_DOUBLE, NULL_DOUBLE, NULL_DOUBLE, NULL_DOUBLE};
964  auto bounds_datum = to_array_datum(bounds);
965  bounds_datum.is_null = true;
966  (*bounds_data_)[row] = bounds_datum;
968  }
969  }
970 
973 
974  DataBlockPtr ringSizes, polyRings, bounds, renderGroup;
975 
976  ringSizes.arraysPtr = ring_sizes_data_.get();
977  polyRings.arraysPtr = poly_rings_data_.get();
978  bounds.arraysPtr = bounds_data_.get();
979  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
980 
981  insertData.data.emplace_back(ringSizes);
982  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
983 
984  insertData.data.emplace_back(polyRings);
985  insertData.columnIds.emplace_back(ring_sizes_solumn_descriptor_->columnId);
986 
987  insertData.data.emplace_back(bounds);
988  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
989 
990  insertData.data.emplace_back(renderGroup);
991  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
992  }
993 };
994 
995 #endif
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
ArrayDatum to_array_datum(const std::vector< ELEM_TYPE > &vector)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
std::unique_ptr< TARGET_TYPE, CheckedMallocDeleter< TARGET_TYPE >> ColumnDataPtr
std::unique_ptr< std::vector< int32_t >> ElementsBufferColumnPtr
StringDictionaryProxy * literals_source_dict_
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void convertToColumnarFormatFromDict(size_t row, const TargetValue *value)
void allocateColumnarData(size_t num_rows) override
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
#define NULL_DOUBLE
Definition: sqltypes.h:185
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
std::string cat(Ts &&...args)
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const int8_t const int64_t * num_rows
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:86
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:149
const ColumnDescriptor * ring_sizes_solumn_descriptor_
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:150
import_export::RenderGroupAnalyzer render_group_analyzer_
std::unique_ptr< int32_t[]> render_group_data_
void allocateColumnarData(size_t num_rows) override
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:193
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
boost_variant_accessor< int64_t > SOURCE_TYPE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
ArrayValueConverter(const ColumnDescriptor *cd, size_t num_rows, std::unique_ptr< ELEMENT_CONVERTER > element_converter, bool do_check_null)
void populateFixedArrayNullSentinel(size_t num_rows)
const DictDescriptor * source_dict_desc_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
void allocateColumnarData(size_t num_rows) override
const ColumnDescriptor * column_descriptor_
std::string getString(int32_t string_id) const
ArrayDatum toCompressedCoords(const std::shared_ptr< std::vector< double >> &coords)
static void populate_string_ids(std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict, const std::map< int32_t, std::string > transient_mapping={})
Populates provided dest_ids vector with string ids corresponding to given source strings.
ElementsBufferColumnPtr column_buffer_
std::vector< double > compute_bounds_of_coords(const std::shared_ptr< std::vector< double >> &coords)
std::unique_ptr< std::vector< ArrayDatum > > poly_rings_data_
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
std::function< TARGET_TYPE(int64_t, bool, TARGET_TYPE)> CasterFunc
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnData, const ScalarTargetValue *scalarValue)
std::string to_string(char const *&&v)
const DictDescriptor * target_dict_desc_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const StringDictionaryProxy * literals_dict_
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::shared_ptr< StringDictionary > stringDict
boost_variant_accessor< ScalarTargetValue > SCALAR_TARGET_VALUE_ACCESSOR
boost_variant_accessor< GeoLineStringTargetValue > GEO_LINESTRING_VALUE_ACCESSOR
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:130
GeoPointValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
boost_variant_accessor< NullableString > NULLABLE_STRING_ACCESSOR
std::unique_ptr< std::vector< std::string > > column_data_
CHECK(cgen_state)
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void convertToColumnarFormatFromString(size_t row, const TargetValue *value)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
boost_variant_accessor< GeoMultiPolyTargetValue > GEO_MULTI_POLY_VALUE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
boost_variant_accessor< GeoPolyTargetValue > GEO_POLY_VALUE_ACCESSOR
boost_variant_accessor< std::string > STRING_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
void finalizeDataBlocksForInsertData() override
GeoLinestringValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
const std::map< int32_t, std::string > getTransientMapping() const
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1449
specifies the content in-memory of a row in the column metadata table
#define NULL_INT
Definition: sqltypes.h:182
GeoMultiPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< ELEMENT_CONVERTER > element_converter_
T get_fixed_array_null_value()
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< int32_t[]> render_group_data_
StringValueConverter(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd, size_t num_rows, bool dictEncoded, int32_t sourceDictId, StringDictionaryProxy *literals_dict)
void finalizeDataBlocksForInsertData() override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void setValueCaster(CasterFunc caster)
bool is_null(const T &v, const SQLTypeInfo &t)
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:64
int32_t getOrAdd(const std::string &str) noexcept
void allocateColumnarData(size_t num_rows) override
int32_t convertTransientStringIdToPermanentId(int32_t &transient_string_id)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< std::vector< std::string > > column_data_
const ColumnDescriptor * render_group_column_descriptor_
typename NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr ElementsDataColumnPtr
std::unordered_map< int32_t, int32_t > literals_lookup_
RETURN_TYPE checked_cast(SOURCE_TYPE val, bool check_null, RETURN_TYPE null_value)
const ColumnDescriptor * coords_column_descriptor_
void allocateColumnarData(size_t num_rows) override
std::string getString(int32_t string_id) const
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:267
void allocateColumnarData(size_t num_rows) override
boost_variant_accessor< GeoTargetValue > GEO_VALUE_ACCESSOR
const ColumnDescriptor * render_group_column_descriptor_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
static void populate_string_array_ids(std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict)
NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr processBuffer(ElementsBufferColumnPtr buffer)
std::unique_ptr< std::vector< std::pair< size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr > > > column_buffer_
boost_variant_accessor< GeoTargetValue > GEO_TARGET_VALUE_ACCESSOR
Descriptor for a dictionary for a string column.
const ColumnDescriptor * bounds_column_descriptor_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
import_export::RenderGroupAnalyzer render_group_analyzer_
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
std::unique_ptr< std::vector< ArrayDatum > > column_data_
SQLTypeInfo columnType
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnBuffer, const ScalarTargetValue *scalarValue)
void allocateColumnarData(size_t num_rows) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
int8_t * numbersPtr
Definition: sqltypes.h:148
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
boost_variant_accessor< ArrayTargetValue > ARRAY_VALUE_ACCESSOR
boost_variant_accessor< GeoPointTargetValue > GEO_POINT_VALUE_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
boost_variant_accessor< SOURCE_TYPE > SOURCE_TYPE_ACCESSOR
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:62
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
int insertBoundsAndReturnRenderGroup(const std::vector< double > &bounds)
Definition: Importer.cpp:5092
GeoPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
std::unique_ptr< std::vector< ArrayDatum > > signed_compressed_coords_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
StringDictionaryProxy * source_dict_proxy_
NumericValueConverter(const ColumnDescriptor *cd, size_t num_rows, TARGET_TYPE nullValue, SOURCE_TYPE nullCheckValue, bool doNullCheck)
DictionaryValueConverter(const Catalog_Namespace::Catalog &cat, int32_t sourceDictId, const ColumnDescriptor *targetDescriptor, size_t num_rows, TARGET_TYPE nullValue, int64_t nullCheckValue, bool doNullCheck, StringDictionaryProxy *literals_dict, StringDictionaryProxy *source_dict_proxy)
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156