OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TargetValueConvertersImpl.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018, OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef TARGET_VALUE_CONVERTERS_IMPL_H_
18 #define TARGET_VALUE_CONVERTERS_IMPL_H_
19 
21 #include "Geospatial/Compression.h"
23 #include "Shared/checked_alloc.h"
25 
26 #include <atomic>
27 #include <future>
28 #include <thread>
29 
30 template <typename T>
32  if (std::is_floating_point<T>::value) {
33  return static_cast<T>(inline_fp_null_array_value<T>());
34  } else {
35  return static_cast<T>(inline_int_null_array_value<T>());
36  }
37 }
38 
39 template <typename SOURCE_TYPE, typename RETURN_TYPE, typename CHECKED_CAST_TYPE>
40 RETURN_TYPE checked_cast(SOURCE_TYPE val, bool check_null, RETURN_TYPE null_value) {
41  if (!std::is_same<SOURCE_TYPE, CHECKED_CAST_TYPE>::value) {
42  // do an overflow check
43  try {
44  CHECKED_CAST_TYPE castedVal = boost::numeric_cast<CHECKED_CAST_TYPE>(val);
45  if (check_null && castedVal == null_value) {
46  throw std::runtime_error("Overflow or underflow");
47  }
48  } catch (...) {
49  throw std::runtime_error("Overflow or underflow");
50  }
51  }
52 
53  return static_cast<RETURN_TYPE>(val);
54 }
55 
56 template <typename SOURCE_TYPE, typename TARGET_TYPE>
58  using ColumnDataPtr = std::unique_ptr<TARGET_TYPE, CheckedMallocDeleter<TARGET_TYPE>>;
60  using CasterFunc = std::function<TARGET_TYPE(SOURCE_TYPE, bool, TARGET_TYPE)>;
61 
63  TARGET_TYPE null_value_;
64  SOURCE_TYPE null_check_value_;
68 
70 
72  size_t num_rows,
73  TARGET_TYPE nullValue,
74  SOURCE_TYPE nullCheckValue,
75  bool doNullCheck)
77  , null_value_(nullValue)
78  , null_check_value_(nullCheckValue)
79  , do_null_check_(doNullCheck) {
80  fixed_array_null_value_ = get_fixed_array_null_value<TARGET_TYPE>();
81  if (num_rows) {
82  allocateColumnarData(num_rows);
83  }
84  }
85 
86  ~NumericValueConverter() override {}
87 
88  void setValueCaster(CasterFunc caster) { checked_caster_ = caster; }
89 
// Reports whether a fixed-length array of this element type may be assigned NULL.
// This converter always permits it.
bool allowFixedNullArray() {
  return true;
}
91 
92  void populateFixedArrayNullSentinel(size_t num_rows) {
93  allocateColumnarData(num_rows);
96  }
97 
98  void allocateColumnarData(size_t num_rows) override {
99  CHECK(num_rows > 0);
101  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
102  }
103 
105  CHECK(num_rows > 0);
107  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
108  }
109 
111  size_t row,
112  typename ElementsBufferColumnPtr::pointer columnData,
113  const ScalarTargetValue* scalarValue) {
114  auto db_p = checked_get<SOURCE_TYPE>(row, scalarValue, SOURCE_TYPE_ACCESSOR);
115  auto val = *db_p;
116 
117  if (do_null_check_ && null_check_value_ == val) {
118  columnData[row] = null_value_;
119  } else {
120  if (checked_caster_) {
121  columnData[row] = checked_caster_(val, do_null_check_, null_value_);
122  } else {
123  columnData[row] = static_cast<TARGET_TYPE>(val);
124  }
125  }
126  }
127 
128  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
129  convertElementToColumnarFormat(row, column_data_.get(), scalarValue);
130  }
131 
132  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
133  auto scalarValue =
134  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
135  convertToColumnarFormat(row, scalarValue);
136  }
137 
139  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
140  array_buffer,
141  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
142  for (size_t row = 0; row < array_buffer->size(); row++) {
143  auto& element = (array_buffer->at(row));
144  bool is_null = false;
145  if (element.second) {
146  ColumnDataPtr& data = element.second;
147  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
148  (*arrayData)[row] =
149  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
150  }
151  }
152  }
153 
155  DataBlockPtr dataBlock;
156  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(column_data_.get());
157  insertData.data.push_back(dataBlock);
158  insertData.columnIds.push_back(column_descriptor_->columnId);
159  }
160 };
161 
162 template <typename TARGET_TYPE>
163 struct DictionaryValueConverter : public NumericValueConverter<int64_t, TARGET_TYPE> {
164  using ElementsDataColumnPtr =
166 
167  using ElementsBufferColumnPtr = std::unique_ptr<std::vector<int32_t>>;
168 
170 
173 
175 
176  const int32_t buffer_null_sentinal_ = std::numeric_limits<int32_t>::min();
177 
179 
180  std::unordered_map<int32_t, int32_t> literals_lookup_;
182 
184  int32_t sourceDictId,
185  const ColumnDescriptor* targetDescriptor,
186  size_t num_rows,
187  TARGET_TYPE nullValue,
188  int64_t nullCheckValue,
189  bool doNullCheck,
190  StringDictionaryProxy* literals_dict,
191  StringDictionaryProxy* source_dict_proxy)
192  : NumericValueConverter<int64_t, TARGET_TYPE>(targetDescriptor,
193  num_rows,
194  nullValue,
195  nullCheckValue,
196  doNullCheck) {
197  literals_dict_ = literals_dict;
199  cat.getMetadataForDict(targetDescriptor->columnType.get_comp_param(), true);
200 
201  source_dict_desc_ = nullptr;
202  source_dict_proxy_ = source_dict_proxy;
203 
204  use_literals_ = 0 == sourceDictId;
205  if (!use_literals_) {
206  source_dict_desc_ = cat.getMetadataForDict(std::abs(sourceDictId), true);
208  } else {
209  if (literals_dict) {
210  for (auto& entry : literals_dict->getTransientMapping()) {
211  auto newId = target_dict_desc_->stringDict->getOrAdd(entry.second);
212  literals_lookup_[entry.first] = newId;
213  }
214  }
215 
217  }
218 
220 
221  if (num_rows) {
223  }
224  }
225 
227 
// Reports whether a fixed-length array of this element type may be assigned NULL.
// This converter never permits it.
bool allowFixedNullArray() {
  return false;
}
229 
231  CHECK(num_rows > 0);
232  return std::make_unique<std::vector<int32_t>>(num_rows);
233  }
234 
236  size_t row,
237  typename ElementsBufferColumnPtr::pointer columnBuffer,
238  const ScalarTargetValue* scalarValue) {
239  auto db_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
240  auto val = *db_p;
241 
242  if (this->do_null_check_ && this->null_check_value_ == val) {
243  (*columnBuffer)[row] = this->buffer_null_sentinal_;
244  } else {
245  (*columnBuffer)[row] = (int32_t)val;
246  }
247  }
248 
249  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
250  convertElementToColumnarFormat(row, this->column_buffer_.get(), scalarValue);
251  }
252 
253  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
254  auto scalarValue =
255  checked_get<ScalarTargetValue>(row, value, this->SCALAR_TARGET_VALUE_ACCESSOR);
256 
257  convertToColumnarFormat(row, scalarValue);
258  }
259 
260  inline int32_t convertTransientStringIdToPermanentId(int32_t& transient_string_id) {
261  if (source_dict_proxy_) {
262  auto str = source_dict_proxy_->getString(transient_string_id);
263  return source_dict_proxy_->getOrAdd(str);
264  } else {
265  throw std::runtime_error("Unexpected negative source ID");
266  }
267  }
268 
270  ElementsBufferColumnPtr buffer) {
273  reinterpret_cast<TARGET_TYPE*>(
274  checked_malloc(buffer->size() * sizeof(TARGET_TYPE))));
275 
276  std::vector<int32_t>* bufferPtr =
277  reinterpret_cast<std::vector<int32_t>*>(buffer.get());
278  TARGET_TYPE* columnDataPtr = reinterpret_cast<TARGET_TYPE*>(data.get());
279  if (use_literals_) {
280  for (size_t i = 0; i < bufferPtr->size(); i++) {
281  auto id = literals_lookup_[(*bufferPtr)[i]];
282  if (id == buffer_null_sentinal_) {
283  columnDataPtr[i] = this->null_value_;
284  } else {
285  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
286  }
287  }
288  } else {
290  // special case, where source and target dict are the same
291  // mostly in update
292  for (size_t i = 0; i < bufferPtr->size(); i++) {
293  auto src_id = (*bufferPtr)[i];
294  if (src_id == buffer_null_sentinal_) {
295  columnDataPtr[i] = this->null_value_;
296  } else if (src_id < 0) {
297  columnDataPtr[i] = convertTransientStringIdToPermanentId(src_id);
298  } else {
299  columnDataPtr[i] = static_cast<TARGET_TYPE>(src_id);
300  }
301  }
302 
303  } else {
304  std::vector<int32_t> dest_ids;
305  dest_ids.resize(bufferPtr->size());
306 
307  if (source_dict_proxy_) {
309  dest_ids,
311  *bufferPtr,
314  } else {
317  *bufferPtr,
319  }
320 
321  // fixup NULL sentinel
322  for (size_t i = 0; i < dest_ids.size(); i++) {
323  auto id = dest_ids[i];
324  if (id == buffer_null_sentinal_) {
325  columnDataPtr[i] = this->null_value_;
326  } else {
327  if (std::is_signed<TARGET_TYPE>::value) {
328  if (id < 0) {
329  throw std::runtime_error(
330  "Maximum number of unique strings (" +
331  std::to_string(std::numeric_limits<TARGET_TYPE>::max()) +
332  ") reached in target dictionary");
333  }
334  } else {
335  if (id >= std::numeric_limits<TARGET_TYPE>::max()) {
336  throw std::runtime_error(
337  "Maximum number of unique strings (" +
338  std::to_string(std::numeric_limits<TARGET_TYPE>::max()) +
339  ") reached in target column's dict encoding");
340  }
341  }
342  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
343  }
344  }
345  }
346  }
347 
348  return data;
349  }
350 
352  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
353  array_buffer,
354  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
355  if (use_literals_) {
356  for (size_t row = 0; row < array_buffer->size(); row++) {
357  auto& element = (array_buffer->at(row));
358  bool is_null = false;
359  if (element.second) {
361  processBuffer(std::move(element.second));
362  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
363  (*arrayData)[row] =
364  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
365  }
366  }
367  } else {
368  std::vector<std::vector<int32_t>> srcArrayIds(array_buffer->size());
369  std::vector<std::vector<int32_t>> destArrayIds(0);
370 
371  for (size_t row = 0; row < array_buffer->size(); row++) {
372  auto& element = (array_buffer->at(row));
373  if (element.second) {
374  srcArrayIds[row] = *(element.second.get());
375  }
376  }
377 
380  srcArrayIds,
382 
383  for (size_t row = 0; row < array_buffer->size(); row++) {
384  auto& element = (array_buffer->at(row));
385  bool is_null = false;
386  if (element.second) {
387  *(element.second.get()) = destArrayIds[row];
388  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(&(element.second->at(0)));
389  (*arrayData)[row] = ArrayDatum(element.first * sizeof(TARGET_TYPE),
390  arrayDataPtr,
391  is_null,
392  DoNothingDeleter());
393  }
394  }
395  }
396  }
397 
399  if (column_buffer_) {
400  this->column_data_ = processBuffer(std::move(column_buffer_));
401  column_buffer_ = nullptr;
402  }
403  }
404 
407  DataBlockPtr dataBlock;
408  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(this->column_data_.get());
409  insertData.data.push_back(dataBlock);
410  insertData.columnIds.push_back(this->column_descriptor_->columnId);
411  }
412 };
413 
415  std::unique_ptr<std::vector<std::string>> column_data_;
416 
418 
422 
424  const ColumnDescriptor* cd,
425  size_t num_rows,
426  bool dictEncoded,
427  int32_t sourceDictId,
428  StringDictionaryProxy* literals_dict)
429  : TargetValueConverter(cd) {
430  source_dict_ = nullptr;
431  literals_source_dict_ = nullptr;
432  dict_encoded_ = dictEncoded;
433  if (dictEncoded) {
434  if (0 != sourceDictId) {
435  auto source_dict_desc = cat.getMetadataForDict(std::abs(sourceDictId), true);
436  CHECK(source_dict_desc);
437  source_dict_ = source_dict_desc->stringDict.get();
439  } else {
440  literals_source_dict_ = literals_dict;
441  }
442  }
443  if (num_rows) {
444  allocateColumnarData(num_rows);
445  }
446  }
447 
448  ~StringValueConverter() override {}
449 
450  void allocateColumnarData(size_t num_rows) override {
451  CHECK(num_rows > 0);
452  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
453  }
454 
455  void convertToColumnarFormatFromDict(size_t row, const TargetValue* value) {
456  auto scalarValue =
457  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
458  auto db_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
459  auto val = *db_p;
460 
461  if (std::numeric_limits<int32_t>::min() == val) {
462  (*column_data_)[row] = std::string("");
463  } else {
464  if (source_dict_) {
465  std::string strVal = source_dict_->getString(val);
466  (*column_data_)[row] = strVal;
467  } else if (literals_source_dict_) {
468  std::string strVal = literals_source_dict_->getString(val);
469  (*column_data_)[row] = strVal;
470  } else {
471  CHECK_EQ(val, inline_int_null_value<int32_t>());
472  std::string nullStr = "";
473  (*column_data_)[row] = nullStr;
474  }
475  }
476  }
477 
478  void convertToColumnarFormatFromString(size_t row, const TargetValue* value) {
479  auto scalarValue =
480  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
481  auto db_p = checked_get<NullableString>(row, scalarValue, NULLABLE_STRING_ACCESSOR);
482  const auto db_str_p = checked_get<std::string>(row, db_p, STRING_ACCESSOR);
483 
484  if (nullptr != db_str_p) {
485  (*column_data_)[row] = *db_str_p;
486  } else {
487  (*column_data_)[row] = std::string("");
488  }
489  }
490 
491  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
492  if (dict_encoded_) {
494  } else {
496  }
497  }
498 
500  DataBlockPtr dataBlock;
501  dataBlock.stringsPtr = column_data_.get();
502  insertData.data.push_back(dataBlock);
503  insertData.columnIds.push_back(column_descriptor_->columnId);
504  }
505 };
506 
507 template <typename ELEMENT_CONVERTER>
509  std::unique_ptr<
510  std::vector<std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>
512  std::unique_ptr<std::vector<ArrayDatum>> column_data_;
513  std::unique_ptr<ELEMENT_CONVERTER> element_converter_;
516  bool data_finalized_ = false;
520 
522 
524  size_t num_rows,
525  std::unique_ptr<ELEMENT_CONVERTER> element_converter,
526  bool do_check_null)
528  , element_converter_(std::move(element_converter))
529  , element_type_info_(cd->columnType.get_elem_type())
530  , do_check_null_(do_check_null) {
531  if (num_rows) {
532  allocateColumnarData(num_rows);
533  }
534 
535  if (cd->columnType.get_size() > 0) {
538  fixed_array_size_ / sizeof(ELEMENT_CONVERTER::fixed_array_null_value_);
539  element_converter_->populateFixedArrayNullSentinel(fixed_array_elements_count_);
541  reinterpret_cast<int8_t*>(element_converter_->column_data_.get());
542  } else {
543  fixed_array_size_ = 0;
545  fixed_array_null_sentinel_ = nullptr;
546  }
547  }
548 
549  ~ArrayValueConverter() override {}
550 
551  void allocateColumnarData(size_t num_rows) override {
552  CHECK(num_rows > 0);
553  column_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
554  column_buffer_ = std::make_unique<std::vector<
555  std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>(
556  num_rows);
557  }
558 
559  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
560  const auto arrayValue =
561  checked_get<ArrayTargetValue>(row, value, ARRAY_VALUE_ACCESSOR);
562  CHECK(arrayValue);
563  if (arrayValue->is_initialized()) {
564  const auto& vec = arrayValue->get();
565  bool is_null = false;
566 
568  if (fixed_array_elements_count_ != vec.size()) {
569  throw std::runtime_error(
570  "Incorrect number of array elements for fixed length array column");
571  }
572  }
573 
574  if (vec.size()) {
575  typename ELEMENT_CONVERTER::ElementsBufferColumnPtr elementBuffer =
576  element_converter_->allocateColumnarBuffer(vec.size());
577 
578  int elementIndex = 0;
579  for (const auto& scalarValue : vec) {
580  element_converter_->convertElementToColumnarFormat(
581  elementIndex++, elementBuffer.get(), &scalarValue);
582  }
583 
584  column_buffer_->at(row) = {vec.size(), std::move(elementBuffer)};
585 
586  } else {
587  // Empty, not NULL
588  (*column_data_)[row] = ArrayDatum(0, nullptr, is_null, DoNothingDeleter());
589  }
590  } else {
591  if (!do_check_null_) {
592  throw std::runtime_error("NULL assignment of non null column not allowed");
593  }
594 
595  if (fixed_array_elements_count_ && !element_converter_->allowFixedNullArray()) {
596  throw std::runtime_error("NULL assignment of fixed length array not allowed");
597  }
598 
599  bool is_null = true; // do_check_null_;
600  (*column_data_)[row] = ArrayDatum(
602  (*column_data_)[row].is_null = is_null;
603  }
604  }
605 
607  if (!data_finalized_) {
608  element_converter_->processArrayBuffer(column_buffer_, column_data_.get());
609  data_finalized_ = true;
610  }
611  }
612 
615  DataBlockPtr dataBlock;
616  dataBlock.arraysPtr = column_data_.get();
617  insertData.data.push_back(dataBlock);
618  insertData.columnIds.push_back(column_descriptor_->columnId);
619  }
620 };
621 
624 
625  std::unique_ptr<std::vector<std::string>> column_data_;
626  std::unique_ptr<std::vector<ArrayDatum>> signed_compressed_coords_data_;
627 
629  size_t num_rows,
630  const ColumnDescriptor* logicalColumnDescriptor)
631  : TargetValueConverter(logicalColumnDescriptor) {
635 
636  if (num_rows) {
637  allocateColumnarData(num_rows);
638  }
639  }
640 
642 
643  void allocateColumnarData(size_t num_rows) override {
644  CHECK(num_rows > 0);
645  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
646  signed_compressed_coords_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
647  }
648 
651 
653  const std::shared_ptr<std::vector<double>>& coords) {
654  const auto compressed_coords_vector =
656 
657  uint8_t* compressed_coords_array = reinterpret_cast<uint8_t*>(
658  checked_malloc(sizeof(uint8_t) * compressed_coords_vector.size()));
659  memcpy(compressed_coords_array,
660  &compressed_coords_vector[0],
661  compressed_coords_vector.size());
662 
663  return ArrayDatum((int)compressed_coords_vector.size(),
664  reinterpret_cast<int8_t*>(compressed_coords_array),
665  false);
666  }
667 
668  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
669  const auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_VALUE_ACCESSOR);
670  CHECK(geoValue);
671  if (geoValue->is_initialized()) {
672  const auto geo = geoValue->get();
673  const auto geoPoint =
674  checked_get<GeoPointTargetValue>(row, &geo, GEO_POINT_VALUE_ACCESSOR);
675  CHECK(geoPoint);
676  (*column_data_)[row] = "";
677  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoint->coords);
678  } else {
679  // NULL point
680  (*column_data_)[row] = "";
681  auto coords = std::make_shared<std::vector<double>>();
682  coords->push_back(NULL_ARRAY_DOUBLE);
683  coords->push_back(NULL_DOUBLE);
684  auto coords_datum = toCompressedCoords(coords);
685  coords_datum.is_null = true;
686  (*signed_compressed_coords_data_)[row] = coords_datum;
687  }
688  }
689 
691  DataBlockPtr logical, coords;
692 
693  logical.stringsPtr = column_data_.get();
695 
696  insertData.data.emplace_back(logical);
697  insertData.columnIds.emplace_back(column_descriptor_->columnId);
698 
699  insertData.data.emplace_back(coords);
700  insertData.columnIds.emplace_back(coords_column_descriptor_->columnId);
701  }
702 };
703 
/**
 * Computes the axis-aligned bounding box of a flat list of interleaved coordinates
 * (x0, y0, x1, y1, ...).
 *
 * @param coords non-null pointer to the interleaved coordinate values
 * @return a four-element vector {min_x, min_y, max_x, max_y}. When `coords` holds no
 *         complete (x, y) pair the result is {max, max, lowest, lowest} of double.
 *
 * Only complete pairs are visited: a trailing unpaired value in an odd-length input is
 * ignored rather than being combined with a read past the end of the vector.
 */
inline std::vector<double> compute_bounds_of_coords(
    const std::shared_ptr<std::vector<double>>& coords) {
  constexpr auto DOUBLE_MAX = std::numeric_limits<double>::max();
  constexpr auto DOUBLE_MIN = std::numeric_limits<double>::lowest();
  std::vector<double> bounds{DOUBLE_MAX, DOUBLE_MAX, DOUBLE_MIN, DOUBLE_MIN};

  const std::vector<double>& xy = *coords;
  const size_t size_coords = xy.size();

  // `i + 1 < size_coords` guarantees that both xy[i] and xy[i + 1] exist.
  for (size_t i = 0; i + 1 < size_coords; i += 2) {
    const double x = xy[i];
    const double y = xy[i + 1];

    bounds[0] = std::min(bounds[0], x);
    bounds[1] = std::min(bounds[1], y);
    bounds[2] = std::max(bounds[2], x);
    bounds[3] = std::max(bounds[3], y);
  }
  return bounds;
}
726 
727 template <typename ELEM_TYPE>
728 inline ArrayDatum to_array_datum(const std::vector<ELEM_TYPE>& vector) {
729  ELEM_TYPE* array =
730  reinterpret_cast<ELEM_TYPE*>(checked_malloc(sizeof(ELEM_TYPE) * vector.size()));
731  memcpy(array, vector.data(), vector.size() * sizeof(ELEM_TYPE));
732 
733  return ArrayDatum(
734  (int)(vector.size() * sizeof(ELEM_TYPE)), reinterpret_cast<int8_t*>(array), false);
735 }
736 
737 template <typename ELEM_TYPE>
738 inline ArrayDatum to_array_datum(const std::shared_ptr<std::vector<ELEM_TYPE>>& vector) {
739  return to_array_datum(*vector.get());
740 }
741 
744 
745  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
746 
748  size_t num_rows,
749  const ColumnDescriptor* logicalColumnDescriptor)
750  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
754 
755  if (num_rows) {
756  allocateColumnarData(num_rows);
757  }
758  }
759 
761 
762  void allocateColumnarData(size_t num_rows) override {
763  CHECK(num_rows > 0);
765  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
766  }
767 
769 
770  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
771  const auto geoValue =
772  checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
773  CHECK(geoValue);
774  if (geoValue->is_initialized()) {
775  const auto geo = geoValue->get();
776  const auto geoLinestring =
777  checked_get<GeoLineStringTargetValue>(row, &geo, GEO_LINESTRING_VALUE_ACCESSOR);
778 
779  (*column_data_)[row] = "";
780  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoLinestring->coords);
781  auto bounds = compute_bounds_of_coords(geoLinestring->coords);
782  (*bounds_data_)[row] = to_array_datum(bounds);
783  } else {
784  // NULL Linestring
785  (*column_data_)[row] = "";
786  (*signed_compressed_coords_data_)[row] = ArrayDatum(0, nullptr, true);
787  std::vector<double> bounds = {
788  NULL_ARRAY_DOUBLE, NULL_DOUBLE, NULL_DOUBLE, NULL_DOUBLE};
789  auto bounds_datum = to_array_datum(bounds);
790  bounds_datum.is_null = true;
791  (*bounds_data_)[row] = bounds_datum;
792  }
793  }
794 
797 
798  DataBlockPtr bounds;
799 
800  bounds.arraysPtr = bounds_data_.get();
801 
802  insertData.data.emplace_back(bounds);
803  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
804  }
805 };
806 
812 
813  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
814  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
815  std::unique_ptr<int32_t[]> render_group_data_;
816 
818  size_t num_rows,
819  const ColumnDescriptor* logicalColumnDescriptor)
820  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
830 
831  if (num_rows) {
832  allocateColumnarData(num_rows);
833  }
834  }
835 
837 
838  void allocateColumnarData(size_t num_rows) override {
840  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
841  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
842  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
843  }
844 
846 
847  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
848  const auto geoValue =
849  checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
850  CHECK(geoValue);
851  if (geoValue->is_initialized()) {
852  const auto geo = geoValue->get();
853  const auto geoPoly =
854  checked_get<GeoPolyTargetValue>(row, &geo, GEO_POLY_VALUE_ACCESSOR);
855 
856  (*column_data_)[row] = "";
857  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoly->coords);
858  (*ring_sizes_data_)[row] = to_array_datum(geoPoly->ring_sizes);
859  auto bounds = compute_bounds_of_coords(geoPoly->coords);
860  (*bounds_data_)[row] = to_array_datum(bounds);
861  render_group_data_[row] =
863  } else {
864  // NULL Polygon
865  (*column_data_)[row] = "";
866  (*signed_compressed_coords_data_)[row] = ArrayDatum(0, nullptr, true);
867  (*ring_sizes_data_)[row] = ArrayDatum(0, nullptr, true);
868  std::vector<double> bounds = {
869  NULL_ARRAY_DOUBLE, NULL_DOUBLE, NULL_DOUBLE, NULL_DOUBLE};
870  auto bounds_datum = to_array_datum(bounds);
871  bounds_datum.is_null = true;
872  (*bounds_data_)[row] = bounds_datum;
874  }
875  }
876 
879 
880  DataBlockPtr ringSizes, bounds, renderGroup;
881 
882  ringSizes.arraysPtr = ring_sizes_data_.get();
883  bounds.arraysPtr = bounds_data_.get();
884  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
885 
886  insertData.data.emplace_back(ringSizes);
887  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
888 
889  insertData.data.emplace_back(bounds);
890  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
891 
892  insertData.data.emplace_back(renderGroup);
893  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
894  }
895 };
896 
903 
904  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
905  std::unique_ptr<std::vector<ArrayDatum>> poly_rings_data_;
906  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
907  std::unique_ptr<int32_t[]> render_group_data_;
908 
910  size_t num_rows,
911  const ColumnDescriptor* logicalColumnDescriptor)
912  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
925 
926  if (num_rows) {
927  allocateColumnarData(num_rows);
928  }
929  }
930 
932 
933  void allocateColumnarData(size_t num_rows) override {
935  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
936  poly_rings_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
937  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
938  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
939  }
940 
942 
943  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
944  const auto geoValue =
945  checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
946  CHECK(geoValue);
947  if (geoValue->is_initialized()) {
948  const auto geo = geoValue->get();
949  const auto geoMultiPoly =
950  checked_get<GeoMultiPolyTargetValue>(row, &geo, GEO_MULTI_POLY_VALUE_ACCESSOR);
951 
952  (*column_data_)[row] = "";
953  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoMultiPoly->coords);
954  (*ring_sizes_data_)[row] = to_array_datum(geoMultiPoly->ring_sizes);
955  (*poly_rings_data_)[row] = to_array_datum(geoMultiPoly->poly_rings);
956  auto bounds = compute_bounds_of_coords(geoMultiPoly->coords);
957  (*bounds_data_)[row] = to_array_datum(bounds);
958  render_group_data_[row] =
960  } else {
961  // NULL MultiPolygon
962  (*column_data_)[row] = "";
963  (*signed_compressed_coords_data_)[row] = ArrayDatum(0, nullptr, true);
964  (*ring_sizes_data_)[row] = ArrayDatum(0, nullptr, true);
965  (*poly_rings_data_)[row] = ArrayDatum(0, nullptr, true);
966  std::vector<double> bounds = {
967  NULL_ARRAY_DOUBLE, NULL_DOUBLE, NULL_DOUBLE, NULL_DOUBLE};
968  auto bounds_datum = to_array_datum(bounds);
969  bounds_datum.is_null = true;
970  (*bounds_data_)[row] = bounds_datum;
972  }
973  }
974 
977 
978  DataBlockPtr ringSizes, polyRings, bounds, renderGroup;
979 
980  ringSizes.arraysPtr = ring_sizes_data_.get();
981  polyRings.arraysPtr = poly_rings_data_.get();
982  bounds.arraysPtr = bounds_data_.get();
983  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
984 
985  insertData.data.emplace_back(ringSizes);
986  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
987 
988  insertData.data.emplace_back(polyRings);
989  insertData.columnIds.emplace_back(ring_sizes_solumn_descriptor_->columnId);
990 
991  insertData.data.emplace_back(bounds);
992  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
993 
994  insertData.data.emplace_back(renderGroup);
995  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
996  }
997 };
998 
999 #endif
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
ArrayDatum to_array_datum(const std::vector< ELEM_TYPE > &vector)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
std::unique_ptr< TARGET_TYPE, CheckedMallocDeleter< TARGET_TYPE >> ColumnDataPtr
std::unique_ptr< std::vector< int32_t >> ElementsBufferColumnPtr
StringDictionaryProxy * literals_source_dict_
#define CHECK_EQ(x, y)
Definition: Logger.h:217
void convertToColumnarFormatFromDict(size_t row, const TargetValue *value)
void allocateColumnarData(size_t num_rows) override
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
std::string cat(Ts &&...args)
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:111
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:227
const ColumnDescriptor * ring_sizes_solumn_descriptor_
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:228
import_export::RenderGroupAnalyzer render_group_analyzer_
std::unique_ptr< int32_t[]> render_group_data_
constexpr auto DOUBLE_MAX
Definition: Types.cpp:40
void allocateColumnarData(size_t num_rows) override
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
boost_variant_accessor< int64_t > SOURCE_TYPE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
ArrayValueConverter(const ColumnDescriptor *cd, size_t num_rows, std::unique_ptr< ELEMENT_CONVERTER > element_converter, bool do_check_null)
void populateFixedArrayNullSentinel(size_t num_rows)
const DictDescriptor * source_dict_desc_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
void allocateColumnarData(size_t num_rows) override
const ColumnDescriptor * column_descriptor_
std::string getString(int32_t string_id) const
ArrayDatum toCompressedCoords(const std::shared_ptr< std::vector< double >> &coords)
static void populate_string_ids(std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict, const std::map< int32_t, std::string > transient_mapping={})
Populates provided dest_ids vector with string ids corresponding to given source strings.
ElementsBufferColumnPtr column_buffer_
std::vector< double > compute_bounds_of_coords(const std::shared_ptr< std::vector< double >> &coords)
std::unique_ptr< std::vector< ArrayDatum > > poly_rings_data_
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
std::function< TARGET_TYPE(int64_t, bool, TARGET_TYPE)> CasterFunc
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnData, const ScalarTargetValue *scalarValue)
std::string to_string(char const *&&v)
const DictDescriptor * target_dict_desc_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const StringDictionaryProxy * literals_dict_
#define NULL_INT
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::shared_ptr< StringDictionary > stringDict
boost_variant_accessor< ScalarTargetValue > SCALAR_TARGET_VALUE_ACCESSOR
boost_variant_accessor< GeoLineStringTargetValue > GEO_LINESTRING_VALUE_ACCESSOR
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
GeoPointValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
std::vector< uint8_t > compress_coords(const std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
boost_variant_accessor< NullableString > NULLABLE_STRING_ACCESSOR
std::unique_ptr< std::vector< std::string > > column_data_
CONSTEXPR DEVICE bool is_null(const T &value)
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void convertToColumnarFormatFromString(size_t row, const TargetValue *value)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
boost_variant_accessor< GeoMultiPolyTargetValue > GEO_MULTI_POLY_VALUE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
boost_variant_accessor< GeoPolyTargetValue > GEO_POLY_VALUE_ACCESSOR
boost_variant_accessor< std::string > STRING_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
void finalizeDataBlocksForInsertData() override
GeoLinestringValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
const std::map< int32_t, std::string > getTransientMapping() const
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1537
specifies the content in-memory of a row in the column metadata table
GeoMultiPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< ELEMENT_CONVERTER > element_converter_
T get_fixed_array_null_value()
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< int32_t[]> render_group_data_
StringValueConverter(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd, size_t num_rows, bool dictEncoded, int32_t sourceDictId, StringDictionaryProxy *literals_dict)
void finalizeDataBlocksForInsertData() override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void setValueCaster(CasterFunc caster)
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:64
int32_t getOrAdd(const std::string &str) noexcept
void allocateColumnarData(size_t num_rows) override
int32_t convertTransientStringIdToPermanentId(int32_t &transient_string_id)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< std::vector< std::string > > column_data_
const ColumnDescriptor * render_group_column_descriptor_
typename NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr ElementsDataColumnPtr
std::unordered_map< int32_t, int32_t > literals_lookup_
RETURN_TYPE checked_cast(SOURCE_TYPE val, bool check_null, RETURN_TYPE null_value)
const ColumnDescriptor * coords_column_descriptor_
void allocateColumnarData(size_t num_rows) override
std::string getString(int32_t string_id) const
constexpr auto DOUBLE_MIN
Definition: Types.cpp:41
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:338
void allocateColumnarData(size_t num_rows) override
boost_variant_accessor< GeoTargetValue > GEO_VALUE_ACCESSOR
#define NULL_ARRAY_DOUBLE
const ColumnDescriptor * render_group_column_descriptor_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
static void populate_string_array_ids(std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict)
#define CHECK(condition)
Definition: Logger.h:209
NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr processBuffer(ElementsBufferColumnPtr buffer)
std::unique_ptr< std::vector< std::pair< size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr > > > column_buffer_
boost_variant_accessor< GeoTargetValue > GEO_TARGET_VALUE_ACCESSOR
Descriptor for a dictionary for a string column.
const ColumnDescriptor * bounds_column_descriptor_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
import_export::RenderGroupAnalyzer render_group_analyzer_
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
std::unique_ptr< std::vector< ArrayDatum > > column_data_
SQLTypeInfo columnType
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnBuffer, const ScalarTargetValue *scalarValue)
void allocateColumnarData(size_t num_rows) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
int8_t * numbersPtr
Definition: sqltypes.h:226
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
boost_variant_accessor< ArrayTargetValue > ARRAY_VALUE_ACCESSOR
boost_variant_accessor< GeoPointTargetValue > GEO_POINT_VALUE_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
boost_variant_accessor< SOURCE_TYPE > SOURCE_TYPE_ACCESSOR
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:62
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
int insertBoundsAndReturnRenderGroup(const std::vector< double > &bounds)
GeoPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
std::unique_ptr< std::vector< ArrayDatum > > signed_compressed_coords_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
StringDictionaryProxy * source_dict_proxy_
NumericValueConverter(const ColumnDescriptor *cd, size_t num_rows, TARGET_TYPE nullValue, SOURCE_TYPE nullCheckValue, bool doNullCheck)
DictionaryValueConverter(const Catalog_Namespace::Catalog &cat, int32_t sourceDictId, const ColumnDescriptor *targetDescriptor, size_t num_rows, TARGET_TYPE nullValue, int64_t nullCheckValue, bool doNullCheck, StringDictionaryProxy *literals_dict, StringDictionaryProxy *source_dict_proxy)
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156