OmniSciDB  17c254d2f8
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TargetValueConvertersImpl.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018, OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef TARGET_VALUE_CONVERTERS_IMPL_H_
18 #define TARGET_VALUE_CONVERTERS_IMPL_H_
19 
21 #include "Shared/geo_compression.h"
23 
24 #include <atomic>
25 #include <future>
26 #include <thread>
27 
28 template <typename T>
30  if (std::is_floating_point<T>::value) {
31  return static_cast<T>(inline_fp_null_array_value<T>());
32  } else {
33  return static_cast<T>(inline_int_null_array_value<T>());
34  }
35 }
36 
37 template <typename SOURCE_TYPE, typename TARGET_TYPE>
39  using ColumnDataPtr = std::unique_ptr<TARGET_TYPE, CheckedMallocDeleter<TARGET_TYPE>>;
41 
43  TARGET_TYPE null_value_;
44  SOURCE_TYPE null_check_value_;
47 
49 
51  size_t num_rows,
52  TARGET_TYPE nullValue,
53  SOURCE_TYPE nullCheckValue,
54  bool doNullCheck)
56  , null_value_(nullValue)
57  , null_check_value_(nullCheckValue)
58  , do_null_check_(doNullCheck) {
59  fixed_array_null_value_ = get_fixed_array_null_value<TARGET_TYPE>();
60  if (num_rows) {
61  allocateColumnarData(num_rows);
62  }
63  }
64 
65  ~NumericValueConverter() override {}
66 
/// Tells the enclosing array converter whether a NULL may be assigned to a
/// fixed-length array of this element type. Always true for this converter.
bool allowFixedNullArray() {
  constexpr bool kNullFixedArrayAllowed = true;
  return kNullFixedArrayAllowed;
}
68 
70  allocateColumnarData(num_rows);
73  }
74 
75  void allocateColumnarData(size_t num_rows) override {
76  CHECK(num_rows > 0);
78  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
79  }
80 
82  CHECK(num_rows > 0);
84  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
85  }
86 
88  size_t row,
89  typename ElementsBufferColumnPtr::pointer columnData,
90  const ScalarTargetValue* scalarValue) {
91  auto db_p = checked_get<SOURCE_TYPE>(row, scalarValue, SOURCE_TYPE_ACCESSOR);
92  auto val = *db_p;
93 
94  if (do_null_check_ && null_check_value_ == val) {
95  columnData[row] = null_value_;
96  } else {
97  columnData[row] = static_cast<TARGET_TYPE>(val);
98  }
99  }
100 
101  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
102  convertElementToColumnarFormat(row, column_data_.get(), scalarValue);
103  }
104 
105  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
106  auto scalarValue =
107  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
108  convertToColumnarFormat(row, scalarValue);
109  }
110 
112  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
113  array_buffer,
114  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
115  for (size_t row = 0; row < array_buffer->size(); row++) {
116  auto& element = (array_buffer->at(row));
117  bool is_null = false;
118  if (element.second) {
119  ColumnDataPtr& data = element.second;
120  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
121  (*arrayData)[row] =
122  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
123  }
124  }
125  }
126 
128  DataBlockPtr dataBlock;
129  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(column_data_.get());
130  insertData.data.push_back(dataBlock);
131  insertData.columnIds.push_back(column_descriptor_->columnId);
132  }
133 };
134 
135 template <typename TARGET_TYPE>
136 struct DictionaryValueConverter : public NumericValueConverter<int64_t, TARGET_TYPE> {
137  using ElementsDataColumnPtr =
139 
140  using ElementsBufferColumnPtr = std::unique_ptr<std::vector<int32_t>>;
141 
143 
146 
148 
149  const int32_t buffer_null_sentinal_ = std::numeric_limits<int32_t>::min();
150 
152 
153  std::unordered_map<int32_t, int32_t> literals_lookup_;
155 
157  int32_t sourceDictId,
158  const ColumnDescriptor* targetDescriptor,
159  size_t num_rows,
160  TARGET_TYPE nullValue,
161  int64_t nullCheckValue,
162  bool doNullCheck,
163  StringDictionaryProxy* literals_dict,
164  StringDictionaryProxy* source_dict_proxy)
165  : NumericValueConverter<int64_t, TARGET_TYPE>(targetDescriptor,
166  num_rows,
167  nullValue,
168  nullCheckValue,
169  doNullCheck) {
170  literals_dict_ = literals_dict;
172  cat.getMetadataForDict(targetDescriptor->columnType.get_comp_param(), true);
173 
174  source_dict_desc_ = nullptr;
175  source_dict_proxy_ = source_dict_proxy;
176 
177  use_literals_ = 0 == sourceDictId;
178  if (!use_literals_) {
179  source_dict_desc_ = cat.getMetadataForDict(std::abs(sourceDictId), true);
181  } else {
182  if (literals_dict) {
183  for (auto& entry : literals_dict->getTransientMapping()) {
184  auto newId = target_dict_desc_->stringDict->getOrAdd(entry.second);
185  literals_lookup_[entry.first] = newId;
186  }
187  }
188 
190  }
191 
193 
194  if (num_rows) {
196  }
197  }
198 
200 
/// Tells the enclosing array converter whether a NULL may be assigned to a
/// fixed-length array of this element type. Always false for this converter.
bool allowFixedNullArray() {
  constexpr bool kNullFixedArrayAllowed = false;
  return kNullFixedArrayAllowed;
}
202 
204  CHECK(num_rows > 0);
205  return std::make_unique<std::vector<int32_t>>(num_rows);
206  }
207 
209  size_t row,
210  typename ElementsBufferColumnPtr::pointer columnBuffer,
211  const ScalarTargetValue* scalarValue) {
212  auto db_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
213  auto val = *db_p;
214 
215  if (this->do_null_check_ && this->null_check_value_ == val) {
216  (*columnBuffer)[row] = this->buffer_null_sentinal_;
217  } else {
218  (*columnBuffer)[row] = (int32_t)val;
219  }
220  }
221 
222  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
223  convertElementToColumnarFormat(row, this->column_buffer_.get(), scalarValue);
224  }
225 
226  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
227  auto scalarValue =
228  checked_get<ScalarTargetValue>(row, value, this->SCALAR_TARGET_VALUE_ACCESSOR);
229 
230  convertToColumnarFormat(row, scalarValue);
231  }
232 
233  inline int32_t convertTransientStringIdToPermanentId(int32_t& transient_string_id) {
234  if (source_dict_proxy_) {
235  auto str = source_dict_proxy_->getString(transient_string_id);
236  return source_dict_proxy_->getOrAdd(str);
237  } else {
238  throw std::runtime_error("Unexpected negative source ID");
239  }
240  }
241 
243  ElementsBufferColumnPtr buffer) {
246  reinterpret_cast<TARGET_TYPE*>(
247  checked_malloc(buffer->size() * sizeof(TARGET_TYPE))));
248 
249  std::vector<int32_t>* bufferPtr =
250  reinterpret_cast<std::vector<int32_t>*>(buffer.get());
251  TARGET_TYPE* columnDataPtr = reinterpret_cast<TARGET_TYPE*>(data.get());
252  if (use_literals_) {
253  for (size_t i = 0; i < bufferPtr->size(); i++) {
254  auto id = literals_lookup_[(*bufferPtr)[i]];
255  if (id == buffer_null_sentinal_) {
256  columnDataPtr[i] = this->null_value_;
257  } else {
258  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
259  }
260  }
261  } else {
263  // special case, where source and target dict are the same
264  // mostly in update
265  for (size_t i = 0; i < bufferPtr->size(); i++) {
266  auto src_id = (*bufferPtr)[i];
267  if (src_id == buffer_null_sentinal_) {
268  columnDataPtr[i] = this->null_value_;
269  } else if (src_id < 0) {
270  columnDataPtr[i] = convertTransientStringIdToPermanentId(src_id);
271  } else {
272  columnDataPtr[i] = static_cast<TARGET_TYPE>(src_id);
273  }
274  }
275 
276  } else {
277  std::vector<int32_t> dest_ids;
278  dest_ids.resize(bufferPtr->size());
279 
280  if (source_dict_proxy_) {
282  dest_ids,
284  *bufferPtr,
287  } else {
290  *bufferPtr,
292  }
293 
294  // fixup NULL sentinel
295  for (size_t i = 0; i < dest_ids.size(); i++) {
296  auto id = dest_ids[i];
297  if (id == buffer_null_sentinal_) {
298  columnDataPtr[i] = this->null_value_;
299  } else {
300  if (std::is_signed<TARGET_TYPE>::value) {
301  if (id < 0) {
302  throw std::runtime_error(
303  "Maximum number of unique strings (" +
304  std::to_string(std::numeric_limits<TARGET_TYPE>::max()) +
305  ") reached in target dictionary");
306  }
307  } else {
308  if (id >= std::numeric_limits<TARGET_TYPE>::max()) {
309  throw std::runtime_error(
310  "Maximum number of unique strings (" +
311  std::to_string(std::numeric_limits<TARGET_TYPE>::max()) +
312  ") reached in target column's dict encoding");
313  }
314  }
315  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
316  }
317  }
318  }
319  }
320 
321  return data;
322  }
323 
325  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
326  array_buffer,
327  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
328  if (use_literals_) {
329  for (size_t row = 0; row < array_buffer->size(); row++) {
330  auto& element = (array_buffer->at(row));
331  bool is_null = false;
332  if (element.second) {
334  processBuffer(std::move(element.second));
335  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
336  (*arrayData)[row] =
337  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
338  }
339  }
340  } else {
341  std::vector<std::vector<int32_t>> srcArrayIds(array_buffer->size());
342  std::vector<std::vector<int32_t>> destArrayIds(0);
343 
344  for (size_t row = 0; row < array_buffer->size(); row++) {
345  auto& element = (array_buffer->at(row));
346  if (element.second) {
347  srcArrayIds[row] = *(element.second.get());
348  }
349  }
350 
353  srcArrayIds,
355 
356  for (size_t row = 0; row < array_buffer->size(); row++) {
357  auto& element = (array_buffer->at(row));
358  bool is_null = false;
359  if (element.second) {
360  *(element.second.get()) = destArrayIds[row];
361  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(&(element.second->at(0)));
362  (*arrayData)[row] = ArrayDatum(element.first * sizeof(TARGET_TYPE),
363  arrayDataPtr,
364  is_null,
365  DoNothingDeleter());
366  }
367  }
368  }
369  }
370 
372  if (column_buffer_) {
373  this->column_data_ = processBuffer(std::move(column_buffer_));
374  column_buffer_ = nullptr;
375  }
376  }
377 
380  DataBlockPtr dataBlock;
381  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(this->column_data_.get());
382  insertData.data.push_back(dataBlock);
383  insertData.columnIds.push_back(this->column_descriptor_->columnId);
384  }
385 };
386 
388  std::unique_ptr<std::vector<std::string>> column_data_;
389 
391 
395 
397  const ColumnDescriptor* cd,
398  size_t num_rows,
399  bool dictEncoded,
400  int32_t sourceDictId,
401  StringDictionaryProxy* literals_dict)
402  : TargetValueConverter(cd) {
403  source_dict_ = nullptr;
404  literals_source_dict_ = nullptr;
405  dict_encoded_ = dictEncoded;
406  if (dictEncoded) {
407  if (0 != sourceDictId) {
408  auto source_dict_desc = cat.getMetadataForDict(std::abs(sourceDictId), true);
409  CHECK(source_dict_desc);
410  source_dict_ = source_dict_desc->stringDict.get();
412  } else {
413  literals_source_dict_ = literals_dict;
414  }
415  }
416  if (num_rows) {
417  allocateColumnarData(num_rows);
418  }
419  }
420 
421  ~StringValueConverter() override {}
422 
423  void allocateColumnarData(size_t num_rows) override {
424  CHECK(num_rows > 0);
425  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
426  }
427 
428  void convertToColumnarFormatFromDict(size_t row, const TargetValue* value) {
429  auto scalarValue =
430  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
431  auto db_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
432  auto val = *db_p;
433 
434  if (std::numeric_limits<int32_t>::min() == val) {
435  (*column_data_)[row] = std::string("");
436  } else {
437  if (source_dict_) {
438  std::string strVal = source_dict_->getString(val);
439  (*column_data_)[row] = strVal;
440  } else if (literals_source_dict_) {
441  std::string strVal = literals_source_dict_->getString(val);
442  (*column_data_)[row] = strVal;
443  } else {
444  CHECK_EQ(val, inline_int_null_value<int32_t>());
445  std::string nullStr = "";
446  (*column_data_)[row] = nullStr;
447  }
448  }
449  }
450 
451  void convertToColumnarFormatFromString(size_t row, const TargetValue* value) {
452  auto scalarValue =
453  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
454  auto db_p = checked_get<NullableString>(row, scalarValue, NULLABLE_STRING_ACCESSOR);
455  const auto db_str_p = checked_get<std::string>(row, db_p, STRING_ACCESSOR);
456 
457  if (nullptr != db_str_p) {
458  (*column_data_)[row] = *db_str_p;
459  } else {
460  (*column_data_)[row] = std::string("");
461  }
462  }
463 
464  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
465  if (dict_encoded_) {
467  } else {
469  }
470  }
471 
473  DataBlockPtr dataBlock;
474  dataBlock.stringsPtr = column_data_.get();
475  insertData.data.push_back(dataBlock);
476  insertData.columnIds.push_back(column_descriptor_->columnId);
477  }
478 };
479 
480 template <typename ELEMENT_CONVERTER>
482  std::unique_ptr<
483  std::vector<std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>
485  std::unique_ptr<std::vector<ArrayDatum>> column_data_;
486  std::unique_ptr<ELEMENT_CONVERTER> element_converter_;
489  bool data_finalized_ = false;
493 
495 
497  size_t num_rows,
498  std::unique_ptr<ELEMENT_CONVERTER> element_converter,
499  bool do_check_null)
501  , element_converter_(std::move(element_converter))
502  , element_type_info_(cd->columnType.get_elem_type())
503  , do_check_null_(do_check_null) {
504  if (num_rows) {
505  allocateColumnarData(num_rows);
506  }
507 
508  if (cd->columnType.get_size() > 0) {
511  fixed_array_size_ / sizeof(ELEMENT_CONVERTER::fixed_array_null_value_);
512  element_converter_->populateFixedArrayNullSentinel(fixed_array_elements_count_);
514  reinterpret_cast<int8_t*>(element_converter_->column_data_.get());
515  } else {
516  fixed_array_size_ = 0;
518  fixed_array_null_sentinel_ = nullptr;
519  }
520  }
521 
522  ~ArrayValueConverter() override {}
523 
524  void allocateColumnarData(size_t num_rows) override {
525  CHECK(num_rows > 0);
526  column_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
527  column_buffer_ = std::make_unique<std::vector<
528  std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>(
529  num_rows);
530  }
531 
532  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
533  const auto arrayValue =
534  checked_get<ArrayTargetValue>(row, value, ARRAY_VALUE_ACCESSOR);
535  CHECK(arrayValue);
536  if (arrayValue->is_initialized()) {
537  const auto& vec = arrayValue->get();
538  bool is_null = false;
539 
541  if (fixed_array_elements_count_ != vec.size()) {
542  throw std::runtime_error(
543  "Incorrect number of array elements for fixed length array column");
544  }
545  }
546 
547  if (vec.size()) {
548  typename ELEMENT_CONVERTER::ElementsBufferColumnPtr elementBuffer =
549  element_converter_->allocateColumnarBuffer(vec.size());
550 
551  int elementIndex = 0;
552  for (const auto& scalarValue : vec) {
553  element_converter_->convertElementToColumnarFormat(
554  elementIndex++, elementBuffer.get(), &scalarValue);
555  }
556 
557  column_buffer_->at(row) = {vec.size(), std::move(elementBuffer)};
558 
559  } else {
560  // Empty, not NULL
561  (*column_data_)[row] = ArrayDatum(0, nullptr, is_null, DoNothingDeleter());
562  }
563  } else {
564  if (!do_check_null_) {
565  throw std::runtime_error("NULL assignment of non null column not allowed");
566  }
567 
568  if (fixed_array_elements_count_ && !element_converter_->allowFixedNullArray()) {
569  throw std::runtime_error("NULL assignment of fixed length array not allowed");
570  }
571 
572  bool is_null = true; // do_check_null_;
573  (*column_data_)[row] = ArrayDatum(
575  (*column_data_)[row].is_null = is_null;
576  }
577  }
578 
580  if (!data_finalized_) {
581  element_converter_->processArrayBuffer(column_buffer_, column_data_.get());
582  data_finalized_ = true;
583  }
584  }
585 
588  DataBlockPtr dataBlock;
589  dataBlock.arraysPtr = column_data_.get();
590  insertData.data.push_back(dataBlock);
591  insertData.columnIds.push_back(column_descriptor_->columnId);
592  }
593 };
594 
597 
598  std::unique_ptr<std::vector<std::string>> column_data_;
599  std::unique_ptr<std::vector<ArrayDatum>> signed_compressed_coords_data_;
600 
602  size_t num_rows,
603  const ColumnDescriptor* logicalColumnDescriptor)
604  : TargetValueConverter(logicalColumnDescriptor) {
608 
609  if (num_rows) {
610  allocateColumnarData(num_rows);
611  }
612  }
613 
615 
616  void allocateColumnarData(size_t num_rows) override {
617  CHECK(num_rows > 0);
618  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
619  signed_compressed_coords_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
620  }
621 
624 
626  const std::shared_ptr<std::vector<double>>& coords) {
627  const auto compressed_coords_vector =
629 
630  uint8_t* compressed_coords_array = reinterpret_cast<uint8_t*>(
631  checked_malloc(sizeof(uint8_t) * compressed_coords_vector.size()));
632  memcpy(compressed_coords_array,
633  &compressed_coords_vector[0],
634  compressed_coords_vector.size());
635 
636  return ArrayDatum((int)compressed_coords_vector.size(),
637  reinterpret_cast<int8_t*>(compressed_coords_array),
638  false);
639  }
640 
641  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
642  const auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_VALUE_ACCESSOR);
643  CHECK(geoValue);
644  if (geoValue->is_initialized()) {
645  const auto geo = geoValue->get();
646  const auto geoPoint =
647  checked_get<GeoPointTargetValue>(row, &geo, GEO_POINT_VALUE_ACCESSOR);
648  CHECK(geoPoint);
649  (*column_data_)[row] = "";
650  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoint->coords);
651  } else {
652  // NULL point
653  (*column_data_)[row] = "";
654  auto coords = std::make_shared<std::vector<double>>(NULL_ARRAY_DOUBLE, NULL_DOUBLE);
655  auto coords_datum = toCompressedCoords(coords);
656  coords_datum.is_null = true;
657  (*signed_compressed_coords_data_)[row] = coords_datum;
658  }
659  }
660 
662  DataBlockPtr logical, coords;
663 
664  logical.stringsPtr = column_data_.get();
666 
667  insertData.data.emplace_back(logical);
668  insertData.columnIds.emplace_back(column_descriptor_->columnId);
669 
670  insertData.data.emplace_back(coords);
671  insertData.columnIds.emplace_back(coords_column_descriptor_->columnId);
672  }
673 };
674 
/// Computes the axis-aligned bounding box of a flat list of coordinates laid
/// out as x0, y0, x1, y1, ...
///
/// @param coords  shared pointer to the interleaved coordinate values; it is
///                dereferenced unconditionally
/// @return a four-element vector {min_x, min_y, max_x, max_y}. When `coords`
///         holds no complete (x, y) pair the result is
///         {max(), max(), lowest(), lowest()} of double.
///
/// Only complete pairs are read: a trailing unpaired value in an odd-length
/// input is ignored rather than read together with an element past the end.
inline std::vector<double> compute_bounds_of_coords(
    const std::shared_ptr<std::vector<double>>& coords) {
  constexpr auto DOUBLE_MAX = std::numeric_limits<double>::max();
  constexpr auto DOUBLE_MIN = std::numeric_limits<double>::lowest();
  std::vector<double> bounds{DOUBLE_MAX, DOUBLE_MAX, DOUBLE_MIN, DOUBLE_MIN};

  const auto& points = *coords;
  const auto size_coords = points.size();

  // `i + 1 < size_coords` guarantees both members of the pair are in range.
  for (size_t i = 0; i + 1 < size_coords; i += 2) {
    const double x = points[i];
    const double y = points[i + 1];

    bounds[0] = std::min(bounds[0], x);
    bounds[1] = std::min(bounds[1], y);
    bounds[2] = std::max(bounds[2], x);
    bounds[3] = std::max(bounds[3], y);
  }
  return bounds;
}
697 
698 template <typename ELEM_TYPE>
699 inline ArrayDatum to_array_datum(const std::vector<ELEM_TYPE>& vector) {
700  ELEM_TYPE* array =
701  reinterpret_cast<ELEM_TYPE*>(checked_malloc(sizeof(ELEM_TYPE) * vector.size()));
702  memcpy(array, vector.data(), vector.size() * sizeof(ELEM_TYPE));
703 
704  return ArrayDatum(
705  (int)(vector.size() * sizeof(ELEM_TYPE)), reinterpret_cast<int8_t*>(array), false);
706 }
707 
708 template <typename ELEM_TYPE>
709 inline ArrayDatum to_array_datum(const std::shared_ptr<std::vector<ELEM_TYPE>>& vector) {
710  return to_array_datum(*vector.get());
711 }
712 
715 
716  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
717 
719  size_t num_rows,
720  const ColumnDescriptor* logicalColumnDescriptor)
721  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
725 
726  if (num_rows) {
727  allocateColumnarData(num_rows);
728  }
729  }
730 
732 
733  void allocateColumnarData(size_t num_rows) override {
734  CHECK(num_rows > 0);
736  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
737  }
738 
740 
741  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
742  const auto geoValue =
743  checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
744  CHECK(geoValue);
745  if (geoValue->is_initialized()) {
746  const auto geo = geoValue->get();
747  const auto geoLinestring =
748  checked_get<GeoLineStringTargetValue>(row, &geo, GEO_LINESTRING_VALUE_ACCESSOR);
749 
750  (*column_data_)[row] = "";
751  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoLinestring->coords);
752  auto bounds = compute_bounds_of_coords(geoLinestring->coords);
753  (*bounds_data_)[row] = to_array_datum(bounds);
754  } else {
755  // NULL Linestring
756  (*column_data_)[row] = "";
757  (*signed_compressed_coords_data_)[row] = ArrayDatum(0, nullptr, true);
758  std::vector<double> bounds = {
759  NULL_ARRAY_DOUBLE, NULL_DOUBLE, NULL_DOUBLE, NULL_DOUBLE};
760  auto bounds_datum = to_array_datum(bounds);
761  bounds_datum.is_null = true;
762  (*bounds_data_)[row] = bounds_datum;
763  }
764  }
765 
768 
769  DataBlockPtr bounds;
770 
771  bounds.arraysPtr = bounds_data_.get();
772 
773  insertData.data.emplace_back(bounds);
774  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
775  }
776 };
777 
783 
784  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
785  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
786  std::unique_ptr<int32_t[]> render_group_data_;
787 
789  size_t num_rows,
790  const ColumnDescriptor* logicalColumnDescriptor)
791  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
801 
802  if (num_rows) {
803  allocateColumnarData(num_rows);
804  }
805  }
806 
808 
809  void allocateColumnarData(size_t num_rows) override {
811  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
812  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
813  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
814  }
815 
817 
818  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
819  const auto geoValue =
820  checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
821  CHECK(geoValue);
822  if (geoValue->is_initialized()) {
823  const auto geo = geoValue->get();
824  const auto geoPoly =
825  checked_get<GeoPolyTargetValue>(row, &geo, GEO_POLY_VALUE_ACCESSOR);
826 
827  (*column_data_)[row] = "";
828  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoly->coords);
829  (*ring_sizes_data_)[row] = to_array_datum(geoPoly->ring_sizes);
830  auto bounds = compute_bounds_of_coords(geoPoly->coords);
831  (*bounds_data_)[row] = to_array_datum(bounds);
832  render_group_data_[row] =
834  } else {
835  // NULL Polygon
836  (*column_data_)[row] = "";
837  (*signed_compressed_coords_data_)[row] = ArrayDatum(0, nullptr, true);
838  (*ring_sizes_data_)[row] = ArrayDatum(0, nullptr, true);
839  std::vector<double> bounds = {
840  NULL_ARRAY_DOUBLE, NULL_DOUBLE, NULL_DOUBLE, NULL_DOUBLE};
841  auto bounds_datum = to_array_datum(bounds);
842  bounds_datum.is_null = true;
843  (*bounds_data_)[row] = bounds_datum;
845  }
846  }
847 
850 
851  DataBlockPtr ringSizes, bounds, renderGroup;
852 
853  ringSizes.arraysPtr = ring_sizes_data_.get();
854  bounds.arraysPtr = bounds_data_.get();
855  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
856 
857  insertData.data.emplace_back(ringSizes);
858  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
859 
860  insertData.data.emplace_back(bounds);
861  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
862 
863  insertData.data.emplace_back(renderGroup);
864  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
865  }
866 };
867 
874 
875  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
876  std::unique_ptr<std::vector<ArrayDatum>> poly_rings_data_;
877  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
878  std::unique_ptr<int32_t[]> render_group_data_;
879 
881  size_t num_rows,
882  const ColumnDescriptor* logicalColumnDescriptor)
883  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
896 
897  if (num_rows) {
898  allocateColumnarData(num_rows);
899  }
900  }
901 
903 
904  void allocateColumnarData(size_t num_rows) override {
906  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
907  poly_rings_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
908  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
909  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
910  }
911 
913 
914  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
915  const auto geoValue =
916  checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
917  CHECK(geoValue);
918  if (geoValue->is_initialized()) {
919  const auto geo = geoValue->get();
920  const auto geoMultiPoly =
921  checked_get<GeoMultiPolyTargetValue>(row, &geo, GEO_MULTI_POLY_VALUE_ACCESSOR);
922 
923  (*column_data_)[row] = "";
924  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoMultiPoly->coords);
925  (*ring_sizes_data_)[row] = to_array_datum(geoMultiPoly->ring_sizes);
926  (*poly_rings_data_)[row] = to_array_datum(geoMultiPoly->poly_rings);
927  auto bounds = compute_bounds_of_coords(geoMultiPoly->coords);
928  (*bounds_data_)[row] = to_array_datum(bounds);
929  render_group_data_[row] =
931  } else {
932  // NULL MultiPolygon
933  (*column_data_)[row] = "";
934  (*signed_compressed_coords_data_)[row] = ArrayDatum(0, nullptr, true);
935  (*ring_sizes_data_)[row] = ArrayDatum(0, nullptr, true);
936  (*poly_rings_data_)[row] = ArrayDatum(0, nullptr, true);
937  std::vector<double> bounds = {
938  NULL_ARRAY_DOUBLE, NULL_DOUBLE, NULL_DOUBLE, NULL_DOUBLE};
939  auto bounds_datum = to_array_datum(bounds);
940  bounds_datum.is_null = true;
941  (*bounds_data_)[row] = bounds_datum;
943  }
944  }
945 
948 
949  DataBlockPtr ringSizes, polyRings, bounds, renderGroup;
950 
951  ringSizes.arraysPtr = ring_sizes_data_.get();
952  polyRings.arraysPtr = poly_rings_data_.get();
953  bounds.arraysPtr = bounds_data_.get();
954  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
955 
956  insertData.data.emplace_back(ringSizes);
957  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
958 
959  insertData.data.emplace_back(polyRings);
960  insertData.columnIds.emplace_back(ring_sizes_solumn_descriptor_->columnId);
961 
962  insertData.data.emplace_back(bounds);
963  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
964 
965  insertData.data.emplace_back(renderGroup);
966  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
967  }
968 };
969 
970 #endif
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
ArrayDatum to_array_datum(const std::vector< ELEM_TYPE > &vector)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
std::unique_ptr< TARGET_TYPE, CheckedMallocDeleter< TARGET_TYPE >> ColumnDataPtr
std::unique_ptr< std::vector< int32_t >> ElementsBufferColumnPtr
StringDictionaryProxy * literals_source_dict_
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void convertToColumnarFormatFromDict(size_t row, const TargetValue *value)
void allocateColumnarData(size_t num_rows) override
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
#define NULL_DOUBLE
Definition: sqltypes.h:185
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
std::string cat(Ts &&...args)
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const int8_t const int64_t * num_rows
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:86
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:148
const ColumnDescriptor * ring_sizes_solumn_descriptor_
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:149
std::unique_ptr< int32_t[]> render_group_data_
void allocateColumnarData(size_t num_rows) override
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:193
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
boost_variant_accessor< int64_t > SOURCE_TYPE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
int insertBoundsAndReturnRenderGroup(const std::vector< double > &bounds)
Definition: Importer.cpp:5082
ArrayValueConverter(const ColumnDescriptor *cd, size_t num_rows, std::unique_ptr< ELEMENT_CONVERTER > element_converter, bool do_check_null)
void populateFixedArrayNullSentinel(size_t num_rows)
const DictDescriptor * source_dict_desc_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
void allocateColumnarData(size_t num_rows) override
const ColumnDescriptor * column_descriptor_
std::string getString(int32_t string_id) const
ArrayDatum toCompressedCoords(const std::shared_ptr< std::vector< double >> &coords)
static void populate_string_ids(std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict, const std::map< int32_t, std::string > transient_mapping={})
Populates provided dest_ids vector with string ids corresponding to given source strings.
ElementsBufferColumnPtr column_buffer_
std::vector< double > compute_bounds_of_coords(const std::shared_ptr< std::vector< double >> &coords)
std::unique_ptr< std::vector< ArrayDatum > > poly_rings_data_
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnData, const ScalarTargetValue *scalarValue)
std::string to_string(char const *&&v)
const DictDescriptor * target_dict_desc_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const StringDictionaryProxy * literals_dict_
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::shared_ptr< StringDictionary > stringDict
boost_variant_accessor< ScalarTargetValue > SCALAR_TARGET_VALUE_ACCESSOR
boost_variant_accessor< GeoLineStringTargetValue > GEO_LINESTRING_VALUE_ACCESSOR
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:129
GeoPointValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
boost_variant_accessor< NullableString > NULLABLE_STRING_ACCESSOR
std::unique_ptr< std::vector< std::string > > column_data_
CHECK(cgen_state)
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void convertToColumnarFormatFromString(size_t row, const TargetValue *value)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
boost_variant_accessor< GeoMultiPolyTargetValue > GEO_MULTI_POLY_VALUE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
boost_variant_accessor< GeoPolyTargetValue > GEO_POLY_VALUE_ACCESSOR
boost_variant_accessor< std::string > STRING_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
void finalizeDataBlocksForInsertData() override
GeoLinestringValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
const std::map< int32_t, std::string > getTransientMapping() const
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1446
Specifies the in-memory content of a row in the column metadata table.
#define NULL_INT
Definition: sqltypes.h:182
GeoMultiPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< ELEMENT_CONVERTER > element_converter_
Importer_NS::RenderGroupAnalyzer render_group_analyzer_
T get_fixed_array_null_value()
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< int32_t[]> render_group_data_
StringValueConverter(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd, size_t num_rows, bool dictEncoded, int32_t sourceDictId, StringDictionaryProxy *literals_dict)
void finalizeDataBlocksForInsertData() override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
bool is_null(const T &v, const SQLTypeInfo &t)
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:64
int32_t getOrAdd(const std::string &str) noexcept
void allocateColumnarData(size_t num_rows) override
int32_t convertTransientStringIdToPermanentId(int32_t &transient_string_id)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< std::vector< std::string > > column_data_
const ColumnDescriptor * render_group_column_descriptor_
Importer_NS::RenderGroupAnalyzer render_group_analyzer_
typename NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr ElementsDataColumnPtr
std::unordered_map< int32_t, int32_t > literals_lookup_
const ColumnDescriptor * coords_column_descriptor_
void allocateColumnarData(size_t num_rows) override
std::string getString(int32_t string_id) const
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:267
void allocateColumnarData(size_t num_rows) override
boost_variant_accessor< GeoTargetValue > GEO_VALUE_ACCESSOR
const ColumnDescriptor * render_group_column_descriptor_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
static void populate_string_array_ids(std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict)
NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr processBuffer(ElementsBufferColumnPtr buffer)
std::unique_ptr< std::vector< std::pair< size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr > > > column_buffer_
boost_variant_accessor< GeoTargetValue > GEO_TARGET_VALUE_ACCESSOR
Descriptor for a dictionary for a string column.
const ColumnDescriptor * bounds_column_descriptor_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
std::unique_ptr< std::vector< ArrayDatum > > column_data_
SQLTypeInfo columnType
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnBuffer, const ScalarTargetValue *scalarValue)
void allocateColumnarData(size_t num_rows) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
int8_t * numbersPtr
Definition: sqltypes.h:147
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
boost_variant_accessor< ArrayTargetValue > ARRAY_VALUE_ACCESSOR
boost_variant_accessor< GeoPointTargetValue > GEO_POINT_VALUE_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
boost_variant_accessor< SOURCE_TYPE > SOURCE_TYPE_ACCESSOR
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:62
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
GeoPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
std::unique_ptr< std::vector< ArrayDatum > > signed_compressed_coords_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
StringDictionaryProxy * source_dict_proxy_
NumericValueConverter(const ColumnDescriptor *cd, size_t num_rows, TARGET_TYPE nullValue, SOURCE_TYPE nullCheckValue, bool doNullCheck)
DictionaryValueConverter(const Catalog_Namespace::Catalog &cat, int32_t sourceDictId, const ColumnDescriptor *targetDescriptor, size_t num_rows, TARGET_TYPE nullValue, int64_t nullCheckValue, bool doNullCheck, StringDictionaryProxy *literals_dict, StringDictionaryProxy *source_dict_proxy)
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156