OmniSciDB  addbbd5075
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TargetValueConvertersImpl.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018, OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef TARGET_VALUE_CONVERTERS_IMPL_H_
18 #define TARGET_VALUE_CONVERTERS_IMPL_H_
19 
20 #include "../StringDictionary/StringDictionary.h"
21 #include "TargetValueConverters.h"
22 
23 #include <atomic>
24 #include <future>
25 #include <thread>
26 
27 namespace Importer_NS {
28 std::vector<uint8_t> compress_coords(std::vector<double>& coords, const SQLTypeInfo& ti);
29 } // namespace Importer_NS
30 
31 template <typename T>
33  if (std::is_floating_point<T>::value) {
34  return static_cast<T>(inline_fp_null_array_value<T>());
35  } else {
36  return static_cast<T>(inline_int_null_array_value<T>());
37  }
38 }
39 
40 template <typename SOURCE_TYPE, typename TARGET_TYPE>
42  using ColumnDataPtr = std::unique_ptr<TARGET_TYPE, CheckedMallocDeleter<TARGET_TYPE>>;
44 
46  TARGET_TYPE null_value_;
47  SOURCE_TYPE null_check_value_;
50 
52 
54  size_t num_rows,
55  TARGET_TYPE nullValue,
56  SOURCE_TYPE nullCheckValue,
57  bool doNullCheck)
59  , null_value_(nullValue)
60  , null_check_value_(nullCheckValue)
61  , do_null_check_(doNullCheck) {
62  fixed_array_null_value_ = get_fixed_array_null_value<TARGET_TYPE>();
63  if (num_rows) {
64  allocateColumnarData(num_rows);
65  }
66  }
67 
68  ~NumericValueConverter() override {}
69 
/// Always reports true. ArrayValueConverter consults this on its element converter
/// before accepting a NULL value for a fixed-length array column.
bool allowFixedNullArray() {
  return true;
}
71 
73  allocateColumnarData(num_rows);
76  }
77 
78  void allocateColumnarData(size_t num_rows) override {
79  CHECK(num_rows > 0);
81  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
82  }
83 
85  CHECK(num_rows > 0);
87  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
88  }
89 
91  size_t row,
92  typename ElementsBufferColumnPtr::pointer columnData,
93  const ScalarTargetValue* scalarValue) {
94  auto mapd_p = checked_get<SOURCE_TYPE>(row, scalarValue, SOURCE_TYPE_ACCESSOR);
95  auto val = *mapd_p;
96 
97  if (do_null_check_ && null_check_value_ == val) {
98  columnData[row] = null_value_;
99  } else {
100  columnData[row] = static_cast<TARGET_TYPE>(val);
101  }
102  }
103 
104  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
105  convertElementToColumnarFormat(row, column_data_.get(), scalarValue);
106  }
107 
108  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
109  auto scalarValue =
110  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
111  convertToColumnarFormat(row, scalarValue);
112  }
113 
115  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
116  array_buffer,
117  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
118  for (size_t row = 0; row < array_buffer->size(); row++) {
119  auto& element = (array_buffer->at(row));
120  bool is_null = false;
121  if (element.second) {
122  ColumnDataPtr& data = element.second;
123  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
124  (*arrayData)[row] =
125  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
126  }
127  }
128  }
129 
131  DataBlockPtr dataBlock;
132  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(column_data_.get());
133  insertData.data.push_back(dataBlock);
134  insertData.columnIds.push_back(column_descriptor_->columnId);
135  }
136 };
137 
138 template <typename TARGET_TYPE>
139 struct DictionaryValueConverter : public NumericValueConverter<int64_t, TARGET_TYPE> {
140  using ElementsDataColumnPtr =
142 
143  using ElementsBufferColumnPtr = std::unique_ptr<std::vector<int32_t>>;
144 
146 
149 
151 
152  const int32_t buffer_null_sentinal_ = std::numeric_limits<int32_t>::min();
153 
155 
156  std::unordered_map<int32_t, int32_t> literals_lookup_;
158 
160  int32_t sourceDictId,
161  const ColumnDescriptor* targetDescriptor,
162  size_t num_rows,
163  TARGET_TYPE nullValue,
164  int64_t nullCheckValue,
165  bool doNullCheck,
166  StringDictionaryProxy* literals_dict,
167  StringDictionaryProxy* source_dict_proxy)
168  : NumericValueConverter<int64_t, TARGET_TYPE>(targetDescriptor,
169  num_rows,
170  nullValue,
171  nullCheckValue,
172  doNullCheck) {
173  literals_dict_ = literals_dict;
175  cat.getMetadataForDict(targetDescriptor->columnType.get_comp_param(), true);
176 
177  source_dict_desc_ = nullptr;
178  source_dict_proxy_ = source_dict_proxy;
179 
180  use_literals_ = 0 == sourceDictId;
181  if (!use_literals_) {
182  source_dict_desc_ = cat.getMetadataForDict(std::abs(sourceDictId), true);
184  } else {
185  if (literals_dict) {
186  for (auto& entry : literals_dict->getTransientMapping()) {
187  auto newId = target_dict_desc_->stringDict->getOrAdd(entry.second);
188  literals_lookup_[entry.first] = newId;
189  }
190  }
191 
193  }
194 
196 
197  if (num_rows) {
199  }
200  }
201 
203 
/// Always reports false. ArrayValueConverter rejects a NULL value for a
/// fixed-length array column when its element converter answers false here.
bool allowFixedNullArray() {
  return false;
}
205 
207  CHECK(num_rows > 0);
208  return std::make_unique<std::vector<int32_t>>(num_rows);
209  }
210 
212  size_t row,
213  typename ElementsBufferColumnPtr::pointer columnBuffer,
214  const ScalarTargetValue* scalarValue) {
215  auto mapd_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
216  auto val = *mapd_p;
217 
218  if (this->do_null_check_ && this->null_check_value_ == val) {
219  (*columnBuffer)[row] = this->buffer_null_sentinal_;
220  } else {
221  (*columnBuffer)[row] = (int32_t)val;
222  }
223  }
224 
225  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
226  convertElementToColumnarFormat(row, this->column_buffer_.get(), scalarValue);
227  }
228 
229  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
230  auto scalarValue =
231  checked_get<ScalarTargetValue>(row, value, this->SCALAR_TARGET_VALUE_ACCESSOR);
232 
233  convertToColumnarFormat(row, scalarValue);
234  }
235 
236  inline int32_t convertTransientStringIdToPermanentId(int32_t& transient_string_id) {
237  if (source_dict_proxy_) {
238  auto str = source_dict_proxy_->getString(transient_string_id);
239  return source_dict_proxy_->getOrAdd(str);
240  } else {
241  throw std::runtime_error("Unexpected negative source ID");
242  }
243  }
244 
246  ElementsBufferColumnPtr buffer) {
249  reinterpret_cast<TARGET_TYPE*>(
250  checked_malloc(buffer->size() * sizeof(TARGET_TYPE))));
251 
252  std::vector<int32_t>* bufferPtr =
253  reinterpret_cast<std::vector<int32_t>*>(buffer.get());
254  TARGET_TYPE* columnDataPtr = reinterpret_cast<TARGET_TYPE*>(data.get());
255  if (use_literals_) {
256  for (size_t i = 0; i < bufferPtr->size(); i++) {
257  auto id = literals_lookup_[(*bufferPtr)[i]];
258  if (id == buffer_null_sentinal_) {
259  columnDataPtr[i] = this->null_value_;
260  } else {
261  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
262  }
263  }
264  } else {
266  // special case, where source and target dict are the same
267  // mostly in update
268  for (size_t i = 0; i < bufferPtr->size(); i++) {
269  auto src_id = (*bufferPtr)[i];
270  if (src_id == buffer_null_sentinal_) {
271  columnDataPtr[i] = this->null_value_;
272  } else if (src_id < 0) {
273  columnDataPtr[i] = convertTransientStringIdToPermanentId(src_id);
274  } else {
275  columnDataPtr[i] = static_cast<TARGET_TYPE>(src_id);
276  }
277  }
278 
279  } else {
280  std::vector<int32_t> dest_ids;
281  dest_ids.resize(bufferPtr->size());
282 
283  if (source_dict_proxy_) {
285  dest_ids,
287  *bufferPtr,
290  } else {
293  *bufferPtr,
295  }
296 
297  // fixup NULL sentinel
298  for (size_t i = 0; i < dest_ids.size(); i++) {
299  auto id = dest_ids[i];
300  if (id == buffer_null_sentinal_) {
301  columnDataPtr[i] = this->null_value_;
302  } else {
303  CHECK(std::numeric_limits<TARGET_TYPE>::max() >= id);
304  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
305  }
306  }
307  }
308  }
309 
310  return data;
311  }
312 
314  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
315  array_buffer,
316  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
317  if (use_literals_) {
318  for (size_t row = 0; row < array_buffer->size(); row++) {
319  auto& element = (array_buffer->at(row));
320  bool is_null = false;
321  if (element.second) {
323  processBuffer(std::move(element.second));
324  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
325  (*arrayData)[row] =
326  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
327  }
328  }
329  } else {
330  std::vector<std::vector<int32_t>> srcArrayIds(array_buffer->size());
331  std::vector<std::vector<int32_t>> destArrayIds(0);
332 
333  for (size_t row = 0; row < array_buffer->size(); row++) {
334  auto& element = (array_buffer->at(row));
335  if (element.second) {
336  srcArrayIds[row] = *(element.second.get());
337  }
338  }
339 
342  srcArrayIds,
344 
345  for (size_t row = 0; row < array_buffer->size(); row++) {
346  auto& element = (array_buffer->at(row));
347  bool is_null = false;
348  if (element.second) {
349  *(element.second.get()) = destArrayIds[row];
350  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(&(element.second->at(0)));
351  (*arrayData)[row] = ArrayDatum(element.first * sizeof(TARGET_TYPE),
352  arrayDataPtr,
353  is_null,
354  DoNothingDeleter());
355  }
356  }
357  }
358  }
359 
361  if (column_buffer_) {
362  this->column_data_ = processBuffer(std::move(column_buffer_));
363  column_buffer_ = nullptr;
364  }
365  }
366 
369  DataBlockPtr dataBlock;
370  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(this->column_data_.get());
371  insertData.data.push_back(dataBlock);
372  insertData.columnIds.push_back(this->column_descriptor_->columnId);
373  }
374 };
375 
377  std::unique_ptr<std::vector<std::string>> column_data_;
378 
380 
384 
386  const ColumnDescriptor* cd,
387  size_t num_rows,
388  bool dictEncoded,
389  int32_t sourceDictId,
390  StringDictionaryProxy* literals_dict)
391  : TargetValueConverter(cd) {
392  source_dict_ = nullptr;
393  literals_source_dict_ = nullptr;
394  dict_encoded_ = dictEncoded;
395  if (dictEncoded) {
396  if (0 != sourceDictId) {
397  auto source_dict_desc = cat.getMetadataForDict(std::abs(sourceDictId), true);
398  CHECK(source_dict_desc);
399  source_dict_ = source_dict_desc->stringDict.get();
401  } else {
402  literals_source_dict_ = literals_dict;
403  }
404  }
405  if (num_rows) {
406  allocateColumnarData(num_rows);
407  }
408  }
409 
410  ~StringValueConverter() override {}
411 
412  void allocateColumnarData(size_t num_rows) override {
413  CHECK(num_rows > 0);
414  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
415  }
416 
417  void convertToColumnarFormatFromDict(size_t row, const TargetValue* value) {
418  auto scalarValue =
419  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
420  auto mapd_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
421  auto val = *mapd_p;
422 
423  if (std::numeric_limits<int32_t>::min() == val) {
424  (*column_data_)[row] = std::string("");
425  } else {
426  if (source_dict_) {
427  std::string strVal = source_dict_->getString(val);
428  (*column_data_)[row] = strVal;
429  } else if (literals_source_dict_) {
430  std::string strVal = literals_source_dict_->getString(val);
431  (*column_data_)[row] = strVal;
432  } else {
433  CHECK_EQ(val, inline_int_null_value<int32_t>());
434  std::string nullStr = "";
435  (*column_data_)[row] = nullStr;
436  }
437  }
438  }
439 
440  void convertToColumnarFormatFromString(size_t row, const TargetValue* value) {
441  auto scalarValue =
442  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
443  auto mapd_p = checked_get<NullableString>(row, scalarValue, NULLABLE_STRING_ACCESSOR);
444 
445  const auto mapd_str_p = checked_get<std::string>(row, mapd_p, STRING_ACCESSOR);
446 
447  if (nullptr != mapd_str_p) {
448  (*column_data_)[row] = *mapd_str_p;
449  } else {
450  (*column_data_)[row] = std::string("");
451  }
452  }
453 
454  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
455  if (dict_encoded_) {
457  } else {
459  }
460  }
461 
463  DataBlockPtr dataBlock;
464  dataBlock.stringsPtr = column_data_.get();
465  insertData.data.push_back(dataBlock);
466  insertData.columnIds.push_back(column_descriptor_->columnId);
467  }
468 };
469 
470 template <typename ELEMENT_CONVERTER>
472  std::unique_ptr<
473  std::vector<std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>
475  std::unique_ptr<std::vector<ArrayDatum>> column_data_;
476  std::unique_ptr<ELEMENT_CONVERTER> element_converter_;
479  bool data_finalized_ = false;
483 
485 
487  size_t num_rows,
488  std::unique_ptr<ELEMENT_CONVERTER> element_converter,
489  bool do_check_null)
491  , element_converter_(std::move(element_converter))
492  , element_type_info_(cd->columnType.get_elem_type())
493  , do_check_null_(do_check_null) {
494  if (num_rows) {
495  allocateColumnarData(num_rows);
496  }
497 
498  if (cd->columnType.get_size() > 0) {
501  fixed_array_size_ / sizeof(ELEMENT_CONVERTER::fixed_array_null_value_);
502  element_converter_->populateFixedArrayNullSentinel(fixed_array_elements_count_);
504  reinterpret_cast<int8_t*>(element_converter_->column_data_.get());
505  } else {
506  fixed_array_size_ = 0;
508  fixed_array_null_sentinel_ = nullptr;
509  }
510  }
511 
512  ~ArrayValueConverter() override {}
513 
514  void allocateColumnarData(size_t num_rows) override {
515  CHECK(num_rows > 0);
516  column_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
517  column_buffer_ = std::make_unique<std::vector<
518  std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>(
519  num_rows);
520  }
521 
522  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
523  const auto arrayValue =
524  checked_get<ArrayTargetValue>(row, value, ARRAY_VALUE_ACCESSOR);
525  CHECK(arrayValue);
526  if (arrayValue->is_initialized()) {
527  const auto& vec = arrayValue->get();
528  bool is_null = false;
529 
531  if (fixed_array_elements_count_ != vec.size()) {
532  throw std::runtime_error(
533  "Incorrect number of array elements for fixed length array column");
534  }
535  }
536 
537  if (vec.size()) {
538  typename ELEMENT_CONVERTER::ElementsBufferColumnPtr elementBuffer =
539  element_converter_->allocateColumnarBuffer(vec.size());
540 
541  int elementIndex = 0;
542  for (const auto& scalarValue : vec) {
543  element_converter_->convertElementToColumnarFormat(
544  elementIndex++, elementBuffer.get(), &scalarValue);
545  }
546 
547  column_buffer_->at(row) = {vec.size(), std::move(elementBuffer)};
548 
549  } else {
550  // Empty, not NULL
551  (*column_data_)[row] = ArrayDatum(0, nullptr, is_null, DoNothingDeleter());
552  }
553  } else {
554  if (!do_check_null_) {
555  throw std::runtime_error("NULL assignment of non null column not allowed");
556  }
557 
558  if (fixed_array_elements_count_ && !element_converter_->allowFixedNullArray()) {
559  throw std::runtime_error("NULL assignment of fixed length array not allowed");
560  }
561 
562  bool is_null = true; // do_check_null_;
563  (*column_data_)[row] = ArrayDatum(
565  (*column_data_)[row].is_null = is_null;
566  }
567  }
568 
570  if (!data_finalized_) {
571  element_converter_->processArrayBuffer(column_buffer_, column_data_.get());
572  data_finalized_ = true;
573  }
574  }
575 
578  DataBlockPtr dataBlock;
579  dataBlock.arraysPtr = column_data_.get();
580  insertData.data.push_back(dataBlock);
581  insertData.columnIds.push_back(column_descriptor_->columnId);
582  }
583 };
584 
587 
588  std::unique_ptr<std::vector<std::string>> column_data_;
589  std::unique_ptr<std::vector<ArrayDatum>> signed_compressed_coords_data_;
590 
592  size_t num_rows,
593  const ColumnDescriptor* logicalColumnDescriptor)
594  : TargetValueConverter(logicalColumnDescriptor) {
598 
599  if (num_rows) {
600  allocateColumnarData(num_rows);
601  }
602  }
603 
605 
606  void allocateColumnarData(size_t num_rows) override {
607  CHECK(num_rows > 0);
608  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
609  signed_compressed_coords_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
610  }
611 
613 
615  const std::shared_ptr<std::vector<double>>& coords) {
616  const auto compressed_coords_vector =
618 
619  uint8_t* compressed_coords_array = reinterpret_cast<uint8_t*>(
620  checked_malloc(sizeof(uint8_t) * compressed_coords_vector.size()));
621  memcpy(compressed_coords_array,
622  &compressed_coords_vector[0],
623  compressed_coords_vector.size());
624 
625  return ArrayDatum((int)compressed_coords_vector.size(),
626  reinterpret_cast<int8_t*>(compressed_coords_array),
627  false);
628  }
629 
630  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
631  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
632  auto geoPoint =
633  checked_get<GeoPointTargetValue>(row, geoValue, GEO_POINT_VALUE_ACCESSOR);
634 
635  (*column_data_)[row] = "";
636  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoint->coords);
637  }
638 
640  DataBlockPtr logical, coords;
641 
642  logical.stringsPtr = column_data_.get();
644 
645  insertData.data.emplace_back(logical);
646  insertData.columnIds.emplace_back(column_descriptor_->columnId);
647 
648  insertData.data.emplace_back(coords);
649  insertData.columnIds.emplace_back(coords_column_descriptor_->columnId);
650  }
651 };
652 
/**
 * Computes the axis-aligned bounding box of a flattened list of 2D coordinates.
 *
 * @param coords Coordinates laid out as x0, y0, x1, y1, ...; must not be null.
 * @return A 4-element vector {min_x, min_y, max_x, max_y}. When the input holds no
 *         complete (x, y) pair the result keeps its initial values: both minima at
 *         the largest finite double and both maxima at the lowest finite double.
 *
 * A trailing x without a matching y (odd-sized input) is ignored instead of being
 * paired with an element read past the end of the vector.
 */
inline std::vector<double> compute_bounds_of_coords(
    const std::shared_ptr<std::vector<double>>& coords) {
  constexpr auto kLargest = std::numeric_limits<double>::max();
  constexpr auto kLowest = std::numeric_limits<double>::lowest();
  std::vector<double> bounds{kLargest, kLargest, kLowest, kLowest};

  const auto& points = *coords;
  // Visit complete pairs only, so points[i + 1] is always in range.
  for (size_t i = 0; i + 1 < points.size(); i += 2) {
    const double x = points[i];
    const double y = points[i + 1];

    bounds[0] = std::min(bounds[0], x);
    bounds[1] = std::min(bounds[1], y);
    bounds[2] = std::max(bounds[2], x);
    bounds[3] = std::max(bounds[3], y);
  }
  return bounds;
}
675 
676 template <typename ELEM_TYPE>
677 inline ArrayDatum to_array_datum(const std::vector<ELEM_TYPE>& vector) {
678  ELEM_TYPE* array =
679  reinterpret_cast<ELEM_TYPE*>(checked_malloc(sizeof(ELEM_TYPE) * vector.size()));
680  memcpy(array, vector.data(), vector.size() * sizeof(ELEM_TYPE));
681 
682  return ArrayDatum(
683  (int)(vector.size() * sizeof(ELEM_TYPE)), reinterpret_cast<int8_t*>(array), false);
684 }
685 
686 template <typename ELEM_TYPE>
687 inline ArrayDatum to_array_datum(const std::shared_ptr<std::vector<ELEM_TYPE>>& vector) {
688  return to_array_datum(*vector.get());
689 }
690 
693 
694  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
695 
697  size_t num_rows,
698  const ColumnDescriptor* logicalColumnDescriptor)
699  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
703 
704  if (num_rows) {
705  allocateColumnarData(num_rows);
706  }
707  }
708 
710 
711  void allocateColumnarData(size_t num_rows) override {
712  CHECK(num_rows > 0);
714  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
715  }
716 
718 
719  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
720  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
721  auto geoLinestring = checked_get<GeoLineStringTargetValue>(
722  row, geoValue, GEO_LINESTRING_VALUE_ACCESSOR);
723 
724  (*column_data_)[row] = "";
725  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoLinestring->coords);
726  auto bounds = compute_bounds_of_coords(geoLinestring->coords);
727  (*bounds_data_)[row] = to_array_datum(bounds);
728  }
729 
732 
733  DataBlockPtr bounds;
734 
735  bounds.arraysPtr = bounds_data_.get();
736 
737  insertData.data.emplace_back(bounds);
738  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
739  }
740 };
741 
747 
748  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
749  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
750  std::unique_ptr<int32_t[]> render_group_data_;
751 
753  size_t num_rows,
754  const ColumnDescriptor* logicalColumnDescriptor)
755  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
765 
766  if (num_rows) {
767  allocateColumnarData(num_rows);
768  }
769  }
770 
772 
773  void allocateColumnarData(size_t num_rows) override {
775  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
776  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
777  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
778  }
779 
781 
782  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
783  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
784  auto geoPoly =
785  checked_get<GeoPolyTargetValue>(row, geoValue, GEO_POLY_VALUE_ACCESSOR);
786 
787  (*column_data_)[row] = "";
788  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoly->coords);
789  (*ring_sizes_data_)[row] = to_array_datum(geoPoly->ring_sizes);
790  auto bounds = compute_bounds_of_coords(geoPoly->coords);
791  (*bounds_data_)[row] = to_array_datum(bounds);
792  render_group_data_[row] =
794  }
795 
798 
799  DataBlockPtr ringSizes, bounds, renderGroup;
800 
801  ringSizes.arraysPtr = ring_sizes_data_.get();
802  bounds.arraysPtr = bounds_data_.get();
803  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
804 
805  insertData.data.emplace_back(ringSizes);
806  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
807 
808  insertData.data.emplace_back(bounds);
809  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
810 
811  insertData.data.emplace_back(renderGroup);
812  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
813  }
814 };
815 
822 
823  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
824  std::unique_ptr<std::vector<ArrayDatum>> poly_rings_data_;
825  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
826  std::unique_ptr<int32_t[]> render_group_data_;
827 
829  size_t num_rows,
830  const ColumnDescriptor* logicalColumnDescriptor)
831  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
844 
845  if (num_rows) {
846  allocateColumnarData(num_rows);
847  }
848  }
849 
851 
852  void allocateColumnarData(size_t num_rows) override {
854  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
855  poly_rings_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
856  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
857  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
858  }
859 
861 
862  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
863  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
864  auto geoMultiPoly = checked_get<GeoMultiPolyTargetValue>(
865  row, geoValue, GEO_MULTI_POLY_VALUE_ACCESSOR);
866 
867  (*column_data_)[row] = "";
868  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoMultiPoly->coords);
869  (*ring_sizes_data_)[row] = to_array_datum(geoMultiPoly->ring_sizes);
870  (*poly_rings_data_)[row] = to_array_datum(geoMultiPoly->poly_rings);
871  auto bounds = compute_bounds_of_coords(geoMultiPoly->coords);
872  (*bounds_data_)[row] = to_array_datum(bounds);
873  render_group_data_[row] =
875  }
876 
879 
880  DataBlockPtr ringSizes, polyRings, bounds, renderGroup;
881 
882  ringSizes.arraysPtr = ring_sizes_data_.get();
883  polyRings.arraysPtr = poly_rings_data_.get();
884  bounds.arraysPtr = bounds_data_.get();
885  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
886 
887  insertData.data.emplace_back(ringSizes);
888  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
889 
890  insertData.data.emplace_back(polyRings);
891  insertData.columnIds.emplace_back(ring_sizes_solumn_descriptor_->columnId);
892 
893  insertData.data.emplace_back(bounds);
894  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
895 
896  insertData.data.emplace_back(renderGroup);
897  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
898  }
899 };
900 
901 #endif
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
ArrayDatum to_array_datum(const std::vector< ELEM_TYPE > &vector)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
std::unique_ptr< TARGET_TYPE, CheckedMallocDeleter< TARGET_TYPE >> ColumnDataPtr
std::unique_ptr< std::vector< int32_t >> ElementsBufferColumnPtr
StringDictionaryProxy * literals_source_dict_
#define CHECK_EQ(x, y)
Definition: Logger.h:201
void convertToColumnarFormatFromDict(size_t row, const TargetValue *value)
void allocateColumnarData(size_t num_rows) override
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const int8_t const int64_t * num_rows
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:81
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:335
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:141
const ColumnDescriptor * ring_sizes_solumn_descriptor_
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:142
std::unique_ptr< int32_t[]> render_group_data_
void allocateColumnarData(size_t num_rows) override
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
boost_variant_accessor< int64_t > SOURCE_TYPE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
int insertBoundsAndReturnRenderGroup(const std::vector< double > &bounds)
Definition: Importer.cpp:4990
ArrayValueConverter(const ColumnDescriptor *cd, size_t num_rows, std::unique_ptr< ELEMENT_CONVERTER > element_converter, bool do_check_null)
void populateFixedArrayNullSentinel(size_t num_rows)
const DictDescriptor * source_dict_desc_
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Importer.cpp:1422
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
HOST DEVICE int get_size() const
Definition: sqltypes.h:336
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
void allocateColumnarData(size_t num_rows) override
const ColumnDescriptor * column_descriptor_
std::string getString(int32_t string_id) const
ArrayDatum toCompressedCoords(const std::shared_ptr< std::vector< double >> &coords)
static void populate_string_ids(std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict, const std::map< int32_t, std::string > transient_mapping={})
Populates provided dest_ids vector with string ids corresponding to given source strings.
ElementsBufferColumnPtr column_buffer_
std::vector< double > compute_bounds_of_coords(const std::shared_ptr< std::vector< double >> &coords)
std::unique_ptr< std::vector< ArrayDatum > > poly_rings_data_
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnData, const ScalarTargetValue *scalarValue)
const DictDescriptor * target_dict_desc_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const StringDictionaryProxy * literals_dict_
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::shared_ptr< StringDictionary > stringDict
boost_variant_accessor< ScalarTargetValue > SCALAR_TARGET_VALUE_ACCESSOR
boost_variant_accessor< GeoLineStringTargetValue > GEO_LINESTRING_VALUE_ACCESSOR
GeoPointValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
boost_variant_accessor< NullableString > NULLABLE_STRING_ACCESSOR
std::unique_ptr< std::vector< std::string > > column_data_
CHECK(cgen_state)
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void convertToColumnarFormatFromString(size_t row, const TargetValue *value)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
boost_variant_accessor< GeoMultiPolyTargetValue > GEO_MULTI_POLY_VALUE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
boost_variant_accessor< GeoPolyTargetValue > GEO_POLY_VALUE_ACCESSOR
boost_variant_accessor< std::string > STRING_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
void finalizeDataBlocksForInsertData() override
GeoLinestringValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
const std::map< int32_t, std::string > getTransientMapping() const
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1377
specifies the content in-memory of a row in the column metadata table
GeoMultiPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< ELEMENT_CONVERTER > element_converter_
Importer_NS::RenderGroupAnalyzer render_group_analyzer_
T get_fixed_array_null_value()
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< int32_t[]> render_group_data_
StringValueConverter(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd, size_t num_rows, bool dictEncoded, int32_t sourceDictId, StringDictionaryProxy *literals_dict)
void finalizeDataBlocksForInsertData() override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
bool is_null(const T &v, const SQLTypeInfo &t)
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:64
int32_t getOrAdd(const std::string &str) noexcept
void allocateColumnarData(size_t num_rows) override
int32_t convertTransientStringIdToPermanentId(int32_t &transient_string_id)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< std::vector< std::string > > column_data_
const ColumnDescriptor * render_group_column_descriptor_
Importer_NS::RenderGroupAnalyzer render_group_analyzer_
typename NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr ElementsDataColumnPtr
std::unordered_map< int32_t, int32_t > literals_lookup_
const ColumnDescriptor * coords_column_descriptor_
void allocateColumnarData(size_t num_rows) override
std::string getString(int32_t string_id) const
void allocateColumnarData(size_t num_rows) override
const ColumnDescriptor * render_group_column_descriptor_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
static void populate_string_array_ids(std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict)
NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr processBuffer(ElementsBufferColumnPtr buffer)
std::unique_ptr< std::vector< std::pair< size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr > > > column_buffer_
boost_variant_accessor< GeoTargetValue > GEO_TARGET_VALUE_ACCESSOR
Descriptor for a dictionary for a string column.
const ColumnDescriptor * bounds_column_descriptor_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
std::unique_ptr< std::vector< ArrayDatum > > column_data_
SQLTypeInfo columnType
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnBuffer, const ScalarTargetValue *scalarValue)
void allocateColumnarData(size_t num_rows) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
int8_t * numbersPtr
Definition: sqltypes.h:140
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
boost_variant_accessor< ArrayTargetValue > ARRAY_VALUE_ACCESSOR
boost_variant_accessor< GeoPointTargetValue > GEO_POINT_VALUE_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
boost_variant_accessor< SOURCE_TYPE > SOURCE_TYPE_ACCESSOR
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:62
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
GeoPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
std::unique_ptr< std::vector< ArrayDatum > > signed_compressed_coords_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:122
StringDictionaryProxy * source_dict_proxy_
NumericValueConverter(const ColumnDescriptor *cd, size_t num_rows, TARGET_TYPE nullValue, SOURCE_TYPE nullCheckValue, bool doNullCheck)
DictionaryValueConverter(const Catalog_Namespace::Catalog &cat, int32_t sourceDictId, const ColumnDescriptor *targetDescriptor, size_t num_rows, TARGET_TYPE nullValue, int64_t nullCheckValue, bool doNullCheck, StringDictionaryProxy *literals_dict, StringDictionaryProxy *source_dict_proxy)
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156