OmniSciDB  b24e664e58
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TargetValueConvertersImpl.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018, OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef TARGET_VALUE_CONVERTERS_IMPL_H_
18 #define TARGET_VALUE_CONVERTERS_IMPL_H_
19 
20 #include "../StringDictionary/StringDictionary.h"
21 #include "TargetValueConverters.h"
22 
23 #include <atomic>
24 #include <future>
25 #include <thread>
26 
27 namespace Importer_NS {
28 std::vector<uint8_t> compress_coords(std::vector<double>& coords, const SQLTypeInfo& ti);
29 } // namespace Importer_NS
30 
31 template <typename SOURCE_TYPE, typename TARGET_TYPE>
33  using ColumnDataPtr = std::unique_ptr<TARGET_TYPE, CheckedMallocDeleter<TARGET_TYPE>>;
35 
37  TARGET_TYPE null_value_;
38  SOURCE_TYPE null_check_value_;
40 
42 
44  size_t num_rows,
45  TARGET_TYPE nullValue,
46  SOURCE_TYPE nullCheckValue,
47  bool doNullCheck)
49  , null_value_(nullValue)
50  , null_check_value_(nullCheckValue)
51  , do_null_check_(doNullCheck) {
52  if (num_rows) {
53  allocateColumnarData(num_rows);
54  }
55  }
56 
57  ~NumericValueConverter() override {}
58 
59  void allocateColumnarData(size_t num_rows) override {
60  CHECK(num_rows > 0);
62  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
63  }
64 
66  CHECK(num_rows > 0);
68  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
69  }
70 
72  size_t row,
73  typename ElementsBufferColumnPtr::pointer columnData,
74  const ScalarTargetValue* scalarValue) {
75  auto mapd_p = checked_get<SOURCE_TYPE>(row, scalarValue, SOURCE_TYPE_ACCESSOR);
76  auto val = *mapd_p;
77 
78  if (do_null_check_ && null_check_value_ == val) {
79  columnData[row] = null_value_;
80  } else {
81  columnData[row] = static_cast<TARGET_TYPE>(val);
82  }
83  }
84 
85  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
86  convertElementToColumnarFormat(row, column_data_.get(), scalarValue);
87  }
88 
89  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
90  auto scalarValue =
91  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
92  convertToColumnarFormat(row, scalarValue);
93  }
94 
96  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
97  array_buffer,
98  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
99  for (size_t row = 0; row < array_buffer->size(); row++) {
100  auto& element = (array_buffer->at(row));
101  bool is_null = false;
102  if (element.second) {
103  ColumnDataPtr& data = element.second;
104  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
105  (*arrayData)[row] =
106  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
107  }
108  }
109  }
110 
112  DataBlockPtr dataBlock;
113  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(column_data_.get());
114  insertData.data.push_back(dataBlock);
115  insertData.columnIds.push_back(column_descriptor_->columnId);
116  }
117 };
118 
119 template <typename TARGET_TYPE>
120 struct DictionaryValueConverter : public NumericValueConverter<int64_t, TARGET_TYPE> {
121  using ElementsDataColumnPtr =
123 
124  using ElementsBufferColumnPtr = std::unique_ptr<std::vector<int32_t>>;
125 
127 
130 
132 
133  const int32_t buffer_null_sentinal_ = std::numeric_limits<int32_t>::min();
134 
136 
137  std::unordered_map<int32_t, int32_t> literals_lookup_;
139 
141  int32_t sourceDictId,
142  const ColumnDescriptor* targetDescriptor,
143  size_t num_rows,
144  TARGET_TYPE nullValue,
145  int64_t nullCheckValue,
146  bool doNullCheck,
147  StringDictionaryProxy* literals_dict,
148  StringDictionaryProxy* source_dict_proxy)
149  : NumericValueConverter<int64_t, TARGET_TYPE>(targetDescriptor,
150  num_rows,
151  nullValue,
152  nullCheckValue,
153  doNullCheck) {
154  literals_dict_ = literals_dict;
156  cat.getMetadataForDict(targetDescriptor->columnType.get_comp_param(), true);
157 
158  source_dict_desc_ = nullptr;
159  source_dict_proxy_ = source_dict_proxy;
160 
161  use_literals_ = 0 == sourceDictId;
162  if (!use_literals_) {
163  source_dict_desc_ = cat.getMetadataForDict(std::abs(sourceDictId), true);
165  } else {
166  CHECK(literals_dict);
167 
168  for (auto& entry : literals_dict->getTransientMapping()) {
169  auto newId = target_dict_desc_->stringDict->getOrAdd(entry.second);
170  literals_lookup_[entry.first] = newId;
171  }
172 
174  }
175 
177 
178  if (num_rows) {
180  }
181  }
182 
184 
186  CHECK(num_rows > 0);
187  return std::make_unique<std::vector<int32_t>>(num_rows);
188  }
189 
191  size_t row,
192  typename ElementsBufferColumnPtr::pointer columnBuffer,
193  const ScalarTargetValue* scalarValue) {
194  auto mapd_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
195  auto val = *mapd_p;
196 
197  if (this->do_null_check_ && this->null_check_value_ == val) {
198  (*columnBuffer)[row] = this->buffer_null_sentinal_;
199  } else {
200  (*columnBuffer)[row] = (int32_t)val;
201  }
202  }
203 
204  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
205  convertElementToColumnarFormat(row, this->column_buffer_.get(), scalarValue);
206  }
207 
208  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
209  auto scalarValue =
210  checked_get<ScalarTargetValue>(row, value, this->SCALAR_TARGET_VALUE_ACCESSOR);
211 
212  convertToColumnarFormat(row, scalarValue);
213  }
214 
215  inline int32_t convertTransientStringIdToPermanentId(int32_t& transient_string_id) {
216  if (source_dict_proxy_) {
217  auto str = source_dict_proxy_->getString(transient_string_id);
218  return source_dict_proxy_->getOrAdd(str);
219  } else {
220  throw std::runtime_error("Unexpected negative source ID");
221  }
222  }
223 
225  ElementsBufferColumnPtr buffer) {
228  reinterpret_cast<TARGET_TYPE*>(
229  checked_malloc(buffer->size() * sizeof(TARGET_TYPE))));
230 
231  std::vector<int32_t>* bufferPtr =
232  reinterpret_cast<std::vector<int32_t>*>(buffer.get());
233  TARGET_TYPE* columnDataPtr = reinterpret_cast<TARGET_TYPE*>(data.get());
234  if (use_literals_) {
235  for (size_t i = 0; i < bufferPtr->size(); i++) {
236  auto id = literals_lookup_[(*bufferPtr)[i]];
237  if (id == buffer_null_sentinal_) {
238  columnDataPtr[i] = this->null_value_;
239  } else {
240  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
241  }
242  }
243  } else {
245  // special case, where source and target dict are the same
246  // mostly in update
247  for (size_t i = 0; i < bufferPtr->size(); i++) {
248  auto src_id = (*bufferPtr)[i];
249  if (src_id == buffer_null_sentinal_) {
250  columnDataPtr[i] = this->null_value_;
251  } else if (src_id < 0) {
252  columnDataPtr[i] = convertTransientStringIdToPermanentId(src_id);
253  } else {
254  columnDataPtr[i] = static_cast<TARGET_TYPE>(src_id);
255  }
256  }
257 
258  } else {
259  std::vector<int32_t> dest_ids;
260  dest_ids.resize(bufferPtr->size());
261 
262  if (source_dict_proxy_) {
264  dest_ids,
266  *bufferPtr,
269  } else {
272  *bufferPtr,
274  }
275 
276  // fixup NULL sentinel
277  for (size_t i = 0; i < dest_ids.size(); i++) {
278  auto id = dest_ids[i];
279  if (id == buffer_null_sentinal_) {
280  columnDataPtr[i] = this->null_value_;
281  } else {
282  CHECK(std::numeric_limits<TARGET_TYPE>::max() >= id);
283  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
284  }
285  }
286  }
287  }
288 
289  return data;
290  }
291 
293  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
294  array_buffer,
295  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
296  if (use_literals_) {
297  for (size_t row = 0; row < array_buffer->size(); row++) {
298  auto& element = (array_buffer->at(row));
299  bool is_null = false;
300  if (element.second) {
302  processBuffer(std::move(element.second));
303  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
304  (*arrayData)[row] =
305  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
306  }
307  }
308  } else {
309  std::vector<std::vector<int32_t>> srcArrayIds(array_buffer->size());
310  std::vector<std::vector<int32_t>> destArrayIds(0);
311 
312  for (size_t row = 0; row < array_buffer->size(); row++) {
313  auto& element = (array_buffer->at(row));
314  if (element.second) {
315  srcArrayIds[row] = *(element.second.get());
316  }
317  }
318 
321  srcArrayIds,
323 
324  for (size_t row = 0; row < array_buffer->size(); row++) {
325  auto& element = (array_buffer->at(row));
326  bool is_null = false;
327  if (element.second) {
328  *(element.second.get()) = destArrayIds[row];
329  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(&(element.second->at(0)));
330  (*arrayData)[row] = ArrayDatum(element.first * sizeof(TARGET_TYPE),
331  arrayDataPtr,
332  is_null,
333  DoNothingDeleter());
334  }
335  }
336  }
337  }
338 
340  if (column_buffer_) {
341  this->column_data_ = processBuffer(std::move(column_buffer_));
342  column_buffer_ = nullptr;
343  }
344  }
345 
348  DataBlockPtr dataBlock;
349  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(this->column_data_.get());
350  insertData.data.push_back(dataBlock);
351  insertData.columnIds.push_back(this->column_descriptor_->columnId);
352  }
353 };
354 
356  std::unique_ptr<std::vector<std::string>> column_data_;
357 
359 
363 
365  const ColumnDescriptor* cd,
366  size_t num_rows,
367  bool dictEncoded,
368  int32_t sourceDictId,
369  StringDictionaryProxy* literals_dict)
370  : TargetValueConverter(cd) {
371  source_dict_ = nullptr;
372  literals_source_dict_ = nullptr;
373  dict_encoded_ = dictEncoded;
374  if (dictEncoded) {
375  if (0 != sourceDictId) {
376  auto source_dict_desc = cat.getMetadataForDict(std::abs(sourceDictId), true);
377  CHECK(source_dict_desc);
378  source_dict_ = source_dict_desc->stringDict.get();
380  } else {
381  literals_source_dict_ = literals_dict;
382  }
383  }
384  if (num_rows) {
385  allocateColumnarData(num_rows);
386  }
387  }
388 
389  ~StringValueConverter() override {}
390 
391  void allocateColumnarData(size_t num_rows) override {
392  CHECK(num_rows > 0);
393  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
394  }
395 
396  void convertToColumnarFormatFromDict(size_t row, const TargetValue* value) {
397  auto scalarValue =
398  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
399  auto mapd_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
400  auto val = *mapd_p;
401 
402  if (std::numeric_limits<int32_t>::min() == val) {
403  (*column_data_)[row] = std::string("");
404  } else {
405  if (source_dict_) {
406  std::string strVal = source_dict_->getString(val);
407  (*column_data_)[row] = strVal;
408  } else if (literals_source_dict_) {
409  std::string strVal = literals_source_dict_->getString(val);
410  (*column_data_)[row] = strVal;
411  } else {
412  CHECK_EQ(val, inline_int_null_value<int32_t>());
413  std::string nullStr = "";
414  (*column_data_)[row] = nullStr;
415  }
416  }
417  }
418 
419  void convertToColumnarFormatFromString(size_t row, const TargetValue* value) {
420  auto scalarValue =
421  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
422  auto mapd_p = checked_get<NullableString>(row, scalarValue, NULLABLE_STRING_ACCESSOR);
423 
424  const auto mapd_str_p = checked_get<std::string>(row, mapd_p, STRING_ACCESSOR);
425 
426  if (nullptr != mapd_str_p) {
427  (*column_data_)[row] = *mapd_str_p;
428  } else {
429  (*column_data_)[row] = std::string("");
430  }
431  }
432 
433  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
434  if (dict_encoded_) {
436  } else {
438  }
439  }
440 
442  DataBlockPtr dataBlock;
443  dataBlock.stringsPtr = column_data_.get();
444  insertData.data.push_back(dataBlock);
445  insertData.columnIds.push_back(column_descriptor_->columnId);
446  }
447 };
448 
449 template <typename ELEMENT_CONVERTER>
451  std::unique_ptr<
452  std::vector<std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>
454  std::unique_ptr<std::vector<ArrayDatum>> column_data_;
455  std::unique_ptr<ELEMENT_CONVERTER> element_converter_;
458  bool data_finalized_ = false;
459 
461 
463  size_t num_rows,
464  std::unique_ptr<ELEMENT_CONVERTER> element_converter,
465  bool do_check_null)
467  , element_converter_(std::move(element_converter))
468  , element_type_info_(cd->columnType.get_elem_type())
469  , do_check_null_(do_check_null) {
470  if (num_rows) {
471  allocateColumnarData(num_rows);
472  }
473  }
474 
475  ~ArrayValueConverter() override {}
476 
477  void allocateColumnarData(size_t num_rows) override {
478  CHECK(num_rows > 0);
479  column_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
480  column_buffer_ = std::make_unique<std::vector<
481  std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>(
482  num_rows);
483  }
484 
485  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
486  const auto arrayValue =
487  checked_get<ArrayTargetValue>(row, value, ARRAY_VALUE_ACCESSOR);
488  CHECK(arrayValue);
489  if (arrayValue->is_initialized()) {
490  const auto& vec = arrayValue->get();
491  bool is_null = false;
492  if (vec.size()) {
493  typename ELEMENT_CONVERTER::ElementsBufferColumnPtr elementBuffer =
494  element_converter_->allocateColumnarBuffer(vec.size());
495 
496  int elementIndex = 0;
497  for (const auto& scalarValue : vec) {
498  element_converter_->convertElementToColumnarFormat(
499  elementIndex++, elementBuffer.get(), &scalarValue);
500  }
501 
502  column_buffer_->at(row) = {vec.size(), std::move(elementBuffer)};
503 
504  } else {
505  // Empty, not NULL
506  (*column_data_)[row] = ArrayDatum(0, nullptr, is_null, DoNothingDeleter());
507  }
508  } else {
509  // TODO: what does it mean if do_check_null_ is set to false and we get a NULL?
510  // CHECK(do_check_null_); // May need to check
511  bool is_null = true; // do_check_null_;
512  (*column_data_)[row] = ArrayDatum(0, nullptr, is_null, DoNothingDeleter());
513  (*column_data_)[row].is_null = is_null;
514  }
515  }
516 
518  if (!data_finalized_) {
519  element_converter_->processArrayBuffer(column_buffer_, column_data_.get());
520  data_finalized_ = true;
521  }
522  }
523 
526  DataBlockPtr dataBlock;
527  dataBlock.arraysPtr = column_data_.get();
528  insertData.data.push_back(dataBlock);
529  insertData.columnIds.push_back(column_descriptor_->columnId);
530  }
531 };
532 
535 
536  std::unique_ptr<std::vector<std::string>> column_data_;
537  std::unique_ptr<std::vector<ArrayDatum>> signed_compressed_coords_data_;
538 
540  size_t num_rows,
541  const ColumnDescriptor* logicalColumnDescriptor)
542  : TargetValueConverter(logicalColumnDescriptor) {
546 
547  if (num_rows) {
548  allocateColumnarData(num_rows);
549  }
550  }
551 
553 
554  void allocateColumnarData(size_t num_rows) override {
555  CHECK(num_rows > 0);
556  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
557  signed_compressed_coords_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
558  }
559 
561 
563  const std::shared_ptr<std::vector<double>>& coords) {
564  const auto compressed_coords_vector =
566 
567  uint8_t* compressed_coords_array = reinterpret_cast<uint8_t*>(
568  checked_malloc(sizeof(uint8_t) * compressed_coords_vector.size()));
569  memcpy(compressed_coords_array,
570  &compressed_coords_vector[0],
571  compressed_coords_vector.size());
572 
573  return ArrayDatum((int)compressed_coords_vector.size(),
574  reinterpret_cast<int8_t*>(compressed_coords_array),
575  false);
576  }
577 
578  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
579  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
580  auto geoPoint =
581  checked_get<GeoPointTargetValue>(row, geoValue, GEO_POINT_VALUE_ACCESSOR);
582 
583  (*column_data_)[row] = "";
584  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoint->coords);
585  }
586 
588  DataBlockPtr logical, coords;
589 
590  logical.stringsPtr = column_data_.get();
592 
593  insertData.data.emplace_back(logical);
594  insertData.columnIds.emplace_back(column_descriptor_->columnId);
595 
596  insertData.data.emplace_back(coords);
597  insertData.columnIds.emplace_back(coords_column_descriptor_->columnId);
598  }
599 };
600 
/// Computes the axis-aligned bounding box of a flat, interleaved coordinate
/// list (x0, y0, x1, y1, ...).
///
/// @param coords  Interleaved x/y values; must not be null.
/// @return A 4-element vector {min_x, min_y, max_x, max_y}. When `coords`
///         holds no complete (x, y) pair the result is
///         {max, max, lowest, lowest} of double, i.e. the untouched seed values.
///
/// Only complete pairs are consumed: a trailing unpaired value in an
/// odd-length list is ignored rather than read past the end of the vector.
inline std::vector<double> compute_bounds_of_coords(
    const std::shared_ptr<std::vector<double>>& coords) {
  constexpr auto DOUBLE_MAX = std::numeric_limits<double>::max();
  constexpr auto DOUBLE_MIN = std::numeric_limits<double>::lowest();

  // Seed minima with the largest value and maxima with the lowest one so the
  // first real pair always replaces them.
  std::vector<double> bounds{DOUBLE_MAX, DOUBLE_MAX, DOUBLE_MIN, DOUBLE_MIN};

  const auto size_coords = coords->size();
  // `i + 1 < size_coords` guarantees that the y read below stays in range.
  for (size_t i = 0; i + 1 < size_coords; i += 2) {
    const double x = (*coords)[i];
    const double y = (*coords)[i + 1];

    bounds[0] = std::min(bounds[0], x);
    bounds[1] = std::min(bounds[1], y);
    bounds[2] = std::max(bounds[2], x);
    bounds[3] = std::max(bounds[3], y);
  }
  return bounds;
}
623 
624 template <typename ELEM_TYPE>
625 inline ArrayDatum to_array_datum(const std::vector<ELEM_TYPE>& vector) {
626  ELEM_TYPE* array =
627  reinterpret_cast<ELEM_TYPE*>(checked_malloc(sizeof(ELEM_TYPE) * vector.size()));
628  memcpy(array, vector.data(), vector.size() * sizeof(ELEM_TYPE));
629 
630  return ArrayDatum(
631  (int)(vector.size() * sizeof(ELEM_TYPE)), reinterpret_cast<int8_t*>(array), false);
632 }
633 
634 template <typename ELEM_TYPE>
635 inline ArrayDatum to_array_datum(const std::shared_ptr<std::vector<ELEM_TYPE>>& vector) {
636  return to_array_datum(*vector.get());
637 }
638 
641 
642  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
643 
645  size_t num_rows,
646  const ColumnDescriptor* logicalColumnDescriptor)
647  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
651 
652  if (num_rows) {
653  allocateColumnarData(num_rows);
654  }
655  }
656 
658 
659  void allocateColumnarData(size_t num_rows) override {
660  CHECK(num_rows > 0);
662  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
663  }
664 
666 
667  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
668  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
669  auto geoLinestring = checked_get<GeoLineStringTargetValue>(
670  row, geoValue, GEO_LINESTRING_VALUE_ACCESSOR);
671 
672  (*column_data_)[row] = "";
673  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoLinestring->coords);
674  auto bounds = compute_bounds_of_coords(geoLinestring->coords);
675  (*bounds_data_)[row] = to_array_datum(bounds);
676  }
677 
680 
681  DataBlockPtr bounds;
682 
683  bounds.arraysPtr = bounds_data_.get();
684 
685  insertData.data.emplace_back(bounds);
686  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
687  }
688 };
689 
695 
696  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
697  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
698  std::unique_ptr<int32_t[]> render_group_data_;
699 
701  size_t num_rows,
702  const ColumnDescriptor* logicalColumnDescriptor)
703  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
713 
714  if (num_rows) {
715  allocateColumnarData(num_rows);
716  }
717  }
718 
720 
721  void allocateColumnarData(size_t num_rows) override {
723  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
724  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
725  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
726  }
727 
729 
730  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
731  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
732  auto geoPoly =
733  checked_get<GeoPolyTargetValue>(row, geoValue, GEO_POLY_VALUE_ACCESSOR);
734 
735  (*column_data_)[row] = "";
736  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoly->coords);
737  (*ring_sizes_data_)[row] = to_array_datum(geoPoly->ring_sizes);
738  auto bounds = compute_bounds_of_coords(geoPoly->coords);
739  (*bounds_data_)[row] = to_array_datum(bounds);
740  render_group_data_[row] =
742  }
743 
746 
747  DataBlockPtr ringSizes, bounds, renderGroup;
748 
749  ringSizes.arraysPtr = ring_sizes_data_.get();
750  bounds.arraysPtr = bounds_data_.get();
751  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
752 
753  insertData.data.emplace_back(ringSizes);
754  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
755 
756  insertData.data.emplace_back(bounds);
757  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
758 
759  insertData.data.emplace_back(renderGroup);
760  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
761  }
762 };
763 
770 
771  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
772  std::unique_ptr<std::vector<ArrayDatum>> poly_rings_data_;
773  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
774  std::unique_ptr<int32_t[]> render_group_data_;
775 
777  size_t num_rows,
778  const ColumnDescriptor* logicalColumnDescriptor)
779  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
792 
793  if (num_rows) {
794  allocateColumnarData(num_rows);
795  }
796  }
797 
799 
800  void allocateColumnarData(size_t num_rows) override {
802  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
803  poly_rings_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
804  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
805  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
806  }
807 
809 
810  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
811  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
812  auto geoMultiPoly = checked_get<GeoMultiPolyTargetValue>(
813  row, geoValue, GEO_MULTI_POLY_VALUE_ACCESSOR);
814 
815  (*column_data_)[row] = "";
816  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoMultiPoly->coords);
817  (*ring_sizes_data_)[row] = to_array_datum(geoMultiPoly->ring_sizes);
818  (*poly_rings_data_)[row] = to_array_datum(geoMultiPoly->poly_rings);
819  auto bounds = compute_bounds_of_coords(geoMultiPoly->coords);
820  (*bounds_data_)[row] = to_array_datum(bounds);
821  render_group_data_[row] =
823  }
824 
827 
828  DataBlockPtr ringSizes, polyRings, bounds, renderGroup;
829 
830  ringSizes.arraysPtr = ring_sizes_data_.get();
831  polyRings.arraysPtr = poly_rings_data_.get();
832  bounds.arraysPtr = bounds_data_.get();
833  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
834 
835  insertData.data.emplace_back(ringSizes);
836  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
837 
838  insertData.data.emplace_back(polyRings);
839  insertData.columnIds.emplace_back(ring_sizes_solumn_descriptor_->columnId);
840 
841  insertData.data.emplace_back(bounds);
842  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
843 
844  insertData.data.emplace_back(renderGroup);
845  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
846  }
847 };
848 
849 #endif
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
ArrayDatum to_array_datum(const std::vector< ELEM_TYPE > &vector)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
std::unique_ptr< TARGET_TYPE, CheckedMallocDeleter< TARGET_TYPE >> ColumnDataPtr
std::unique_ptr< std::vector< int32_t >> ElementsBufferColumnPtr
StringDictionaryProxy * literals_source_dict_
#define CHECK_EQ(x, y)
Definition: Logger.h:198
void convertToColumnarFormatFromDict(size_t row, const TargetValue *value)
void allocateColumnarData(size_t num_rows) override
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const int8_t const int64_t * num_rows
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:81
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:335
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:141
const ColumnDescriptor * ring_sizes_solumn_descriptor_
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:142
std::unique_ptr< int32_t[]> render_group_data_
void allocateColumnarData(size_t num_rows) override
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
boost_variant_accessor< int64_t > SOURCE_TYPE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
int insertBoundsAndReturnRenderGroup(const std::vector< double > &bounds)
Definition: Importer.cpp:4988
ArrayValueConverter(const ColumnDescriptor *cd, size_t num_rows, std::unique_ptr< ELEMENT_CONVERTER > element_converter, bool do_check_null)
const DictDescriptor * source_dict_desc_
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Importer.cpp:1422
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
void allocateColumnarData(size_t num_rows) override
const ColumnDescriptor * column_descriptor_
std::string getString(int32_t string_id) const
ArrayDatum toCompressedCoords(const std::shared_ptr< std::vector< double >> &coords)
static void populate_string_ids(std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict, const std::map< int32_t, std::string > transient_mapping={})
Populates provided dest_ids vector with string ids corresponding to given source strings.
ElementsBufferColumnPtr column_buffer_
std::vector< double > compute_bounds_of_coords(const std::shared_ptr< std::vector< double >> &coords)
std::unique_ptr< std::vector< ArrayDatum > > poly_rings_data_
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnData, const ScalarTargetValue *scalarValue)
const DictDescriptor * target_dict_desc_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const StringDictionaryProxy * literals_dict_
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::shared_ptr< StringDictionary > stringDict
boost_variant_accessor< ScalarTargetValue > SCALAR_TARGET_VALUE_ACCESSOR
boost_variant_accessor< GeoLineStringTargetValue > GEO_LINESTRING_VALUE_ACCESSOR
GeoPointValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
boost_variant_accessor< NullableString > NULLABLE_STRING_ACCESSOR
std::unique_ptr< std::vector< std::string > > column_data_
CHECK(cgen_state)
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void convertToColumnarFormatFromString(size_t row, const TargetValue *value)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
boost_variant_accessor< GeoMultiPolyTargetValue > GEO_MULTI_POLY_VALUE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
boost_variant_accessor< GeoPolyTargetValue > GEO_POLY_VALUE_ACCESSOR
boost_variant_accessor< std::string > STRING_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
void finalizeDataBlocksForInsertData() override
GeoLinestringValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
const std::map< int32_t, std::string > getTransientMapping() const
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1350
specifies the content in-memory of a row in the column metadata table
GeoMultiPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< ELEMENT_CONVERTER > element_converter_
Importer_NS::RenderGroupAnalyzer render_group_analyzer_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< int32_t[]> render_group_data_
StringValueConverter(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd, size_t num_rows, bool dictEncoded, int32_t sourceDictId, StringDictionaryProxy *literals_dict)
void finalizeDataBlocksForInsertData() override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
bool is_null(const T &v, const SQLTypeInfo &t)
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:64
int32_t getOrAdd(const std::string &str) noexcept
void allocateColumnarData(size_t num_rows) override
int32_t convertTransientStringIdToPermanentId(int32_t &transient_string_id)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< std::vector< std::string > > column_data_
const ColumnDescriptor * render_group_column_descriptor_
Importer_NS::RenderGroupAnalyzer render_group_analyzer_
typename NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr ElementsDataColumnPtr
std::unordered_map< int32_t, int32_t > literals_lookup_
const ColumnDescriptor * coords_column_descriptor_
void allocateColumnarData(size_t num_rows) override
std::string getString(int32_t string_id) const
void allocateColumnarData(size_t num_rows) override
const ColumnDescriptor * render_group_column_descriptor_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
static void populate_string_array_ids(std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict)
NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr processBuffer(ElementsBufferColumnPtr buffer)
std::unique_ptr< std::vector< std::pair< size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr > > > column_buffer_
boost_variant_accessor< GeoTargetValue > GEO_TARGET_VALUE_ACCESSOR
Descriptor for a dictionary for a string column.
const ColumnDescriptor * bounds_column_descriptor_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
std::unique_ptr< std::vector< ArrayDatum > > column_data_
SQLTypeInfo columnType
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnBuffer, const ScalarTargetValue *scalarValue)
void allocateColumnarData(size_t num_rows) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
int8_t * numbersPtr
Definition: sqltypes.h:140
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
boost_variant_accessor< ArrayTargetValue > ARRAY_VALUE_ACCESSOR
boost_variant_accessor< GeoPointTargetValue > GEO_POINT_VALUE_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
boost_variant_accessor< SOURCE_TYPE > SOURCE_TYPE_ACCESSOR
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:62
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
GeoPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
std::unique_ptr< std::vector< ArrayDatum > > signed_compressed_coords_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:122
StringDictionaryProxy * source_dict_proxy_
NumericValueConverter(const ColumnDescriptor *cd, size_t num_rows, TARGET_TYPE nullValue, SOURCE_TYPE nullCheckValue, bool doNullCheck)
DictionaryValueConverter(const Catalog_Namespace::Catalog &cat, int32_t sourceDictId, const ColumnDescriptor *targetDescriptor, size_t num_rows, TARGET_TYPE nullValue, int64_t nullCheckValue, bool doNullCheck, StringDictionaryProxy *literals_dict, StringDictionaryProxy *source_dict_proxy)
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156