OmniSciDB  c07336695a
TargetValueConvertersImpl.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018, OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef TARGET_VALUE_CONVERTERS_IMPL_H_
18 #define TARGET_VALUE_CONVERTERS_IMPL_H_
19 
20 #include "../StringDictionary/StringDictionary.h"
21 #include "TargetValueConverters.h"
22 
23 #include <atomic>
24 #include <future>
25 #include <thread>
26 
27 namespace Importer_NS {
28 std::vector<uint8_t> compress_coords(std::vector<double>& coords, const SQLTypeInfo& ti);
29 } // namespace Importer_NS
30 
31 template <typename SOURCE_TYPE, typename TARGET_TYPE>
33  using ColumnDataPtr = std::unique_ptr<TARGET_TYPE, CheckedMallocDeleter<TARGET_TYPE>>;
35 
37  TARGET_TYPE null_value_;
38  SOURCE_TYPE null_check_value_;
40 
42 
44  size_t num_rows,
45  TARGET_TYPE nullValue,
46  SOURCE_TYPE nullCheckValue,
47  bool doNullCheck)
49  , null_value_(nullValue)
50  , null_check_value_(nullCheckValue)
51  , do_null_check_(doNullCheck) {
52  if (num_rows) {
53  allocateColumnarData(num_rows);
54  }
55  }
56 
57  ~NumericValueConverter() override {}
58 
59  void allocateColumnarData(size_t num_rows) override {
60  CHECK(num_rows > 0);
61  column_data_ = ColumnDataPtr(
62  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
63  }
64 
66  CHECK(num_rows > 0);
68  reinterpret_cast<TARGET_TYPE*>(checked_malloc(num_rows * sizeof(TARGET_TYPE))));
69  }
70 
72  size_t row,
73  typename ElementsBufferColumnPtr::pointer columnData,
74  const ScalarTargetValue* scalarValue) {
75  auto mapd_p = checked_get<SOURCE_TYPE>(row, scalarValue, SOURCE_TYPE_ACCESSOR);
76  auto val = *mapd_p;
77 
78  if (do_null_check_ && null_check_value_ == val) {
79  columnData[row] = null_value_;
80  } else {
81  columnData[row] = static_cast<TARGET_TYPE>(val);
82  }
83  }
84 
85  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
86  convertElementToColumnarFormat(row, column_data_.get(), scalarValue);
87  }
88 
89  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
90  auto scalarValue =
91  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
92  convertToColumnarFormat(row, scalarValue);
93  }
94 
96  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
97  array_buffer,
98  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
99  for (size_t row = 0; row < array_buffer->size(); row++) {
100  auto& element = (array_buffer->at(row));
101  bool is_null = false;
102  if (element.second) {
103  ColumnDataPtr& data = element.second;
104  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
105  (*arrayData)[row] =
106  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
107  }
108  }
109  }
110 
112  DataBlockPtr dataBlock;
113  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(column_data_.get());
114  insertData.data.push_back(dataBlock);
115  insertData.columnIds.push_back(column_descriptor_->columnId);
116  }
117 };
118 
119 template <typename TARGET_TYPE>
120 struct DictionaryValueConverter : public NumericValueConverter<int64_t, TARGET_TYPE> {
121  using ElementsDataColumnPtr =
123 
124  using ElementsBufferColumnPtr = std::unique_ptr<std::vector<int32_t>>;
125 
127 
130 
131  const int32_t buffer_null_sentinal_ = std::numeric_limits<int32_t>::min();
132 
134 
135  std::unordered_map<int32_t, int32_t> literals_lookup_;
137 
139  int32_t sourceDictId,
140  const ColumnDescriptor* targetDescriptor,
141  size_t num_rows,
142  TARGET_TYPE nullValue,
143  int64_t nullCheckValue,
144  bool doNullCheck,
145  StringDictionaryProxy* literals_dict)
146  : NumericValueConverter<int64_t, TARGET_TYPE>(targetDescriptor,
147  num_rows,
148  nullValue,
149  nullCheckValue,
150  doNullCheck) {
151  literals_dict_ = literals_dict;
152  target_dict_desc_ =
153  cat.getMetadataForDict(targetDescriptor->columnType.get_comp_param(), true);
154 
155  source_dict_desc_ = nullptr;
156 
157  use_literals_ = 0 == sourceDictId;
158  if (!use_literals_) {
159  source_dict_desc_ = cat.getMetadataForDict(std::abs(sourceDictId), true);
160  CHECK(source_dict_desc_);
161  } else {
162  CHECK(literals_dict);
163 
164  for (auto& entry : literals_dict->getTransientMapping()) {
165  auto newId = target_dict_desc_->stringDict->getOrAdd(entry.second);
166  literals_lookup_[entry.first] = newId;
167  }
168 
169  literals_lookup_[buffer_null_sentinal_] = buffer_null_sentinal_;
170  }
171 
172  CHECK(target_dict_desc_);
173 
174  if (num_rows) {
175  column_buffer_ = allocateColumnarBuffer(num_rows);
176  }
177  }
178 
180 
182  CHECK(num_rows > 0);
183  return std::make_unique<std::vector<int32_t>>(num_rows);
184  }
185 
187  size_t row,
188  typename ElementsBufferColumnPtr::pointer columnBuffer,
189  const ScalarTargetValue* scalarValue) {
190  auto mapd_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
191  auto val = *mapd_p;
192 
193  if (this->do_null_check_ && this->null_check_value_ == val) {
194  (*columnBuffer)[row] = this->buffer_null_sentinal_;
195  } else {
196  (*columnBuffer)[row] = (int32_t)val;
197  }
198  }
199 
200  void convertToColumnarFormat(size_t row, const ScalarTargetValue* scalarValue) {
201  convertElementToColumnarFormat(row, this->column_buffer_.get(), scalarValue);
202  }
203 
204  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
205  auto scalarValue =
206  checked_get<ScalarTargetValue>(row, value, this->SCALAR_TARGET_VALUE_ACCESSOR);
207 
208  convertToColumnarFormat(row, scalarValue);
209  }
210 
212  ElementsBufferColumnPtr buffer) {
215  reinterpret_cast<TARGET_TYPE*>(
216  checked_malloc(buffer->size() * sizeof(TARGET_TYPE))));
217 
218  std::vector<int32_t>* bufferPtr =
219  reinterpret_cast<std::vector<int32_t>*>(buffer.get());
220  TARGET_TYPE* columnDataPtr = reinterpret_cast<TARGET_TYPE*>(data.get());
221  if (use_literals_) {
222  for (size_t i = 0; i < bufferPtr->size(); i++) {
223  auto id = literals_lookup_[(*bufferPtr)[i]];
224  if (id == buffer_null_sentinal_) {
225  columnDataPtr[i] = this->null_value_;
226  } else {
227  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
228  }
229  }
230  } else {
231  if (source_dict_desc_->dictRef == target_dict_desc_->dictRef) {
232  // special case, where source and target dict are the same
233  // mostly in update
234  for (size_t i = 0; i < bufferPtr->size(); i++) {
235  auto src_id = (*bufferPtr)[i];
236  if (src_id == buffer_null_sentinal_) {
237  columnDataPtr[i] = this->null_value_;
238  } else {
239  columnDataPtr[i] = static_cast<TARGET_TYPE>(src_id);
240  }
241  }
242 
243  } else {
244  std::vector<int32_t> dest_ids;
245  dest_ids.resize(bufferPtr->size());
246 
248  target_dict_desc_->stringDict.get(),
249  *bufferPtr,
250  source_dict_desc_->stringDict.get());
251 
252  // fixup NULL sentinel
253  for (size_t i = 0; i < dest_ids.size(); i++) {
254  auto id = dest_ids[i];
255  if (id == buffer_null_sentinal_) {
256  columnDataPtr[i] = this->null_value_;
257  } else {
258  CHECK(std::numeric_limits<TARGET_TYPE>::max() >= id);
259  columnDataPtr[i] = static_cast<TARGET_TYPE>(id);
260  }
261  }
262  }
263  }
264 
265  return data;
266  }
267 
269  std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
270  array_buffer,
271  std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
272  if (use_literals_) {
273  for (size_t row = 0; row < array_buffer->size(); row++) {
274  auto& element = (array_buffer->at(row));
275  bool is_null = false;
276  if (element.second) {
278  processBuffer(std::move(element.second));
279  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(data.release());
280  (*arrayData)[row] =
281  ArrayDatum(element.first * sizeof(TARGET_TYPE), arrayDataPtr, is_null);
282  }
283  }
284  } else {
285  std::vector<std::vector<int32_t>> srcArrayIds(array_buffer->size());
286  std::vector<std::vector<int32_t>> destArrayIds(0);
287 
288  for (size_t row = 0; row < array_buffer->size(); row++) {
289  auto& element = (array_buffer->at(row));
290  if (element.second) {
291  srcArrayIds[row] = *(element.second.get());
292  }
293  }
294 
296  target_dict_desc_->stringDict.get(),
297  srcArrayIds,
298  source_dict_desc_->stringDict.get());
299 
300  for (size_t row = 0; row < array_buffer->size(); row++) {
301  auto& element = (array_buffer->at(row));
302  bool is_null = false;
303  if (element.second) {
304  *(element.second.get()) = destArrayIds[row];
305  int8_t* arrayDataPtr = reinterpret_cast<int8_t*>(&(element.second->at(0)));
306  (*arrayData)[row] = ArrayDatum(element.first * sizeof(TARGET_TYPE),
307  arrayDataPtr,
308  is_null,
309  DoNothingDeleter());
310  }
311  }
312  }
313  }
314 
316  if (column_buffer_) {
317  this->column_data_ = processBuffer(std::move(column_buffer_));
318  column_buffer_ = nullptr;
319  }
320  }
321 
323  finalizeDataBlocksForInsertData();
324  DataBlockPtr dataBlock;
325  dataBlock.numbersPtr = reinterpret_cast<int8_t*>(this->column_data_.get());
326  insertData.data.push_back(dataBlock);
327  insertData.columnIds.push_back(this->column_descriptor_->columnId);
328  }
329 };
330 
332  std::unique_ptr<std::vector<std::string>> column_data_;
333 
335 
338 
340  const ColumnDescriptor* cd,
341  size_t num_rows,
342  bool dictEncoded,
343  int32_t sourceDictId,
344  StringDictionaryProxy* literals_dict)
345  : TargetValueConverter(cd) {
346  source_dict_ = nullptr;
347  literals_source_dict_ = nullptr;
348  if (dictEncoded) {
349  if (0 != sourceDictId) {
350  auto source_dict_desc = cat.getMetadataForDict(std::abs(sourceDictId), true);
351  CHECK(source_dict_desc);
352  source_dict_ = source_dict_desc->stringDict.get();
353  CHECK(source_dict_);
354  } else {
355  literals_source_dict_ = literals_dict;
356  CHECK(literals_source_dict_);
357  }
358  }
359  if (num_rows) {
360  allocateColumnarData(num_rows);
361  }
362  }
363 
364  ~StringValueConverter() override {}
365 
366  void allocateColumnarData(size_t num_rows) override {
367  CHECK(num_rows > 0);
368  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
369  }
370 
371  void convertToColumnarFormatFromDict(size_t row, const TargetValue* value) {
372  auto scalarValue =
373  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
374  auto mapd_p = checked_get<int64_t>(row, scalarValue, this->SOURCE_TYPE_ACCESSOR);
375  auto val = *mapd_p;
376 
377  if (std::numeric_limits<int32_t>::min() == val) {
378  (*column_data_)[row] = std::string("");
379  } else {
380  if (source_dict_) {
381  std::string strVal = source_dict_->getString(val);
382  (*column_data_)[row] = strVal;
383  } else {
384  std::string strVal = literals_source_dict_->getString(val);
385  (*column_data_)[row] = strVal;
386  }
387  }
388  }
389 
390  void convertToColumnarFormatFromString(size_t row, const TargetValue* value) {
391  auto scalarValue =
392  checked_get<ScalarTargetValue>(row, value, SCALAR_TARGET_VALUE_ACCESSOR);
393  auto mapd_p = checked_get<NullableString>(row, scalarValue, NULLABLE_STRING_ACCESSOR);
394 
395  const auto mapd_str_p = checked_get<std::string>(row, mapd_p, STRING_ACCESSOR);
396 
397  if (nullptr != mapd_str_p) {
398  (*column_data_)[row] = *mapd_str_p;
399  } else {
400  (*column_data_)[row] = std::string("");
401  }
402  }
403 
404  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
405  if (source_dict_ || literals_source_dict_) {
406  convertToColumnarFormatFromDict(row, value);
407  } else {
408  convertToColumnarFormatFromString(row, value);
409  }
410  }
411 
413  DataBlockPtr dataBlock;
414  dataBlock.stringsPtr = column_data_.get();
415  insertData.data.push_back(dataBlock);
416  insertData.columnIds.push_back(column_descriptor_->columnId);
417  }
418 };
419 
420 template <typename ELEMENT_CONVERTER>
422  std::unique_ptr<
423  std::vector<std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>
425  std::unique_ptr<std::vector<ArrayDatum>> column_data_;
426  std::unique_ptr<ELEMENT_CONVERTER> element_converter_;
429  bool data_finalized_ = false;
430 
432 
434  size_t num_rows,
435  std::unique_ptr<ELEMENT_CONVERTER> element_converter,
436  bool do_check_null)
438  , element_converter_(std::move(element_converter))
439  , element_type_info_(cd->columnType.get_elem_type())
440  , do_check_null_(do_check_null) {
441  if (num_rows) {
442  allocateColumnarData(num_rows);
443  }
444  }
445 
446  ~ArrayValueConverter() override {}
447 
448  void allocateColumnarData(size_t num_rows) override {
449  CHECK(num_rows > 0);
450  column_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
451  column_buffer_ = std::make_unique<std::vector<
452  std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>(
453  num_rows);
454  }
455 
456  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
457  const auto arrayValue =
458  checked_get<ArrayTargetValue>(row, value, ARRAY_VALUE_ACCESSOR);
459  CHECK(arrayValue);
460  if (arrayValue->is_initialized()) {
461  const auto& vec = arrayValue->get();
462  bool is_null = false;
463  if (vec.size()) {
464  typename ELEMENT_CONVERTER::ElementsBufferColumnPtr elementBuffer =
465  element_converter_->allocateColumnarBuffer(vec.size());
466 
467  int elementIndex = 0;
468  for (const auto& scalarValue : vec) {
469  element_converter_->convertElementToColumnarFormat(
470  elementIndex++, elementBuffer.get(), &scalarValue);
471  }
472 
473  column_buffer_->at(row) = {vec.size(), std::move(elementBuffer)};
474 
475  } else {
476  // Empty, not NULL
477  (*column_data_)[row] = ArrayDatum(0, nullptr, is_null, DoNothingDeleter());
478  }
479  } else {
480  // TODO: what does it mean if do_check_null_ is set to false and we get a NULL?
481  // CHECK(do_check_null_); // May need to check
482  bool is_null = true; // do_check_null_;
483  (*column_data_)[row] = ArrayDatum(0, nullptr, is_null, DoNothingDeleter());
484  (*column_data_)[row].is_null = is_null;
485  }
486  }
487 
489  if (!data_finalized_) {
490  element_converter_->processArrayBuffer(column_buffer_, column_data_.get());
491  data_finalized_ = true;
492  }
493  }
494 
496  finalizeDataBlocksForInsertData();
497  DataBlockPtr dataBlock;
498  dataBlock.arraysPtr = column_data_.get();
499  insertData.data.push_back(dataBlock);
500  insertData.columnIds.push_back(column_descriptor_->columnId);
501  }
502 };
503 
506 
507  std::unique_ptr<std::vector<std::string>> column_data_;
508  std::unique_ptr<std::vector<ArrayDatum>> signed_compressed_coords_data_;
509 
511  size_t num_rows,
512  const ColumnDescriptor* logicalColumnDescriptor)
513  : TargetValueConverter(logicalColumnDescriptor) {
514  coords_column_descriptor_ = cat.getMetadataForColumn(
515  column_descriptor_->tableId, column_descriptor_->columnId + 1);
516  CHECK(coords_column_descriptor_);
517 
518  if (num_rows) {
519  allocateColumnarData(num_rows);
520  }
521  }
522 
524 
525  void allocateColumnarData(size_t num_rows) override {
526  CHECK(num_rows > 0);
527  column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
528  signed_compressed_coords_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
529  }
530 
532 
534  const std::shared_ptr<std::vector<double>>& coords) {
535  const auto compressed_coords_vector =
536  Importer_NS::compress_coords(*coords, column_descriptor_->columnType);
537 
538  uint8_t* compressed_coords_array = reinterpret_cast<uint8_t*>(
539  checked_malloc(sizeof(uint8_t) * compressed_coords_vector.size()));
540  memcpy(compressed_coords_array,
541  &compressed_coords_vector[0],
542  compressed_coords_vector.size());
543 
544  return ArrayDatum((int)compressed_coords_vector.size(),
545  reinterpret_cast<int8_t*>(compressed_coords_array),
546  false);
547  }
548 
549  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
550  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
551  auto geoPoint =
552  checked_get<GeoPointTargetValue>(row, geoValue, GEO_POINT_VALUE_ACCESSOR);
553 
554  (*column_data_)[row] = "";
555  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoint->coords);
556  }
557 
559  DataBlockPtr logical, coords;
560 
561  logical.stringsPtr = column_data_.get();
562  coords.arraysPtr = signed_compressed_coords_data_.get();
563 
564  insertData.data.emplace_back(logical);
565  insertData.columnIds.emplace_back(column_descriptor_->columnId);
566 
567  insertData.data.emplace_back(coords);
568  insertData.columnIds.emplace_back(coords_column_descriptor_->columnId);
569  }
570 };
571 
/// Computes the axis-aligned bounding box of a flat, interleaved coordinate list.
///
/// @param coords coordinates laid out as x0, y0, x1, y1, ...
/// @return four values: {min x, min y, max x, max y}. When `coords` is null or holds
///         no complete (x, y) pair the result is
///         {max(), max(), lowest(), lowest()} of double.
inline std::vector<double> compute_bounds_of_coords(
    const std::shared_ptr<std::vector<double>>& coords) {
  constexpr auto DOUBLE_MAX = std::numeric_limits<double>::max();
  constexpr auto DOUBLE_MIN = std::numeric_limits<double>::lowest();
  std::vector<double> bounds{DOUBLE_MAX, DOUBLE_MAX, DOUBLE_MIN, DOUBLE_MIN};
  if (!coords) {
    return bounds;
  }

  const auto& values = *coords;
  const auto size_coords = values.size();
  // Stop while a full pair is still available: a trailing unpaired value is ignored
  // rather than reading one element past the end of the vector.
  for (size_t i = 0; i + 1 < size_coords; i += 2) {
    const double x = values[i];
    const double y = values[i + 1];

    bounds[0] = std::min(bounds[0], x);
    bounds[1] = std::min(bounds[1], y);
    bounds[2] = std::max(bounds[2], x);
    bounds[3] = std::max(bounds[3], y);
  }
  return bounds;
}
594 
595 template <typename ELEM_TYPE>
596 inline ArrayDatum to_array_datum(const std::vector<ELEM_TYPE>& vector) {
597  ELEM_TYPE* array =
598  reinterpret_cast<ELEM_TYPE*>(checked_malloc(sizeof(ELEM_TYPE) * vector.size()));
599  memcpy(array, vector.data(), vector.size() * sizeof(ELEM_TYPE));
600 
601  return ArrayDatum(
602  (int)(vector.size() * sizeof(ELEM_TYPE)), reinterpret_cast<int8_t*>(array), false);
603 }
604 
605 template <typename ELEM_TYPE>
606 inline ArrayDatum to_array_datum(const std::shared_ptr<std::vector<ELEM_TYPE>>& vector) {
607  return to_array_datum(*vector.get());
608 }
609 
612 
613  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
614 
616  size_t num_rows,
617  const ColumnDescriptor* logicalColumnDescriptor)
618  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
619  bounds_column_descriptor_ = cat.getMetadataForColumn(
620  column_descriptor_->tableId, column_descriptor_->columnId + 2);
621  CHECK(bounds_column_descriptor_);
622 
623  if (num_rows) {
624  allocateColumnarData(num_rows);
625  }
626  }
627 
629 
630  void allocateColumnarData(size_t num_rows) override {
631  CHECK(num_rows > 0);
633  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
634  }
635 
637 
638  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
639  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
640  auto geoLinestring = checked_get<GeoLineStringTargetValue>(
641  row, geoValue, GEO_LINESTRING_VALUE_ACCESSOR);
642 
643  (*column_data_)[row] = "";
644  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoLinestring->coords);
645  auto bounds = compute_bounds_of_coords(geoLinestring->coords);
646  (*bounds_data_)[row] = to_array_datum(bounds);
647  }
648 
651 
652  DataBlockPtr bounds;
653 
654  bounds.arraysPtr = bounds_data_.get();
655 
656  insertData.data.emplace_back(bounds);
657  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
658  }
659 };
660 
666 
667  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
668  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
669  std::unique_ptr<int32_t[]> render_group_data_;
670 
672  size_t num_rows,
673  const ColumnDescriptor* logicalColumnDescriptor)
674  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
675  ring_sizes_column_descriptor_ = cat.getMetadataForColumn(
676  column_descriptor_->tableId, column_descriptor_->columnId + 2);
677  CHECK(ring_sizes_column_descriptor_);
678  bounds_column_descriptor_ = cat.getMetadataForColumn(
679  column_descriptor_->tableId, column_descriptor_->columnId + 3);
680  CHECK(bounds_column_descriptor_);
681  render_group_column_descriptor_ = cat.getMetadataForColumn(
682  column_descriptor_->tableId, column_descriptor_->columnId + 4);
683  CHECK(render_group_column_descriptor_);
684 
685  if (num_rows) {
686  allocateColumnarData(num_rows);
687  }
688  }
689 
691 
692  void allocateColumnarData(size_t num_rows) override {
694  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
695  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
696  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
697  }
698 
700 
701  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
702  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
703  auto geoPoly =
704  checked_get<GeoPolyTargetValue>(row, geoValue, GEO_POLY_VALUE_ACCESSOR);
705 
706  (*column_data_)[row] = "";
707  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoPoly->coords);
708  (*ring_sizes_data_)[row] = to_array_datum(geoPoly->ring_sizes);
709  auto bounds = compute_bounds_of_coords(geoPoly->coords);
710  (*bounds_data_)[row] = to_array_datum(bounds);
711  render_group_data_[row] =
712  render_group_analyzer_.insertBoundsAndReturnRenderGroup(bounds);
713  }
714 
717 
718  DataBlockPtr ringSizes, bounds, renderGroup;
719 
720  ringSizes.arraysPtr = ring_sizes_data_.get();
721  bounds.arraysPtr = bounds_data_.get();
722  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
723 
724  insertData.data.emplace_back(ringSizes);
725  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
726 
727  insertData.data.emplace_back(bounds);
728  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
729 
730  insertData.data.emplace_back(renderGroup);
731  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
732  }
733 };
734 
741 
742  std::unique_ptr<std::vector<ArrayDatum>> ring_sizes_data_;
743  std::unique_ptr<std::vector<ArrayDatum>> poly_rings_data_;
744  std::unique_ptr<std::vector<ArrayDatum>> bounds_data_;
745  std::unique_ptr<int32_t[]> render_group_data_;
746 
748  size_t num_rows,
749  const ColumnDescriptor* logicalColumnDescriptor)
750  : GeoPointValueConverter(cat, num_rows, logicalColumnDescriptor) {
751  ring_sizes_column_descriptor_ = cat.getMetadataForColumn(
752  column_descriptor_->tableId, column_descriptor_->columnId + 2);
753  CHECK(ring_sizes_column_descriptor_);
754  ring_sizes_solumn_descriptor_ = cat.getMetadataForColumn(
755  column_descriptor_->tableId, column_descriptor_->columnId + 3);
756  CHECK(ring_sizes_column_descriptor_);
757  bounds_column_descriptor_ = cat.getMetadataForColumn(
758  column_descriptor_->tableId, column_descriptor_->columnId + 4);
759  CHECK(bounds_column_descriptor_);
760  render_group_column_descriptor_ = cat.getMetadataForColumn(
761  column_descriptor_->tableId, column_descriptor_->columnId + 5);
762  CHECK(render_group_column_descriptor_);
763 
764  if (num_rows) {
765  allocateColumnarData(num_rows);
766  }
767  }
768 
770 
771  void allocateColumnarData(size_t num_rows) override {
773  ring_sizes_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
774  poly_rings_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
775  bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
776  render_group_data_ = std::make_unique<int32_t[]>(num_rows);
777  }
778 
780 
781  void convertToColumnarFormat(size_t row, const TargetValue* value) override {
782  auto geoValue = checked_get<GeoTargetValue>(row, value, GEO_TARGET_VALUE_ACCESSOR);
783  auto geoMultiPoly = checked_get<GeoMultiPolyTargetValue>(
784  row, geoValue, GEO_MULTI_POLY_VALUE_ACCESSOR);
785 
786  (*column_data_)[row] = "";
787  (*signed_compressed_coords_data_)[row] = toCompressedCoords(geoMultiPoly->coords);
788  (*ring_sizes_data_)[row] = to_array_datum(geoMultiPoly->ring_sizes);
789  (*poly_rings_data_)[row] = to_array_datum(geoMultiPoly->poly_rings);
790  auto bounds = compute_bounds_of_coords(geoMultiPoly->coords);
791  (*bounds_data_)[row] = to_array_datum(bounds);
792  render_group_data_[row] =
793  render_group_analyzer_.insertBoundsAndReturnRenderGroup(bounds);
794  }
795 
798 
799  DataBlockPtr ringSizes, polyRings, bounds, renderGroup;
800 
801  ringSizes.arraysPtr = ring_sizes_data_.get();
802  polyRings.arraysPtr = poly_rings_data_.get();
803  bounds.arraysPtr = bounds_data_.get();
804  renderGroup.numbersPtr = reinterpret_cast<int8_t*>(render_group_data_.get());
805 
806  insertData.data.emplace_back(ringSizes);
807  insertData.columnIds.emplace_back(ring_sizes_column_descriptor_->columnId);
808 
809  insertData.data.emplace_back(polyRings);
810  insertData.columnIds.emplace_back(ring_sizes_solumn_descriptor_->columnId);
811 
812  insertData.data.emplace_back(bounds);
813  insertData.columnIds.emplace_back(bounds_column_descriptor_->columnId);
814 
815  insertData.data.emplace_back(renderGroup);
816  insertData.columnIds.emplace_back(render_group_column_descriptor_->columnId);
817  }
818 };
819 
820 #endif
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
ArrayDatum to_array_datum(const std::vector< ELEM_TYPE > &vector)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
StringDictionaryProxy * literals_source_dict_
void convertToColumnarFormatFromDict(size_t row, const TargetValue *value)
void allocateColumnarData(size_t num_rows) override
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const int8_t const int64_t * num_rows
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:81
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:138
const ColumnDescriptor * ring_sizes_solumn_descriptor_
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:139
std::unique_ptr< int32_t[]> render_group_data_
void allocateColumnarData(size_t num_rows) override
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
boost_variant_accessor< int64_t > SOURCE_TYPE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
int insertBoundsAndReturnRenderGroup(const std::vector< double > &bounds)
Definition: Importer.cpp:4896
ArrayValueConverter(const ColumnDescriptor *cd, size_t num_rows, std::unique_ptr< ELEMENT_CONVERTER > element_converter, bool do_check_null)
std::string getString(int32_t string_id) const
const DictDescriptor * source_dict_desc_
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Importer.cpp:1546
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
DictionaryValueConverter(const Catalog_Namespace::Catalog &cat, int32_t sourceDictId, const ColumnDescriptor *targetDescriptor, size_t num_rows, TARGET_TYPE nullValue, int64_t nullCheckValue, bool doNullCheck, StringDictionaryProxy *literals_dict)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
void allocateColumnarData(size_t num_rows) override
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
ArrayDatum toCompressedCoords(const std::shared_ptr< std::vector< double >> &coords)
ElementsBufferColumnPtr column_buffer_
std::vector< double > compute_bounds_of_coords(const std::shared_ptr< std::vector< double >> &coords)
std::unique_ptr< std::vector< ArrayDatum > > poly_rings_data_
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnData, const ScalarTargetValue *scalarValue)
const DictDescriptor * target_dict_desc_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const StringDictionaryProxy * literals_dict_
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::shared_ptr< StringDictionary > stringDict
boost_variant_accessor< GeoLineStringTargetValue > GEO_LINESTRING_VALUE_ACCESSOR
GeoPointValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
const std::map< int32_t, std::string > getTransientMapping() const
std::unique_ptr< std::vector< std::string > > column_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
std::string getString(int32_t string_id) const
void convertToColumnarFormatFromString(size_t row, const TargetValue *value)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
boost_variant_accessor< GeoMultiPolyTargetValue > GEO_MULTI_POLY_VALUE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
boost_variant_accessor< GeoPolyTargetValue > GEO_POLY_VALUE_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
void finalizeDataBlocksForInsertData() override
GeoLinestringValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
static void populate_string_ids(std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict)
specifies the content in-memory of a row in the column metadata table
GeoMultiPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1348
std::unique_ptr< ELEMENT_CONVERTER > element_converter_
Importer_NS::RenderGroupAnalyzer render_group_analyzer_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< int32_t[]> render_group_data_
StringValueConverter(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd, size_t num_rows, bool dictEncoded, int32_t sourceDictId, StringDictionaryProxy *literals_dict)
void finalizeDataBlocksForInsertData() override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
bool is_null(const T &v, const SQLTypeInfo &t)
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:64
void allocateColumnarData(size_t num_rows) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< std::vector< std::string > > column_data_
const ColumnDescriptor * render_group_column_descriptor_
Importer_NS::RenderGroupAnalyzer render_group_analyzer_
typename NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr ElementsDataColumnPtr
std::unordered_map< int32_t, int32_t > literals_lookup_
const ColumnDescriptor * coords_column_descriptor_
void allocateColumnarData(size_t num_rows) override
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:328
void allocateColumnarData(size_t num_rows) override
const ColumnDescriptor * render_group_column_descriptor_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
static void populate_string_array_ids(std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict)
#define CHECK(condition)
Definition: Logger.h:187
NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr processBuffer(ElementsBufferColumnPtr buffer)
std::unique_ptr< std::vector< std::pair< size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr > > > column_buffer_
Descriptor for a dictionary for a string column.
const ColumnDescriptor * bounds_column_descriptor_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
std::unique_ptr< std::vector< int32_t > > ElementsBufferColumnPtr
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
std::unique_ptr< std::vector< ArrayDatum > > column_data_
SQLTypeInfo columnType
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnBuffer, const ScalarTargetValue *scalarValue)
void allocateColumnarData(size_t num_rows) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
int8_t * numbersPtr
Definition: sqltypes.h:137
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
boost_variant_accessor< ArrayTargetValue > ARRAY_VALUE_ACCESSOR
boost_variant_accessor< GeoPointTargetValue > GEO_POINT_VALUE_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
boost_variant_accessor< SOURCE_TYPE > SOURCE_TYPE_ACCESSOR
std::unique_ptr< TARGET_TYPE, CheckedMallocDeleter< TARGET_TYPE > > ColumnDataPtr
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:62
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
GeoPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
std::unique_ptr< std::vector< ArrayDatum > > signed_compressed_coords_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:119
NumericValueConverter(const ColumnDescriptor *cd, size_t num_rows, TARGET_TYPE nullValue, SOURCE_TYPE nullCheckValue, bool doNullCheck)
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156