17 #ifndef TARGET_VALUE_CONVERTERS_IMPL_H_
18 #define TARGET_VALUE_CONVERTERS_IMPL_H_
33 if (std::is_floating_point<T>::value) {
34 return static_cast<T>(inline_fp_null_array_value<T>());
36 return static_cast<T>(inline_int_null_array_value<T>());
40 template <
typename SOURCE_TYPE,
typename RETURN_TYPE,
typename CHECKED_CAST_TYPE>
41 RETURN_TYPE
checked_cast(SOURCE_TYPE val,
bool check_null, RETURN_TYPE null_value) {
42 if (!std::is_same<SOURCE_TYPE, CHECKED_CAST_TYPE>::value) {
45 CHECKED_CAST_TYPE castedVal = boost::numeric_cast<CHECKED_CAST_TYPE>(val);
46 if (check_null && castedVal == null_value) {
47 throw std::runtime_error(
"Overflow or underflow");
50 throw std::runtime_error(
"Overflow or underflow");
54 return static_cast<RETURN_TYPE
>(val);
57 template <
typename SOURCE_TYPE,
typename TARGET_TYPE>
59 using ColumnDataPtr = std::unique_ptr<TARGET_TYPE, CheckedMallocDeleter<TARGET_TYPE>>;
61 using CasterFunc = std::function<TARGET_TYPE(SOURCE_TYPE, bool, TARGET_TYPE)>;
74 TARGET_TYPE nullValue,
75 SOURCE_TYPE nullCheckValue,
100 reinterpret_cast<TARGET_TYPE*>(
checked_malloc(num_rows *
sizeof(TARGET_TYPE))));
106 reinterpret_cast<TARGET_TYPE*>(
checked_malloc(num_rows *
sizeof(TARGET_TYPE))));
111 typename ElementsBufferColumnPtr::pointer columnData,
122 columnData[row] =
static_cast<TARGET_TYPE
>(val);
138 std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
140 std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
141 for (
size_t row = 0; row < array_buffer->size(); row++) {
142 auto& element = (array_buffer->at(row));
144 if (element.second) {
146 int8_t* arrayDataPtr =
reinterpret_cast<int8_t*
>(data.release());
148 ArrayDatum(element.first *
sizeof(TARGET_TYPE), arrayDataPtr, is_null);
156 insertData.
data.push_back(dataBlock);
161 template <
typename TARGET_TYPE>
183 int32_t sourceDictId,
186 TARGET_TYPE nullValue,
187 int64_t nullCheckValue,
210 for (
unsigned index = 0; index < transient_vecmap.size(); ++index) {
212 std::string
const& str = *transient_vecmap[index];
232 return std::make_unique<std::vector<int32_t>>(num_rows);
237 typename ElementsBufferColumnPtr::pointer columnBuffer,
245 (*columnBuffer)[row] = (int32_t)val;
265 throw std::runtime_error(
"Unexpected negative source ID");
273 reinterpret_cast<TARGET_TYPE*>(
276 std::vector<int32_t>* bufferPtr =
277 reinterpret_cast<std::vector<int32_t>*
>(buffer.get());
278 TARGET_TYPE* columnDataPtr =
reinterpret_cast<TARGET_TYPE*
>(data.get());
280 for (
size_t i = 0; i < bufferPtr->size(); i++) {
285 columnDataPtr[i] =
static_cast<TARGET_TYPE
>(id);
292 for (
size_t i = 0; i < bufferPtr->size(); i++) {
293 auto src_id = (*bufferPtr)[i];
296 }
else if (src_id < 0) {
299 columnDataPtr[i] =
static_cast<TARGET_TYPE
>(src_id);
304 std::vector<int32_t> dest_ids;
305 dest_ids.resize(bufferPtr->size());
321 for (
size_t i = 0; i < dest_ids.size(); i++) {
322 auto id = dest_ids[i];
326 if (std::is_signed<TARGET_TYPE>::value) {
328 throw std::runtime_error(
329 "Maximum number of unique strings (" +
331 ") reached in target dictionary");
334 if (
id >= std::numeric_limits<TARGET_TYPE>::max()) {
335 throw std::runtime_error(
336 "Maximum number of unique strings (" +
338 ") reached in target column's dict encoding");
341 columnDataPtr[i] =
static_cast<TARGET_TYPE
>(id);
351 std::unique_ptr<std::vector<std::pair<size_t, ElementsBufferColumnPtr>>>&
353 std::unique_ptr<std::vector<ArrayDatum>>::pointer arrayData) {
355 for (
size_t row = 0; row < array_buffer->size(); row++) {
356 auto& element = (array_buffer->at(row));
358 if (element.second) {
361 int8_t* arrayDataPtr =
reinterpret_cast<int8_t*
>(data.release());
363 ArrayDatum(element.first *
sizeof(TARGET_TYPE), arrayDataPtr, is_null);
367 std::vector<std::vector<int32_t>> srcArrayIds(array_buffer->size());
368 std::vector<std::vector<int32_t>> destArrayIds(0);
370 for (
size_t row = 0; row < array_buffer->size(); row++) {
371 auto& element = (array_buffer->at(row));
372 if (element.second) {
373 srcArrayIds[row] = *(element.second.get());
382 for (
size_t row = 0; row < array_buffer->size(); row++) {
383 auto& element = (array_buffer->at(row));
385 if (element.second) {
386 *(element.second.get()) = destArrayIds[row];
387 int8_t* arrayDataPtr =
reinterpret_cast<int8_t*
>(&(element.second->at(0)));
388 (*arrayData)[row] =
ArrayDatum(element.first *
sizeof(TARGET_TYPE),
408 insertData.
data.push_back(dataBlock);
426 int32_t sourceDictId,
433 if (0 != sourceDictId) {
435 CHECK(source_dict_desc);
451 column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
460 if (std::numeric_limits<int32_t>::min() == val) {
461 (*column_data_)[row] = std::string(
"");
465 (*column_data_)[row] = strVal;
468 (*column_data_)[row] = strVal;
470 CHECK_EQ(val, inline_int_null_value<int32_t>());
471 std::string nullStr =
"";
472 (*column_data_)[row] = nullStr;
481 const auto db_str_p = checked_get<std::string>(row, db_p,
STRING_ACCESSOR);
483 if (
nullptr != db_str_p) {
484 (*column_data_)[row] = *db_str_p;
486 (*column_data_)[row] = std::string(
"");
501 insertData.
data.push_back(dataBlock);
506 template <
typename ELEMENT_CONVERTER>
509 std::vector<std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>
524 std::unique_ptr<ELEMENT_CONVERTER> element_converter,
552 column_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
554 std::pair<size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr>>>(
559 const auto arrayValue =
562 if (arrayValue->is_initialized()) {
563 const auto& vec = arrayValue->get();
568 throw std::runtime_error(
569 "Incorrect number of array elements for fixed length array column");
574 typename ELEMENT_CONVERTER::ElementsBufferColumnPtr elementBuffer =
577 int elementIndex = 0;
578 for (
const auto& scalarValue : vec) {
580 elementIndex++, elementBuffer.get(), &scalarValue);
591 throw std::runtime_error(
"NULL assignment of non null column not allowed");
597 (*column_data_)[row].is_null =
is_null;
612 insertData.
data.push_back(dataBlock);
640 column_data_ = std::make_unique<std::vector<std::string>>(num_rows);
648 const std::shared_ptr<std::vector<double>>& coords) {
649 const auto compressed_coords_vector =
652 uint8_t* compressed_coords_array =
reinterpret_cast<uint8_t*
>(
653 checked_malloc(
sizeof(uint8_t) * compressed_coords_vector.size()));
654 memcpy(compressed_coords_array,
655 &compressed_coords_vector[0],
656 compressed_coords_vector.size());
658 return ArrayDatum((
int)compressed_coords_vector.size(),
659 reinterpret_cast<int8_t*
>(compressed_coords_array),
666 if (geoValue->is_initialized()) {
667 const auto geo = geoValue->get();
668 const auto geoPoint =
671 (*column_data_)[row] =
"";
675 (*column_data_)[row] =
"";
676 auto coords = std::make_shared<std::vector<double>>();
680 coords_datum.is_null =
true;
681 (*signed_compressed_coords_data_)[row] = coords_datum;
691 insertData.
data.emplace_back(logical);
694 insertData.
data.emplace_back(coords);
700 const std::shared_ptr<std::vector<double>>& coords) {
701 std::vector<double> bounds(4);
702 constexpr
auto DOUBLE_MAX = std::numeric_limits<double>::max();
703 constexpr
auto DOUBLE_MIN = std::numeric_limits<double>::lowest();
708 auto size_coords = coords->size();
710 for (
size_t i = 0; i < size_coords; i += 2) {
711 double x = (*coords)[i];
712 double y = (*coords)[i + 1];
714 bounds[0] = std::min(bounds[0], x);
715 bounds[1] = std::min(bounds[1], y);
716 bounds[2] = std::max(bounds[2], x);
717 bounds[3] = std::max(bounds[3], y);
722 template <
typename ELEM_TYPE>
725 reinterpret_cast<ELEM_TYPE*
>(
checked_malloc(
sizeof(ELEM_TYPE) * vector.size()));
726 memcpy(array, vector.data(), vector.size() *
sizeof(ELEM_TYPE));
729 (
int)(vector.size() *
sizeof(ELEM_TYPE)),
reinterpret_cast<int8_t*
>(array),
false);
732 template <
typename ELEM_TYPE>
760 bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
766 const auto geoValue =
769 if (geoValue->is_initialized()) {
770 const auto geo = geoValue->get();
771 const auto geoLinestring =
774 (*column_data_)[row] =
"";
780 (*column_data_)[row] =
"";
781 (*signed_compressed_coords_data_)[row] =
ArrayDatum(0,
nullptr,
true);
782 std::vector<double> bounds = {
785 bounds_datum.is_null =
true;
786 (*bounds_data_)[row] = bounds_datum;
797 insertData.
data.emplace_back(bounds);
807 if (render_group_analyzer_map) {
809 auto itr = render_group_analyzer_map->try_emplace(column_id).first;
833 logicalColumnDescriptor->columnId) {
854 bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
861 const auto geoValue =
864 if (geoValue->is_initialized()) {
865 const auto geo = geoValue->get();
869 (*column_data_)[row] =
"";
882 (*column_data_)[row] =
"";
883 (*signed_compressed_coords_data_)[row] =
ArrayDatum(0,
nullptr,
true);
884 (*ring_sizes_data_)[row] =
ArrayDatum(0,
nullptr,
true);
885 std::vector<double> bounds = {
888 bounds_datum.is_null =
true;
889 (*bounds_data_)[row] = bounds_datum;
903 insertData.
data.emplace_back(ringSizes);
906 insertData.
data.emplace_back(bounds);
909 insertData.
data.emplace_back(renderGroup);
932 logicalColumnDescriptor->columnId) {
957 bounds_data_ = std::make_unique<std::vector<ArrayDatum>>(num_rows);
964 const auto geoValue =
967 if (geoValue->is_initialized()) {
968 const auto geo = geoValue->get();
969 const auto geoMultiPoly =
972 (*column_data_)[row] =
"";
974 (*ring_sizes_data_)[row] =
to_array_datum(geoMultiPoly->ring_sizes);
975 (*poly_rings_data_)[row] =
to_array_datum(geoMultiPoly->poly_rings);
986 (*column_data_)[row] =
"";
987 (*signed_compressed_coords_data_)[row] =
ArrayDatum(0,
nullptr,
true);
988 (*ring_sizes_data_)[row] =
ArrayDatum(0,
nullptr,
true);
989 (*poly_rings_data_)[row] =
ArrayDatum(0,
nullptr,
true);
990 std::vector<double> bounds = {
993 bounds_datum.is_null =
true;
994 (*bounds_data_)[row] = bounds_datum;
1002 DataBlockPtr ringSizes, polyRings, bounds, renderGroup;
1009 insertData.
data.emplace_back(ringSizes);
1012 insertData.
data.emplace_back(polyRings);
1015 insertData.
data.emplace_back(bounds);
1018 insertData.
data.emplace_back(renderGroup);
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
TARGET_TYPE fixed_array_null_value_
ArrayDatum to_array_datum(const std::vector< ELEM_TYPE > &vector)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
std::unique_ptr< TARGET_TYPE, CheckedMallocDeleter< TARGET_TYPE >> ColumnDataPtr
std::unique_ptr< std::vector< int32_t >> ElementsBufferColumnPtr
StringDictionaryProxy * literals_source_dict_
void convertToColumnarFormatFromDict(size_t row, const TargetValue *value)
void allocateColumnarData(size_t num_rows) override
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
HOST DEVICE int get_size() const
~StringValueConverter() override
const std::vector< std::string const * > & getTransientVector() const
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
Class for a per-database catalog. Also includes metadata for the current database and the current use...
std::vector< std::string > * stringsPtr
const ColumnDescriptor * ring_sizes_solumn_descriptor_
import_export::RenderGroupAnalyzer * render_group_analyzer_
std::vector< ArrayDatum > * arraysPtr
size_t fixed_array_elements_count_
std::unique_ptr< int32_t[]> render_group_data_
constexpr auto DOUBLE_MAX
void allocateColumnarData(size_t num_rows) override
~GeoPolygonValueConverter() override
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
boost_variant_accessor< int64_t > SOURCE_TYPE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
ArrayValueConverter(const ColumnDescriptor *cd, size_t num_rows, std::unique_ptr< ELEMENT_CONVERTER > element_converter, bool do_check_null)
~NumericValueConverter() override
void populateFixedArrayNullSentinel(size_t num_rows)
const DictDescriptor * source_dict_desc_
SQLTypeInfo element_type_info_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::unique_ptr< std::vector< ArrayDatum > > bounds_data_
void allocateColumnarData(size_t num_rows) override
const ColumnDescriptor * column_descriptor_
CasterFunc checked_caster_
~DictionaryValueConverter() override
std::string getString(int32_t string_id) const
ArrayDatum toCompressedCoords(const std::shared_ptr< std::vector< double >> &coords)
ElementsBufferColumnPtr column_buffer_
std::vector< double > compute_bounds_of_coords(const std::shared_ptr< std::vector< double >> &coords)
std::unique_ptr< std::vector< ArrayDatum > > poly_rings_data_
static void populate_string_ids(std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict, const std::vector< std::string const * > &transient_string_vec={})
Populates provided dest_ids vector with string ids corresponding to given source strings.
void processArrayBuffer(std::unique_ptr< std::vector< std::pair< size_t, ElementsBufferColumnPtr >>> &array_buffer, std::unique_ptr< std::vector< ArrayDatum >>::pointer arrayData)
std::function< TARGET_TYPE(int64_t, bool, TARGET_TYPE)> CasterFunc
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
~ArrayValueConverter() override
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnData, const ScalarTargetValue *scalarValue)
const DictDescriptor * target_dict_desc_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
const StringDictionaryProxy * literals_dict_
void convertToColumnarFormat(size_t row, const ScalarTargetValue *scalarValue)
std::shared_ptr< StringDictionary > stringDict
boost_variant_accessor< ScalarTargetValue > SCALAR_TARGET_VALUE_ACCESSOR
boost_variant_accessor< GeoLineStringTargetValue > GEO_LINESTRING_VALUE_ACCESSOR
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
GeoPointValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
std::vector< uint8_t > compress_coords(const std::vector< double > &coords, const SQLTypeInfo &ti)
boost_variant_accessor< NullableString > NULLABLE_STRING_ACCESSOR
std::unique_ptr< std::vector< std::string > > column_data_
CONSTEXPR DEVICE bool is_null(const T &value)
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void convertToColumnarFormatFromString(size_t row, const TargetValue *value)
void * checked_malloc(const size_t size)
boost_variant_accessor< GeoMultiPolyTargetValue > GEO_MULTI_POLY_VALUE_ACCESSOR
const ColumnDescriptor * bounds_column_descriptor_
boost_variant_accessor< GeoPolyTargetValue > GEO_POLY_VALUE_ACCESSOR
boost_variant_accessor< std::string > STRING_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
std::unique_ptr< std::vector< ArrayDatum > > ring_sizes_data_
void finalizeDataBlocksForInsertData() override
GeoLinestringValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
static int32_t transientIndexToId(unsigned const index)
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Specifies the in-memory content of a row in the column metadata table.
GeoPolygonRenderGroupManager(RenderGroupAnalyzerMap *render_group_analyzer_map, const int column_id)
const int32_t buffer_null_sentinal_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
Global bool for controlling render group assignment, remove along with legacy poly rendering...
std::unique_ptr< ELEMENT_CONVERTER > element_converter_
T get_fixed_array_null_value()
void convertToColumnarFormat(size_t row, const TargetValue *value) override
std::unique_ptr< int32_t[]> render_group_data_
StringValueConverter(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd, size_t num_rows, bool dictEncoded, int32_t sourceDictId, StringDictionaryProxy *literals_dict)
void finalizeDataBlocksForInsertData() override
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
void setValueCaster(CasterFunc caster)
std::vector< DataBlockPtr > data
the number of rows being inserted
int32_t getOrAdd(const std::string &str) noexcept
void allocateColumnarData(size_t num_rows) override
int32_t convertTransientStringIdToPermanentId(int32_t &transient_string_id)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
~GeoLinestringValueConverter() override
std::unique_ptr< std::vector< std::string > > column_data_
const ColumnDescriptor * render_group_column_descriptor_
typename NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr ElementsDataColumnPtr
ColumnDataPtr ElementsBufferColumnPtr
std::unordered_map< int32_t, int32_t > literals_lookup_
RETURN_TYPE checked_cast(SOURCE_TYPE val, bool check_null, RETURN_TYPE null_value)
const ColumnDescriptor * coords_column_descriptor_
void allocateColumnarData(size_t num_rows) override
std::string getString(int32_t string_id) const
GeoMultiPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor, RenderGroupAnalyzerMap *render_group_analyzer_map)
constexpr auto DOUBLE_MIN
HOST DEVICE int get_comp_param() const
void allocateColumnarData(size_t num_rows) override
boost_variant_accessor< GeoTargetValue > GEO_VALUE_ACCESSOR
ColumnDataPtr column_data_
#define NULL_ARRAY_DOUBLE
const ColumnDescriptor * render_group_column_descriptor_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
static void populate_string_array_ids(std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict)
NumericValueConverter< int64_t, TARGET_TYPE >::ColumnDataPtr processBuffer(ElementsBufferColumnPtr buffer)
std::unique_ptr< std::vector< std::pair< size_t, typename ELEMENT_CONVERTER::ElementsBufferColumnPtr > > > column_buffer_
boost_variant_accessor< GeoTargetValue > GEO_TARGET_VALUE_ACCESSOR
Descriptor for a dictionary for a string column.
const ColumnDescriptor * bounds_column_descriptor_
void convertToColumnarFormat(size_t row, const TargetValue *value) override
~GeoMultiPolygonValueConverter() override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
The data to be inserted using the fragment manager.
std::map< int, import_export::RenderGroupAnalyzer > RenderGroupAnalyzerMap
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
std::unique_ptr< std::vector< ArrayDatum > > column_data_
SOURCE_TYPE null_check_value_
void convertElementToColumnarFormat(size_t row, typename ElementsBufferColumnPtr::pointer columnBuffer, const ScalarTargetValue *scalarValue)
void allocateColumnarData(size_t num_rows) override
void convertToColumnarFormat(size_t row, const TargetValue *value) override
ElementsBufferColumnPtr allocateColumnarBuffer(size_t num_rows)
void convertToColumnarFormat(size_t row, const TargetValue *value) override
int8_t * fixed_array_null_sentinel_
boost_variant_accessor< ArrayTargetValue > ARRAY_VALUE_ACCESSOR
boost_variant_accessor< GeoPointTargetValue > GEO_POINT_VALUE_ACCESSOR
const ColumnDescriptor * ring_sizes_column_descriptor_
boost_variant_accessor< SOURCE_TYPE > SOURCE_TYPE_ACCESSOR
std::vector< int > columnIds
identifies the table into which the data is being inserted
GeoPolygonValueConverter(const Catalog_Namespace::Catalog &cat, size_t num_rows, const ColumnDescriptor *logicalColumnDescriptor, RenderGroupAnalyzerMap *render_group_analyzer_map)
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
~GeoPointValueConverter() override
int insertBoundsAndReturnRenderGroup(const std::vector< double > &bounds)
StringDictionary * source_dict_
std::unique_ptr< std::vector< ArrayDatum > > signed_compressed_coords_data_
void addDataBlocksToInsertData(Fragmenter_Namespace::InsertData &insertData) override
StringDictionaryProxy * source_dict_proxy_
NumericValueConverter(const ColumnDescriptor *cd, size_t num_rows, TARGET_TYPE nullValue, SOURCE_TYPE nullCheckValue, bool doNullCheck)
DictionaryValueConverter(const Catalog_Namespace::Catalog &cat, int32_t sourceDictId, const ColumnDescriptor *targetDescriptor, size_t num_rows, TARGET_TYPE nullValue, int64_t nullCheckValue, bool doNullCheck, StringDictionaryProxy *literals_dict, StringDictionaryProxy *source_dict_proxy)
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue