OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{ArrowResultSetConverter.cpp} Namespace Reference

Classes

class  ResultSetBuffer
 
class  null_type
 
struct  null_type< TYPE, std::enable_if_t< std::is_integral< TYPE >::value > >
 
struct  null_type< TYPE, std::enable_if_t< std::is_floating_point< TYPE >::value > >
 

Typedefs

template<typename TYPE >
using null_type_t = typename null_type< TYPE >::type
 

Functions

SQLTypes get_dict_index_type (const SQLTypeInfo &ti)
 
SQLTypes get_physical_type (const SQLTypeInfo &ti)
 
template<typename TYPE , typename VALUE_ARRAY_TYPE >
void create_or_append_value (const ScalarTargetValue &val_cty, std::shared_ptr< ValueArray > &values, const size_t max_size)
 
template<typename TYPE , typename VALUE_ARRAY_TYPE >
void create_or_append_value (const ArrayTargetValue &val_ctys, std::shared_ptr< ValueArray > &values, const size_t max_size)
 
void create_or_append_validity (const ArrayTargetValue &value, const SQLTypeInfo &col_type, std::shared_ptr< std::vector< bool >> &null_bitmap, const size_t max_size)
 
template<typename TYPE >
void create_or_append_validity (const ScalarTargetValue &value, const SQLTypeInfo &col_type, std::shared_ptr< std::vector< bool >> &null_bitmap, const size_t max_size)
 
template<typename C_TYPE , typename ARROW_TYPE = typename arrow::CTypeTraits<C_TYPE>::ArrowType>
void convert_column (ResultSetPtr result, size_t col, size_t entry_count, std::shared_ptr< arrow::Array > &out)
 
std::pair< key_t, void * > get_shm (size_t shmsz)
 
std::pair< key_t,
std::shared_ptr< arrow::Buffer > > 
get_shm_buffer (size_t size)
 
void remap_string_values (const ArrowResultSetConverter::ColumnBuilder &column_builder, const std::vector< uint8_t > &bitmap, std::vector< int64_t > &vec1d)
 
std::shared_ptr< arrow::DataType > get_arrow_type (const SQLTypeInfo &sql_type, const ExecutorDeviceType device_type)
 
template<typename BUILDER_TYPE , typename VALUE_ARRAY_TYPE >
void appendToColumnBuilder (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 
template<>
void appendToColumnBuilder< arrow::Decimal128Builder, int64_t > (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 
template<>
void appendToColumnBuilder< arrow::StringBuilder, std::string > (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 
template<>
void appendToColumnBuilder< arrow::StringDictionary32Builder, int32_t > (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 
template<typename BUILDER_TYPE , typename VALUE_TYPE >
void appendToListColumnBuilder (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 
template<>
void appendToListColumnBuilder< arrow::StringDictionaryBuilder, int64_t > (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 

Typedef Documentation

template<typename TYPE >
using anonymous_namespace{ArrowResultSetConverter.cpp}::null_type_t = typedef typename null_type<TYPE>::type

Definition at line 228 of file ArrowResultSetConverter.cpp.

Function Documentation

template<typename BUILDER_TYPE , typename VALUE_ARRAY_TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToColumnBuilder ( ArrowResultSetConverter::ColumnBuilder column_builder,
const ValueArray values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 1396 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, CHECK, ArrowResultSetConverter::ColumnBuilder::field, DateConverters::get_epoch_days_from_seconds(), kMilliSecsPerSec, and shared::transform().

1398  {
1399  static_assert(!std::is_same<BUILDER_TYPE, arrow::StringDictionary32Builder>::value,
1400  "Dictionary encoded string builder requires function specialization.");
1401 
1402  std::vector<VALUE_ARRAY_TYPE> vals = boost::get<std::vector<VALUE_ARRAY_TYPE>>(values);
1403 
1404  if (scale_epoch_values<BUILDER_TYPE>()) {
1405  auto scale_sec_to_millisec = [](auto seconds) { return seconds * kMilliSecsPerSec; };
1406  auto scale_values = [&](auto epoch) {
1407  return std::is_same<BUILDER_TYPE, arrow::Date32Builder>::value
1408  ? DateConverters::get_epoch_days_from_seconds(epoch)
1409  : scale_sec_to_millisec(epoch);
1410  };
1411  std::transform(vals.begin(), vals.end(), vals.begin(), scale_values);
1412  }
1413 
1414  auto typed_builder = dynamic_cast<BUILDER_TYPE*>(column_builder.builder.get());
1415  CHECK(typed_builder);
1416  if (column_builder.field->nullable()) {
1417  CHECK(is_valid.get());
1418  ARROW_THROW_NOT_OK(typed_builder->AppendValues(vals, *is_valid));
1419  } else {
1420  ARROW_THROW_NOT_OK(typed_builder->AppendValues(vals));
1421  }
1422 }
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
std::shared_ptr< arrow::Field > field
static constexpr int64_t kMilliSecsPerSec
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:320
#define CHECK(condition)
Definition: Logger.h:291
int64_t get_epoch_days_from_seconds(const int64_t seconds)

+ Here is the call graph for this function:

template<>
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToColumnBuilder< arrow::Decimal128Builder, int64_t > ( ArrowResultSetConverter::ColumnBuilder column_builder,
const ValueArray values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 1425 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, CHECK, and CHECK_EQ.

1428  {
1429  std::vector<int64_t> vals = boost::get<std::vector<int64_t>>(values);
1430  auto typed_builder =
1431  dynamic_cast<arrow::Decimal128Builder*>(column_builder.builder.get());
1432  CHECK(typed_builder);
1433  CHECK_EQ(is_valid->size(), vals.size());
1434  if (column_builder.field->nullable()) {
1435  CHECK(is_valid.get());
1436  for (size_t i = 0; i < vals.size(); i++) {
1437  const auto v = vals[i];
1438  const auto valid = (*is_valid)[i];
1439  if (valid) {
1440  ARROW_THROW_NOT_OK(typed_builder->Append(v));
1441  } else {
1442  ARROW_THROW_NOT_OK(typed_builder->AppendNull());
1443  }
1444  }
1445  } else {
1446  for (const auto& v : vals) {
1447  ARROW_THROW_NOT_OK(typed_builder->Append(v));
1448  }
1449  }
1450 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
std::shared_ptr< arrow::Field > field
#define CHECK(condition)
Definition: Logger.h:291
template<>
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToColumnBuilder< arrow::StringBuilder, std::string > ( ArrowResultSetConverter::ColumnBuilder column_builder,
const ValueArray values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 1453 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, CHECK, and CHECK_EQ.

1456  {
1457  std::vector<std::string> vals = boost::get<std::vector<std::string>>(values);
1458  auto typed_builder = dynamic_cast<arrow::StringBuilder*>(column_builder.builder.get());
1459  CHECK(typed_builder);
1460  CHECK_EQ(is_valid->size(), vals.size());
1461 
1462  if (column_builder.field->nullable()) {
1463  CHECK(is_valid.get());
1464 
1465  // TODO: Generate this instead of the boolean bitmap
1466  std::vector<uint8_t> transformed_bitmap;
1467  transformed_bitmap.reserve(is_valid->size());
1468  std::for_each(
1469  is_valid->begin(), is_valid->end(), [&transformed_bitmap](const bool is_valid) {
1470  transformed_bitmap.push_back(is_valid ? 1 : 0);
1471  });
1472  ARROW_THROW_NOT_OK(typed_builder->AppendValues(vals, transformed_bitmap.data()));
1473  } else {
1474  ARROW_THROW_NOT_OK(typed_builder->AppendValues(vals));
1475  }
1476 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
std::shared_ptr< arrow::Field > field
#define CHECK(condition)
Definition: Logger.h:291
template<>
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToColumnBuilder< arrow::StringDictionary32Builder, int32_t > ( ArrowResultSetConverter::ColumnBuilder column_builder,
const ValueArray values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 1479 of file ArrowResultSetConverter.cpp.

References ALL_STRINGS_REMAPPED, ARROW_THROW_NOT_OK, CHECK, and INVALID.

1482  {
1483  auto typed_builder =
1484  dynamic_cast<arrow::StringDictionary32Builder*>(column_builder.builder.get());
1485  CHECK(typed_builder);
1486 
1487  std::vector<int32_t> vals = boost::get<std::vector<int32_t>>(values);
1488  // remap negative values if ArrowStringRemapMode == ONLY_TRANSIENT_STRINGS_REMAPPED or
1489  // everything if ALL_STRINGS_REMAPPED
1490  CHECK(column_builder.string_remap_mode != ArrowStringRemapMode::INVALID);
1491  for (size_t i = 0; i < vals.size(); i++) {
1492  auto& val = vals[i];
1493  if ((column_builder.string_remap_mode == ArrowStringRemapMode::ALL_STRINGS_REMAPPED ||
1494  val < 0) &&
1495  (*is_valid)[i]) {
1496  vals[i] = column_builder.string_remapping.at(val);
1497  }
1498  }
1499 
1500  if (column_builder.field->nullable()) {
1501  CHECK(is_valid.get());
1502  // TODO(adb): Generate this instead of the boolean bitmap
1503  std::vector<uint8_t> transformed_bitmap;
1504  transformed_bitmap.reserve(is_valid->size());
1505  std::for_each(
1506  is_valid->begin(), is_valid->end(), [&transformed_bitmap](const bool is_valid) {
1507  transformed_bitmap.push_back(is_valid ? 1 : 0);
1508  });
1509 
1510  ARROW_THROW_NOT_OK(typed_builder->AppendIndices(
1511  vals.data(), static_cast<int64_t>(vals.size()), transformed_bitmap.data()));
1512  } else {
1513  ARROW_THROW_NOT_OK(
1514  typed_builder->AppendIndices(vals.data(), static_cast<int64_t>(vals.size())));
1515  }
1516 }
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
std::shared_ptr< arrow::Field > field
std::unordered_map< StrId, ArrowStrId > string_remapping
#define CHECK(condition)
Definition: Logger.h:291
template<typename BUILDER_TYPE , typename VALUE_TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToListColumnBuilder ( ArrowResultSetConverter::ColumnBuilder column_builder,
const ValueArray values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 1519 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, CHECK, gpu_enabled::copy(), ArrowResultSetConverter::ColumnBuilder::field, and shared::transform().

1521  {
1522  Vec2<VALUE_TYPE> vals = boost::get<Vec2<VALUE_TYPE>>(values);
1523  auto list_builder = dynamic_cast<arrow::ListBuilder*>(column_builder.builder.get());
1524  CHECK(list_builder);
1525 
1526  auto value_builder = static_cast<BUILDER_TYPE*>(list_builder->value_builder());
1527 
1528  if (column_builder.field->nullable()) {
1529  for (size_t i = 0; i < vals.size(); i++) {
1530  if ((*is_valid)[i]) {
1531  const auto& val = vals[i];
1532  std::vector<uint8_t> bitmap(val.size());
1533  std::transform(val.begin(), val.end(), bitmap.begin(), [](VALUE_TYPE pvalue) {
1534  return static_cast<VALUE_TYPE>(pvalue) != null_type<VALUE_TYPE>::value;
1535  });
1536  ARROW_THROW_NOT_OK(list_builder->Append());
1537  if constexpr (std::is_same_v<BUILDER_TYPE, arrow::BooleanBuilder>) {
1538  std::vector<uint8_t> bval(val.size());
1539  std::copy(val.begin(), val.end(), bval.begin());
1540  ARROW_THROW_NOT_OK(
1541  value_builder->AppendValues(bval.data(), bval.size(), bitmap.data()));
1542  } else {
1543  ARROW_THROW_NOT_OK(
1544  value_builder->AppendValues(val.data(), val.size(), bitmap.data()));
1545  }
1546  } else {
1547  ARROW_THROW_NOT_OK(list_builder->AppendNull());
1548  }
1549  }
1550  } else {
1551  for (size_t i = 0; i < vals.size(); i++) {
1552  if ((*is_valid)[i]) {
1553  const auto& val = vals[i];
1554  ARROW_THROW_NOT_OK(list_builder->Append());
1555  if constexpr (std::is_same_v<BUILDER_TYPE, arrow::BooleanBuilder>) {
1556  std::vector<uint8_t> bval(val.size());
1557  std::copy(val.begin(), val.end(), bval.begin());
1558  ARROW_THROW_NOT_OK(value_builder->AppendValues(bval.data(), bval.size()));
1559  } else {
1560  ARROW_THROW_NOT_OK(value_builder->AppendValues(val.data(), val.size()));
1561  }
1562  } else {
1563  ARROW_THROW_NOT_OK(list_builder->AppendNull());
1564  }
1565  }
1566  }
1567 }
std::vector< std::vector< T >> Vec2
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
std::shared_ptr< arrow::Field > field
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:320
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

template<>
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToListColumnBuilder< arrow::StringDictionaryBuilder, int64_t > ( ArrowResultSetConverter::ColumnBuilder column_builder,
const ValueArray values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 1570 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, CHECK, remap_string_values(), and shared::transform().

1573  {
1574  Vec2<int64_t> vec2d = boost::get<Vec2<int64_t>>(values);
1575 
1576  auto* list_builder = dynamic_cast<arrow::ListBuilder*>(column_builder.builder.get());
1577  CHECK(list_builder);
1578 
1579  // todo: fix value_builder being a StringDictionaryBuilder and not
1580  // StringDictionary32Builder
1581  auto* value_builder =
1582  dynamic_cast<arrow::StringDictionaryBuilder*>(list_builder->value_builder());
1583  CHECK(value_builder);
1584 
1585  if (column_builder.field->nullable()) {
1586  for (size_t i = 0; i < vec2d.size(); i++) {
1587  if ((*is_valid)[i]) {
1588  auto& vec1d = vec2d[i];
1589  std::vector<uint8_t> bitmap(vec1d.size());
1590  std::transform(vec1d.begin(), vec1d.end(), bitmap.begin(), [](int64_t pvalue) {
1591  return pvalue != null_type<int32_t>::value;
1592  });
1593  ARROW_THROW_NOT_OK(list_builder->Append());
1594  ARROW_THROW_NOT_OK(value_builder->InsertMemoValues(*column_builder.string_array));
1595  remap_string_values(column_builder, bitmap, vec1d);
1596  ARROW_THROW_NOT_OK(value_builder->AppendIndices(
1597  vec1d.data(), static_cast<int64_t>(vec1d.size()), bitmap.data()));
1598  } else {
1599  ARROW_THROW_NOT_OK(list_builder->AppendNull());
1600  }
1601  }
1602  } else {
1603  for (size_t i = 0; i < vec2d.size(); i++) {
1604  if ((*is_valid)[i]) {
1605  auto& vec1d = vec2d[i];
1606  ARROW_THROW_NOT_OK(list_builder->Append());
1607  remap_string_values(column_builder, {}, vec1d);
1608  ARROW_THROW_NOT_OK(value_builder->AppendIndices(vec1d.data(), vec1d.size()));
1609  } else {
1610  ARROW_THROW_NOT_OK(list_builder->AppendNull());
1611  }
1612  }
1613  }
1614 }
std::vector< std::vector< T >> Vec2
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
std::shared_ptr< arrow::StringArray > string_array
std::shared_ptr< arrow::Field > field
void remap_string_values(const ArrowResultSetConverter::ColumnBuilder &column_builder, const std::vector< uint8_t > &bitmap, std::vector< int64_t > &vec1d)
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:320
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

template<typename C_TYPE , typename ARROW_TYPE = typename arrow::CTypeTraits<C_TYPE>::ArrowType>
void anonymous_namespace{ArrowResultSetConverter.cpp}::convert_column ( ResultSetPtr  result,
size_t  col,
size_t  entry_count,
std::shared_ptr< arrow::Array > &  out 
)

Definition at line 232 of file ArrowResultSetConverter.cpp.

References CHECK, and run_benchmark_import::res.

235  {
236  CHECK(sizeof(C_TYPE) == result->getColType(col).get_size());
237 
238  std::shared_ptr<arrow::Buffer> values;
239  std::shared_ptr<arrow::Buffer> is_valid;
240  const int64_t buf_size = entry_count * sizeof(C_TYPE);
241  if (result->isZeroCopyColumnarConversionPossible(col)) {
242  values.reset(new ResultSetBuffer(
243  reinterpret_cast<const uint8_t*>(result->getColumnarBuffer(col)),
244  buf_size,
245  result));
246  } else {
247  auto res = arrow::AllocateBuffer(buf_size);
248  CHECK(res.ok());
249  values = std::move(res).ValueOrDie();
250  result->copyColumnIntoBuffer(
251  col, reinterpret_cast<int8_t*>(values->mutable_data()), buf_size);
252  }
253 
254  int64_t null_count = 0;
255  auto res = arrow::AllocateBuffer((entry_count + 7) / 8);
256  CHECK(res.ok());
257  is_valid = std::move(res).ValueOrDie();
258 
259  auto is_valid_data = is_valid->mutable_data();
260 
261  const null_type_t<C_TYPE>* vals =
262  reinterpret_cast<const null_type_t<C_TYPE>*>(values->data());
263  null_type_t<C_TYPE> null_val = null_type<C_TYPE>::value;
264 
265  size_t unroll_count = entry_count & 0xFFFFFFFFFFFFFFF8ULL;
266  for (size_t i = 0; i < unroll_count; i += 8) {
267  uint8_t valid_byte = 0;
268  uint8_t valid;
269  valid = vals[i + 0] != null_val;
270  valid_byte |= valid << 0;
271  null_count += !valid;
272  valid = vals[i + 1] != null_val;
273  valid_byte |= valid << 1;
274  null_count += !valid;
275  valid = vals[i + 2] != null_val;
276  valid_byte |= valid << 2;
277  null_count += !valid;
278  valid = vals[i + 3] != null_val;
279  valid_byte |= valid << 3;
280  null_count += !valid;
281  valid = vals[i + 4] != null_val;
282  valid_byte |= valid << 4;
283  null_count += !valid;
284  valid = vals[i + 5] != null_val;
285  valid_byte |= valid << 5;
286  null_count += !valid;
287  valid = vals[i + 6] != null_val;
288  valid_byte |= valid << 6;
289  null_count += !valid;
290  valid = vals[i + 7] != null_val;
291  valid_byte |= valid << 7;
292  null_count += !valid;
293  is_valid_data[i >> 3] = valid_byte;
294  }
295  if (unroll_count != entry_count) {
296  uint8_t valid_byte = 0;
297  for (size_t i = unroll_count; i < entry_count; ++i) {
298  bool valid = vals[i] != null_val;
299  valid_byte |= valid << (i & 7);
300  null_count += !valid;
301  }
302  is_valid_data[unroll_count >> 3] = valid_byte;
303  }
304 
305  if (!null_count) {
306  is_valid.reset();
307  }
308 
309  // TODO: support date/time + scaling
310  // TODO: support booleans
311  if (null_count) {
312  out.reset(
313  new arrow::NumericArray<ARROW_TYPE>(entry_count, values, is_valid, null_count));
314  } else {
315  out.reset(new arrow::NumericArray<ARROW_TYPE>(entry_count, values));
316  }
317 }
#define CHECK(condition)
Definition: Logger.h:291
void anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_validity ( const ArrayTargetValue value,
const SQLTypeInfo col_type,
std::shared_ptr< std::vector< bool >> &  null_bitmap,
const size_t  max_size 
)

Definition at line 159 of file ArrowResultSetConverter.cpp.

References CHECK, and SQLTypeInfo::get_notnull().

Referenced by ArrowResultSetConverter::getArrowBatch().

162  {
163  if (col_type.get_notnull()) {
164  CHECK(!null_bitmap);
165  return;
166  }
167 
168  if (!null_bitmap) {
169  null_bitmap = std::make_shared<std::vector<bool>>();
170  null_bitmap->reserve(max_size);
171  }
172  CHECK(null_bitmap);
173  null_bitmap->push_back(value ? true : false);
174 }
#define CHECK(condition)
Definition: Logger.h:291
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_validity ( const ScalarTargetValue value,
const SQLTypeInfo col_type,
std::shared_ptr< std::vector< bool >> &  null_bitmap,
const size_t  max_size 
)

Definition at line 177 of file ArrowResultSetConverter.cpp.

References CHECK, SQLTypeInfo::get_notnull(), inline_fp_null_val(), inline_int_null_val(), SQLTypeInfo::is_boolean(), SQLTypeInfo::is_decimal(), SQLTypeInfo::is_dict_encoded_string(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_integer(), SQLTypeInfo::is_time(), and UNREACHABLE.

180  {
181  if (col_type.get_notnull()) {
182  CHECK(!null_bitmap);
183  return;
184  }
185  auto pvalue = boost::get<TYPE>(&value);
186  CHECK(pvalue);
187  bool is_valid = false;
188  if constexpr (std::is_same_v<TYPE, NullableString>) {
189  is_valid = boost::get<std::string>(pvalue) != nullptr;
190  } else {
191  if (col_type.is_boolean()) {
192  is_valid = inline_int_null_val(col_type) != static_cast<int8_t>(*pvalue);
193  } else if (col_type.is_dict_encoded_string()) {
194  is_valid = inline_int_null_val(col_type) != static_cast<int32_t>(*pvalue);
195  } else if (col_type.is_integer() || col_type.is_time() || col_type.is_decimal()) {
196  is_valid = inline_int_null_val(col_type) != static_cast<int64_t>(*pvalue);
197  } else if (col_type.is_fp()) {
198  is_valid = inline_fp_null_val(col_type) != static_cast<double>(*pvalue);
199  } else {
200  UNREACHABLE();
201  }
202  }
203 
204  if (!null_bitmap) {
205  null_bitmap = std::make_shared<std::vector<bool>>();
206  null_bitmap->reserve(max_size);
207  }
208  CHECK(null_bitmap);
209  null_bitmap->push_back(is_valid);
210 }
bool is_fp() const
Definition: sqltypes.h:571
#define UNREACHABLE()
Definition: Logger.h:338
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
bool is_time() const
Definition: sqltypes.h:577
bool is_integer() const
Definition: sqltypes.h:565
bool is_boolean() const
Definition: sqltypes.h:580
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
bool is_dict_encoded_string() const
Definition: sqltypes.h:641
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
bool is_decimal() const
Definition: sqltypes.h:568

+ Here is the call graph for this function:

template<typename TYPE , typename VALUE_ARRAY_TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_value ( const ScalarTargetValue val_cty,
std::shared_ptr< ValueArray > &  values,
const size_t  max_size 
)

Definition at line 110 of file ArrowResultSetConverter.cpp.

References CHECK.

112  {
113  if (!values) {
114  values = std::make_shared<ValueArray>(std::vector<TYPE>());
115  boost::get<std::vector<TYPE>>(*values).reserve(max_size);
116  }
117  CHECK(values);
118  auto values_ty = boost::get<std::vector<TYPE>>(values.get());
119  CHECK(values_ty);
120 
121  auto pval_cty = boost::get<VALUE_ARRAY_TYPE>(&val_cty);
122  CHECK(pval_cty);
123  if constexpr (std::is_same_v<VALUE_ARRAY_TYPE, NullableString>) {
124  if (auto str = boost::get<std::string>(pval_cty)) {
125  values_ty->push_back(*str);
126  } else {
127  values_ty->push_back("");
128  }
129  } else {
130  auto val_ty = static_cast<TYPE>(*pval_cty);
131  values_ty->push_back(val_ty);
132  }
133 }
#define CHECK(condition)
Definition: Logger.h:291
template<typename TYPE , typename VALUE_ARRAY_TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_value ( const ArrayTargetValue val_ctys,
std::shared_ptr< ValueArray > &  values,
const size_t  max_size 
)

Definition at line 136 of file ArrowResultSetConverter.cpp.

References CHECK.

138  {
139  if (!values) {
140  values = std::make_shared<ValueArray>(Vec2<TYPE>());
141  boost::get<Vec2<TYPE>>(*values).reserve(max_size);
142  }
143  CHECK(values);
144 
145  Vec2<TYPE>* values_ty = boost::get<Vec2<TYPE>>(values.get());
146  CHECK(values_ty);
147 
148  values_ty->emplace_back(std::vector<TYPE>{});
149 
150  if (val_ctys) {
151  for (auto val_cty : val_ctys.value()) {
152  auto pval_cty = boost::get<VALUE_ARRAY_TYPE>(&val_cty);
153  CHECK(pval_cty);
154  values_ty->back().emplace_back(static_cast<TYPE>(*pval_cty));
155  }
156  }
157 }
std::vector< std::vector< T >> Vec2
#define CHECK(condition)
Definition: Logger.h:291
std::shared_ptr<arrow::DataType> anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type ( const SQLTypeInfo sql_type,
const ExecutorDeviceType  device_type 
)

Definition at line 1114 of file ArrowResultSetConverter.cpp.

References foreign_storage::get_physical_type(), SQLTypeInfo::get_precision(), SQLTypeInfo::get_scale(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type_name(), GPU, SQLTypeInfo::is_dict_encoded_string(), SQLTypeInfo::is_dict_encoded_type(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and to_string().

Referenced by ArrowResultSetConverter::makeField().

1115  {
1116  switch (get_physical_type(sql_type)) {
1117  case kBOOLEAN:
1118  return arrow::boolean();
1119  case kTINYINT:
1120  return arrow::int8();
1121  case kSMALLINT:
1122  return arrow::int16();
1123  case kINT:
1124  return arrow::int32();
1125  case kBIGINT:
1126  return arrow::int64();
1127  case kFLOAT:
1128  return arrow::float32();
1129  case kDOUBLE:
1130  return arrow::float64();
1131  case kCHAR:
1132  case kVARCHAR:
1133  case kTEXT:
1134  if (sql_type.is_dict_encoded_string()) {
1135  auto value_type = std::make_shared<arrow::StringType>();
1136  return arrow::dictionary(arrow::int32(), value_type, false);
1137  }
1138  return arrow::utf8();
1139  case kDECIMAL:
1140  case kNUMERIC:
1141  return arrow::decimal(sql_type.get_precision(), sql_type.get_scale());
1142  case kTIME:
1143  return time32(arrow::TimeUnit::SECOND);
1144  case kDATE: {
1145  // TODO(wamsi) : Remove date64() once date32() support is added in cuDF. date32()
1146  // Currently support for date32() is missing in cuDF.Hence, if client requests for
1147  // date on GPU, return date64() for the time being, till support is added.
1148  if (device_type == ExecutorDeviceType::GPU) {
1149  return arrow::date64();
1150  } else {
1151  return arrow::date32();
1152  }
1153  }
1154  case kTIMESTAMP:
1155  switch (sql_type.get_precision()) {
1156  case 0:
1157  return timestamp(arrow::TimeUnit::SECOND);
1158  case 3:
1159  return timestamp(arrow::TimeUnit::MILLI);
1160  case 6:
1161  return timestamp(arrow::TimeUnit::MICRO);
1162  case 9:
1163  return timestamp(arrow::TimeUnit::NANO);
1164  default:
1165  throw std::runtime_error(
1166  "Unsupported timestamp precision for Arrow result sets: " +
1167  std::to_string(sql_type.get_precision()));
1168  }
1169  case kARRAY:
1170  switch (sql_type.get_subtype()) {
1171  case kBOOLEAN:
1172  return arrow::list(arrow::boolean());
1173  case kTINYINT:
1174  return arrow::list(arrow::int8());
1175  case kSMALLINT:
1176  return arrow::list(arrow::int16());
1177  case kINT:
1178  return arrow::list(arrow::int32());
1179  case kBIGINT:
1180  return arrow::list(arrow::int64());
1181  case kFLOAT:
1182  return arrow::list(arrow::float32());
1183  case kDOUBLE:
1184  return arrow::list(arrow::float64());
1185  case kTEXT:
1186  if (sql_type.is_dict_encoded_type()) {
1187  auto value_type = std::make_shared<arrow::StringType>();
1188  return arrow::list(arrow::dictionary(arrow::int32(), value_type, false));
1189  }
1190  default:
1191  throw std::runtime_error("Unsupported array type for Arrow result sets: " +
1192  sql_type.get_type_name());
1193  }
1194  case kINTERVAL_DAY_TIME:
1195  case kINTERVAL_YEAR_MONTH:
1196  default:
1197  throw std::runtime_error(sql_type.get_type_name() +
1198  " is not supported in Arrow result sets.");
1199  }
1200  return nullptr;
1201 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
Definition: sqltypes.h:76
HOST DEVICE int get_scale() const
Definition: sqltypes.h:396
parquet::Type::type get_physical_type(ReaderPtr &reader, const int logical_column_index)
std::string to_string(char const *&&v)
bool is_dict_encoded_type() const
Definition: sqltypes.h:653
int get_precision() const
Definition: sqltypes.h:394
Definition: sqltypes.h:79
Definition: sqltypes.h:80
std::string get_type_name() const
Definition: sqltypes.h:482
Definition: sqltypes.h:68
bool is_dict_encoded_string() const
Definition: sqltypes.h:641
Definition: sqltypes.h:72

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypes anonymous_namespace{ArrowResultSetConverter.cpp}::get_dict_index_type ( const SQLTypeInfo ti)
inline

Definition at line 73 of file ArrowResultSetConverter.cpp.

References CHECK, SQLTypeInfo::get_size(), SQLTypeInfo::get_type(), SQLTypeInfo::is_dict_encoded_string(), kBIGINT, kINT, kSMALLINT, and kTINYINT.

Referenced by ArrowResultSetConverter::initializeColumnBuilder().

73  {
74  CHECK(ti.is_dict_encoded_string());
75  switch (ti.get_size()) {
76  case 1:
77  return kTINYINT;
78  case 2:
79  return kSMALLINT;
80  case 4:
81  return kINT;
82  case 8:
83  return kBIGINT;
84  default:
85  CHECK(false);
86  }
87  return ti.get_type();
88 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
#define CHECK(condition)
Definition: Logger.h:291
bool is_dict_encoded_string() const
Definition: sqltypes.h:641
Definition: sqltypes.h:72

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypes anonymous_namespace{ArrowResultSetConverter.cpp}::get_physical_type ( const SQLTypeInfo ti)
inline

Definition at line 90 of file ArrowResultSetConverter.cpp.

References CHECK, SQLTypeInfo::get_size(), SQLTypeInfo::get_type(), IS_INTEGER, kBIGINT, kINT, kSMALLINT, and kTINYINT.

90  {
91  auto logical_type = ti.get_type();
92  if (IS_INTEGER(logical_type)) {
93  switch (ti.get_size()) {
94  case 1:
95  return kTINYINT;
96  case 2:
97  return kSMALLINT;
98  case 4:
99  return kINT;
100  case 8:
101  return kBIGINT;
102  default:
103  CHECK(false);
104  }
105  }
106  return logical_type;
107 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
#define IS_INTEGER(T)
Definition: sqltypes.h:304
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqltypes.h:72

+ Here is the call graph for this function:

std::pair<key_t, void*> anonymous_namespace{ArrowResultSetConverter.cpp}::get_shm ( size_t  shmsz)

Definition at line 320 of file ArrowResultSetConverter.cpp.

Referenced by arrow::get_and_copy_to_shm(), and get_shm_buffer().

320  {
321  if (!shmsz) {
322  return std::make_pair(IPC_PRIVATE, nullptr);
323  }
324  // Generate a new key for a shared memory segment. Keys to shared memory segments
325  // are OS global, so we need to try a new key if we encounter a collision. It seems
326  // incremental keygen would be deterministically worst-case. If we use a hash
327  // (like djb2) + nonce, we could still get collisions if multiple clients specify
328  // the same nonce, so using rand() in lieu of a better approach
329  // TODO(ptaylor): Is this common? Are these assumptions true?
330  auto key = static_cast<key_t>(rand());
331  int shmid = -1;
332  // IPC_CREAT - indicates we want to create a new segment for this key if it doesn't
333  // exist IPC_EXCL - ensures failure if a segment already exists for this key
334  while ((shmid = shmget(key, shmsz, IPC_CREAT | IPC_EXCL | 0666)) < 0) {
335  // If shmget fails and errno is one of these four values, try a new key.
336  // TODO(ptaylor): is checking for the last three values really necessary? Checking
337  // them by default to be safe. EEXIST - a shared memory segment is already associated
338  // with this key EACCES - a shared memory segment is already associated with this key,
339  // but we don't have permission to access it EINVAL - a shared memory segment is
340  // already associated with this key, but the size is less than shmsz ENOENT -
341  // IPC_CREAT was not set in shmflg and no shared memory segment associated with key
342  // was found
343  if (!(errno & (EEXIST | EACCES | EINVAL | ENOENT))) {
344  throw std::runtime_error("failed to create a shared memory");
345  }
346  key = static_cast<key_t>(rand());
347  }
348  // get a pointer to the shared memory segment
349  auto ipc_ptr = shmat(shmid, NULL, 0);
350  if (reinterpret_cast<int64_t>(ipc_ptr) == -1) {
351  throw std::runtime_error("failed to attach a shared memory");
352  }
353 
354  return std::make_pair(key, ipc_ptr);
355 }

+ Here is the caller graph for this function:

std::pair<key_t, std::shared_ptr<arrow::Buffer> > anonymous_namespace{ArrowResultSetConverter.cpp}::get_shm_buffer ( size_t  size)

Definition at line 358 of file ArrowResultSetConverter.cpp.

References get_shm().

Referenced by ArrowResultSetConverter::getArrowResult().

358  {
359 #ifdef _MSC_VER
360  throw std::runtime_error("Arrow IPC not yet supported on Windows.");
361  return std::make_pair(0, nullptr);
362 #else
363  auto [key, ipc_ptr] = get_shm(size);
364  std::shared_ptr<arrow::Buffer> buffer(
365  new arrow::MutableBuffer(static_cast<uint8_t*>(ipc_ptr), size));
366  return std::make_pair<key_t, std::shared_ptr<arrow::Buffer>>(std::move(key),
367  std::move(buffer));
368 #endif
369 }
std::pair< key_t, void * > get_shm(size_t shmsz)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{ArrowResultSetConverter.cpp}::remap_string_values ( const ArrowResultSetConverter::ColumnBuilder column_builder,
const std::vector< uint8_t > &  bitmap,
std::vector< int64_t > &  vec1d 
)

Definition at line 371 of file ArrowResultSetConverter.cpp.

References ALL_STRINGS_REMAPPED, ONLY_TRANSIENT_STRINGS_REMAPPED, ArrowResultSetConverter::ColumnBuilder::string_remap_mode, ArrowResultSetConverter::ColumnBuilder::string_remapping, and UNREACHABLE.

Referenced by appendToListColumnBuilder< arrow::StringDictionaryBuilder, int64_t >().

373  {
374  /*
375  remap negative values if ArrowStringRemapMode == ONLY_TRANSIENT_STRINGS_REMAPPED or
376  everything if ALL_STRINGS_REMAPPED
377  */
378 
379  auto all_strings_remapped_bitmap = [&column_builder, &vec1d, &bitmap]() {
380  for (size_t i = 0; i < vec1d.size(); i++) {
381  if (bitmap[i]) {
382  vec1d[i] = column_builder.string_remapping.at(vec1d[i]);
383  }
384  }
385  };
386 
387  auto all_strings_remapped = [&column_builder, &vec1d]() {
388  for (size_t i = 0; i < vec1d.size(); i++) {
389  vec1d[i] = column_builder.string_remapping.at(vec1d[i]);
390  }
391  };
392 
393  auto only_transient_strings_remapped = [&column_builder, &vec1d]() {
394  for (size_t i = 0; i < vec1d.size(); i++) {
395  if (vec1d[i] < 0) {
396  vec1d[i] = column_builder.string_remapping.at(vec1d[i]);
397  }
398  }
399  };
400 
401  auto only_transient_strings_remapped_bitmap = [&column_builder, &vec1d, &bitmap]() {
402  for (size_t i = 0; i < vec1d.size(); i++) {
403  if (bitmap[i] && vec1d[i] < 0) {
404  vec1d[i] = column_builder.string_remapping.at(vec1d[i]);
405  }
406  }
407  };
408 
409  switch (column_builder.string_remap_mode) {
410  case ArrowStringRemapMode::ALL_STRINGS_REMAPPED:
411  bitmap.empty() ? all_strings_remapped() : all_strings_remapped_bitmap();
412  break;
413  case ArrowStringRemapMode::ONLY_TRANSIENT_STRINGS_REMAPPED:
414  bitmap.empty() ? only_transient_strings_remapped()
415  : only_transient_strings_remapped_bitmap();
416  break;
417  default:
418  UNREACHABLE();
419  }
420 }
#define UNREACHABLE()
Definition: Logger.h:338
std::unordered_map< StrId, ArrowStrId > string_remapping

+ Here is the caller graph for this function: