OmniSciDB  dfae7c3b14
anonymous_namespace{ArrowResultSetConverter.cpp} Namespace Reference

Classes

class  null_type
 
struct  null_type< TYPE, std::enable_if_t< std::is_floating_point< TYPE >::value > >
 
struct  null_type< TYPE, std::enable_if_t< std::is_integral< TYPE >::value > >
 

Typedefs

template<typename TYPE >
using null_type_t = typename null_type< TYPE >::type
 

Functions

SQLTypes get_dict_index_type (const SQLTypeInfo &ti)
 
SQLTypes get_physical_type (const SQLTypeInfo &ti)
 
template<typename TYPE , typename VALUE_ARRAY_TYPE >
void create_or_append_value (const ScalarTargetValue &val_cty, std::shared_ptr< ValueArray > &values, const size_t max_size)
 
template<typename TYPE >
void create_or_append_validity (const ScalarTargetValue &value, const SQLTypeInfo &col_type, std::shared_ptr< std::vector< bool >> &null_bitmap, const size_t max_size)
 
template<typename C_TYPE , typename ARROW_TYPE = typename CTypeTraits<C_TYPE>::ArrowType>
void convert_column (ResultSetPtr result, size_t col, std::unique_ptr< int8_t[]> &values, std::unique_ptr< uint8_t[]> &is_valid, size_t entry_count, std::shared_ptr< Array > &out)
 
std::pair< key_t, void * > get_shm (size_t shmsz)
 
std::pair< key_t, std::shared_ptr< Buffer > > get_shm_buffer (size_t size)
 
std::shared_ptr< arrow::DataType > get_arrow_type (const SQLTypeInfo &sql_type, const ExecutorDeviceType device_type)
 
template<typename BUILDER_TYPE , typename VALUE_ARRAY_TYPE >
void appendToColumnBuilder (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 
template<>
void appendToColumnBuilder< arrow::StringDictionary32Builder, int32_t > (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 

Typedef Documentation

◆ null_type_t

template<typename TYPE >
using anonymous_namespace{ArrowResultSetConverter.cpp}::null_type_t = typedef typename null_type<TYPE>::type

Definition at line 156 of file ArrowResultSetConverter.cpp.

Function Documentation

◆ appendToColumnBuilder()

template<typename BUILDER_TYPE , typename VALUE_ARRAY_TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToColumnBuilder ( ArrowResultSetConverter::ColumnBuilder &  column_builder,
const ValueArray &  values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 1031 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, CHECK, ArrowResultSetConverter::ColumnBuilder::field, DateConverters::get_epoch_days_from_seconds(), and kMilliSecsPerSec.

1033  {
1034  static_assert(!std::is_same<BUILDER_TYPE, arrow::StringDictionary32Builder>::value,
1035  "Dictionary encoded string builder requires function specialization.");
1036 
1037  std::vector<VALUE_ARRAY_TYPE> vals = boost::get<std::vector<VALUE_ARRAY_TYPE>>(values);
1038 
1039  if (scale_epoch_values<BUILDER_TYPE>()) {
1040  auto scale_sec_to_millisec = [](auto seconds) { return seconds * kMilliSecsPerSec; };
1041  auto scale_values = [&](auto epoch) {
1042  return std::is_same<BUILDER_TYPE, Date32Builder>::value
1043  ? DateConverters::get_epoch_days_from_seconds(epoch)
1044  : scale_sec_to_millisec(epoch);
1045  };
1046  std::transform(vals.begin(), vals.end(), vals.begin(), scale_values);
1047  }
1048 
1049  auto typed_builder = dynamic_cast<BUILDER_TYPE*>(column_builder.builder.get());
1050  CHECK(typed_builder);
1051  if (column_builder.field->nullable()) {
1052  CHECK(is_valid.get());
1053  ARROW_THROW_NOT_OK(typed_builder->AppendValues(vals, *is_valid));
1054  } else {
1055  ARROW_THROW_NOT_OK(typed_builder->AppendValues(vals));
1056  }
1057 }
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
std::shared_ptr< arrow::Field > field
static constexpr int64_t kMilliSecsPerSec
#define CHECK(condition)
Definition: Logger.h:197
int64_t get_epoch_days_from_seconds(const int64_t seconds)
+ Here is the call graph for this function:

◆ appendToColumnBuilder< arrow::StringDictionary32Builder, int32_t >()

template<>
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToColumnBuilder< arrow::StringDictionary32Builder, int32_t > ( ArrowResultSetConverter::ColumnBuilder &  column_builder,
const ValueArray &  values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 1060 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, and CHECK.

1063  {
1064  auto typed_builder =
1065  dynamic_cast<arrow::StringDictionary32Builder*>(column_builder.builder.get());
1066  CHECK(typed_builder);
1067 
1068  std::vector<int32_t> vals = boost::get<std::vector<int32_t>>(values);
1069 
1070  if (column_builder.field->nullable()) {
1071  CHECK(is_valid.get());
1072  // TODO(adb): Generate this instead of the boolean bitmap
1073  std::vector<uint8_t> transformed_bitmap;
1074  transformed_bitmap.reserve(is_valid->size());
1075  std::for_each(
1076  is_valid->begin(), is_valid->end(), [&transformed_bitmap](const bool is_valid) {
1077  transformed_bitmap.push_back(is_valid ? 1 : 0);
1078  });
1079 
1080  ARROW_THROW_NOT_OK(typed_builder->AppendIndices(
1081  vals.data(), static_cast<int64_t>(vals.size()), transformed_bitmap.data()));
1082  } else {
1083  ARROW_THROW_NOT_OK(
1084  typed_builder->AppendIndices(vals.data(), static_cast<int64_t>(vals.size())));
1085  }
1086 }
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
std::shared_ptr< arrow::Field > field
#define CHECK(condition)
Definition: Logger.h:197

◆ convert_column()

template<typename C_TYPE , typename ARROW_TYPE = typename CTypeTraits<C_TYPE>::ArrowType>
void anonymous_namespace{ArrowResultSetConverter.cpp}::convert_column ( ResultSetPtr  result,
size_t  col,
std::unique_ptr< int8_t[]> &  values,
std::unique_ptr< uint8_t[]> &  is_valid,
size_t  entry_count,
std::shared_ptr< Array > &  out 
)

Definition at line 159 of file ArrowResultSetConverter.cpp.

References CHECK.

164  {
165  CHECK(sizeof(C_TYPE) == result->getColType(col).get_size());
166  CHECK(!values);
167  CHECK(!is_valid);
168 
169  const int8_t* data_ptr;
170  if (result->isZeroCopyColumnarConversionPossible(col)) {
171  data_ptr = result->getColumnarBuffer(col);
172  } else {
173  values.reset(new int8_t[entry_count * sizeof(C_TYPE)]);
174  result->copyColumnIntoBuffer(col, values.get(), entry_count * sizeof(C_TYPE));
175  data_ptr = values.get();
176  }
177 
178  int64_t null_count = 0;
179  is_valid.reset(new uint8_t[(entry_count + 7) / 8]);
180 
181  const null_type_t<C_TYPE>* vals =
182  reinterpret_cast<const null_type_t<C_TYPE>*>(data_ptr);
183  null_type_t<C_TYPE> null_val = null_type<C_TYPE>::value;
184 
185  size_t unroll_count = entry_count & 0xFFFFFFFFFFFFFFF8ULL;
186  for (size_t i = 0; i < unroll_count; i += 8) {
187  uint8_t valid_byte = 0;
188  uint8_t valid;
189  valid = vals[i + 0] != null_val;
190  valid_byte |= valid << 0;
191  null_count += !valid;
192  valid = vals[i + 1] != null_val;
193  valid_byte |= valid << 1;
194  null_count += !valid;
195  valid = vals[i + 2] != null_val;
196  valid_byte |= valid << 2;
197  null_count += !valid;
198  valid = vals[i + 3] != null_val;
199  valid_byte |= valid << 3;
200  null_count += !valid;
201  valid = vals[i + 4] != null_val;
202  valid_byte |= valid << 4;
203  null_count += !valid;
204  valid = vals[i + 5] != null_val;
205  valid_byte |= valid << 5;
206  null_count += !valid;
207  valid = vals[i + 6] != null_val;
208  valid_byte |= valid << 6;
209  null_count += !valid;
210  valid = vals[i + 7] != null_val;
211  valid_byte |= valid << 7;
212  null_count += !valid;
213  is_valid[i >> 3] = valid_byte;
214  }
215  if (unroll_count != entry_count) {
216  uint8_t valid_byte = 0;
217  for (size_t i = unroll_count; i < entry_count; ++i) {
218  bool valid = vals[i] != null_val;
219  valid_byte |= valid << (i & 7);
220  null_count += !valid;
221  }
222  is_valid[unroll_count >> 3] = valid_byte;
223  }
224 
225  if (!null_count) {
226  is_valid.reset();
227  }
228 
229  // TODO: support date/time + scaling
230  // TODO: support booleans
231  std::shared_ptr<Buffer> data(new Buffer(reinterpret_cast<const uint8_t*>(data_ptr),
232  entry_count * sizeof(C_TYPE)));
233  if (null_count) {
234  std::shared_ptr<Buffer> null_bitmap(
235  new Buffer(is_valid.get(), (entry_count + 7) / 8));
236  out.reset(new NumericArray<ARROW_TYPE>(entry_count, data, null_bitmap, null_count));
237  } else {
238  out.reset(new NumericArray<ARROW_TYPE>(entry_count, data));
239  }
240 }
#define CHECK(condition)
Definition: Logger.h:197
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t ** out

◆ create_or_append_validity()

template<typename TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_validity ( const ScalarTargetValue &  value,
const SQLTypeInfo &  col_type,
std::shared_ptr< std::vector< bool >> &  null_bitmap,
const size_t  max_size 
)

Definition at line 109 of file ArrowResultSetConverter.cpp.

References CHECK, SQLTypeInfo::get_notnull(), inline_fp_null_val(), inline_int_null_val(), SQLTypeInfo::is_boolean(), SQLTypeInfo::is_dict_encoded_string(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_integer(), SQLTypeInfo::is_time(), and UNREACHABLE.

112  {
113  if (col_type.get_notnull()) {
114  CHECK(!null_bitmap);
115  return;
116  }
117  auto pvalue = boost::get<TYPE>(&value);
118  CHECK(pvalue);
119  bool is_valid = false;
120  if (col_type.is_boolean()) {
121  is_valid = inline_int_null_val(col_type) != static_cast<int8_t>(*pvalue);
122  } else if (col_type.is_dict_encoded_string()) {
123  is_valid = inline_int_null_val(col_type) != static_cast<int32_t>(*pvalue);
124  } else if (col_type.is_integer() || col_type.is_time()) {
125  is_valid = inline_int_null_val(col_type) != static_cast<int64_t>(*pvalue);
126  } else if (col_type.is_fp()) {
127  is_valid = inline_fp_null_val(col_type) != static_cast<double>(*pvalue);
128  } else {
129  UNREACHABLE();
130  }
131 
132  if (!null_bitmap) {
133  null_bitmap = std::make_shared<std::vector<bool>>();
134  null_bitmap->reserve(max_size);
135  }
136  CHECK(null_bitmap);
137  null_bitmap->push_back(is_valid);
138 }
bool is_time() const
Definition: sqltypes.h:423
bool is_boolean() const
Definition: sqltypes.h:424
bool is_integer() const
Definition: sqltypes.h:419
#define UNREACHABLE()
Definition: Logger.h:241
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:266
bool is_dict_encoded_string() const
Definition: sqltypes.h:444
#define CHECK(condition)
Definition: Logger.h:197
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
bool is_fp() const
Definition: sqltypes.h:421
+ Here is the call graph for this function:

◆ create_or_append_value()

template<typename TYPE , typename VALUE_ARRAY_TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_value ( const ScalarTargetValue &  val_cty,
std::shared_ptr< ValueArray > &  values,
const size_t  max_size 
)

Definition at line 92 of file ArrowResultSetConverter.cpp.

References CHECK.

94  {
95  auto pval_cty = boost::get<VALUE_ARRAY_TYPE>(&val_cty);
96  CHECK(pval_cty);
97  auto val_ty = static_cast<TYPE>(*pval_cty);
98  if (!values) {
99  values = std::make_shared<ValueArray>(std::vector<TYPE>());
100  boost::get<std::vector<TYPE>>(*values).reserve(max_size);
101  }
102  CHECK(values);
103  auto values_ty = boost::get<std::vector<TYPE>>(values.get());
104  CHECK(values_ty);
105  values_ty->push_back(val_ty);
106 }
#define CHECK(condition)
Definition: Logger.h:197

◆ get_arrow_type()

std::shared_ptr<arrow::DataType> anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type ( const SQLTypeInfo &  sql_type,
const ExecutorDeviceType  device_type 
)

Definition at line 879 of file ArrowResultSetConverter.cpp.

References get_physical_type(), SQLTypeInfo::get_precision(), SQLTypeInfo::get_scale(), SQLTypeInfo::get_type_name(), GPU, SQLTypeInfo::is_dict_encoded_string(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and to_string().

Referenced by ArrowResultSetConverter::makeField().

880  {
881  switch (get_physical_type(sql_type)) {
882  case kBOOLEAN:
883  return arrow::boolean();
884  case kTINYINT:
885  return arrow::int8();
886  case kSMALLINT:
887  return arrow::int16();
888  case kINT:
889  return arrow::int32();
890  case kBIGINT:
891  return arrow::int64();
892  case kFLOAT:
893  return arrow::float32();
894  case kDOUBLE:
895  return arrow::float64();
896  case kCHAR:
897  case kVARCHAR:
898  case kTEXT:
899  if (sql_type.is_dict_encoded_string()) {
900  auto value_type = std::make_shared<StringType>();
901  return dictionary(int32(), value_type, false);
902  }
903  return utf8();
904  case kDECIMAL:
905  case kNUMERIC:
906  return decimal(sql_type.get_precision(), sql_type.get_scale());
907  case kTIME:
908  return time32(TimeUnit::SECOND);
909  case kDATE:
910  // TODO(wamsi) : Remove date64() once date32() support is added in cuDF. date32()
911  // Currently support for date32() is missing in cuDF.Hence, if client requests for
912  // date on GPU, return date64() for the time being, till support is added.
913  return device_type == ExecutorDeviceType::GPU ? date64() : date32();
914  case kTIMESTAMP:
915  switch (sql_type.get_precision()) {
916  case 0:
917  return timestamp(TimeUnit::SECOND);
918  case 3:
919  return timestamp(TimeUnit::MILLI);
920  case 6:
921  return timestamp(TimeUnit::MICRO);
922  case 9:
923  return timestamp(TimeUnit::NANO);
924  default:
925  throw std::runtime_error(
926  "Unsupported timestamp precision for Arrow result sets: " +
927  std::to_string(sql_type.get_precision()));
928  }
929  case kARRAY:
930  case kINTERVAL_DAY_TIME:
931  case kINTERVAL_YEAR_MONTH:
932  default:
933  throw std::runtime_error(sql_type.get_type_name() +
934  " is not supported in Arrow result sets.");
935  }
936  return nullptr;
937 }
int get_precision() const
Definition: sqltypes.h:262
Definition: sqltypes.h:51
HOST DEVICE int get_scale() const
Definition: sqltypes.h:264
std::string to_string(char const *&&v)
bool is_dict_encoded_string() const
Definition: sqltypes.h:444
Definition: sqltypes.h:54
Definition: sqltypes.h:55
std::string get_type_name() const
Definition: sqltypes.h:362
Definition: sqltypes.h:43
Definition: sqltypes.h:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_dict_index_type()

SQLTypes anonymous_namespace{ArrowResultSetConverter.cpp}::get_dict_index_type ( const SQLTypeInfo &  ti)
inline

Definition at line 55 of file ArrowResultSetConverter.cpp.

References CHECK, SQLTypeInfo::get_size(), SQLTypeInfo::get_type(), SQLTypeInfo::is_dict_encoded_string(), kBIGINT, kINT, kSMALLINT, and kTINYINT.

Referenced by ArrowResultSetConverter::initializeColumnBuilder().

55  {
56  CHECK(ti.is_dict_encoded_string());
57  switch (ti.get_size()) {
58  case 1:
59  return kTINYINT;
60  case 2:
61  return kSMALLINT;
62  case 4:
63  return kINT;
64  case 8:
65  return kBIGINT;
66  default:
67  CHECK(false);
68  }
69  return ti.get_type();
70 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
bool is_dict_encoded_string() const
Definition: sqltypes.h:444
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
Definition: sqltypes.h:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_physical_type()

SQLTypes anonymous_namespace{ArrowResultSetConverter.cpp}::get_physical_type ( const SQLTypeInfo &  ti)
inline

Definition at line 72 of file ArrowResultSetConverter.cpp.

References CHECK, SQLTypeInfo::get_size(), SQLTypeInfo::get_type(), IS_INTEGER, kBIGINT, kINT, kSMALLINT, and kTINYINT.

Referenced by get_arrow_type(), and ArrowResultSetConverter::initializeColumnBuilder().

72  {
73  auto logical_type = ti.get_type();
74  if (IS_INTEGER(logical_type)) {
75  switch (ti.get_size()) {
76  case 1:
77  return kTINYINT;
78  case 2:
79  return kSMALLINT;
80  case 4:
81  return kINT;
82  case 8:
83  return kBIGINT;
84  default:
85  CHECK(false);
86  }
87  }
88  return logical_type;
89 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
#define IS_INTEGER(T)
Definition: sqltypes.h:168
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
Definition: sqltypes.h:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_shm()

std::pair<key_t, void*> anonymous_namespace{ArrowResultSetConverter.cpp}::get_shm ( size_t  shmsz)

Definition at line 243 of file ArrowResultSetConverter.cpp.

Referenced by arrow::get_and_copy_to_shm(), and get_shm_buffer().

243  {
244  if (!shmsz) {
245  return std::make_pair(IPC_PRIVATE, nullptr);
246  }
247  // Generate a new key for a shared memory segment. Keys to shared memory segments
248  // are OS global, so we need to try a new key if we encounter a collision. It seems
249  // incremental keygen would be deterministically worst-case. If we use a hash
250  // (like djb2) + nonce, we could still get collisions if multiple clients specify
251  // the same nonce, so using rand() in lieu of a better approach
252  // TODO(ptaylor): Is this common? Are these assumptions true?
253  auto key = static_cast<key_t>(rand());
254  int shmid = -1;
255  // IPC_CREAT - indicates we want to create a new segment for this key if it doesn't
256  // exist IPC_EXCL - ensures failure if a segment already exists for this key
257  while ((shmid = shmget(key, shmsz, IPC_CREAT | IPC_EXCL | 0666)) < 0) {
258  // If shmget fails and errno is one of these four values, try a new key.
259  // TODO(ptaylor): is checking for the last three values really necessary? Checking
260  // them by default to be safe. EEXIST - a shared memory segment is already associated
261  // with this key EACCES - a shared memory segment is already associated with this key,
262  // but we don't have permission to access it EINVAL - a shared memory segment is
263  // already associated with this key, but the size is less than shmsz ENOENT -
264  // IPC_CREAT was not set in shmflg and no shared memory segment associated with key
265  // was found
266  if (!(errno & (EEXIST | EACCES | EINVAL | ENOENT))) {
267  throw std::runtime_error("failed to create a shared memory");
268  }
269  key = static_cast<key_t>(rand());
270  }
271  // get a pointer to the shared memory segment
272  auto ipc_ptr = shmat(shmid, NULL, 0);
273  if (reinterpret_cast<int64_t>(ipc_ptr) == -1) {
274  throw std::runtime_error("failed to attach a shared memory");
275  }
276 
277  return std::make_pair(key, ipc_ptr);
278 }
+ Here is the caller graph for this function:

◆ get_shm_buffer()

std::pair<key_t, std::shared_ptr<Buffer> > anonymous_namespace{ArrowResultSetConverter.cpp}::get_shm_buffer ( size_t  size)

Definition at line 281 of file ArrowResultSetConverter.cpp.

References get_shm().

Referenced by ArrowResultSetConverter::getArrowResult().

281  {
282 #ifdef _MSC_VER
283  throw std::runtime_error("Arrow IPC not yet supported on Windows.");
284  return std::make_pair(0, nullptr);
285 #else
286  auto [key, ipc_ptr] = get_shm(size);
287  std::shared_ptr<Buffer> buffer(new MutableBuffer(static_cast<uint8_t*>(ipc_ptr), size));
288  return std::make_pair<key_t, std::shared_ptr<Buffer>>(std::move(key),
289  std::move(buffer));
290 #endif
291 }
std::pair< key_t, void * > get_shm(size_t shmsz)
+ Here is the call graph for this function:
+ Here is the caller graph for this function: