OmniSciDB  a667adc9c8
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
anonymous_namespace{ArrowResultSetConverter.cpp} Namespace Reference

Classes

class  ResultSetBuffer
 
class  null_type
 
struct  null_type< TYPE, std::enable_if_t< std::is_integral< TYPE >::value > >
 
struct  null_type< TYPE, std::enable_if_t< std::is_floating_point< TYPE >::value > >
 

Typedefs

template<typename TYPE >
using null_type_t = typename null_type< TYPE >::type
 

Functions

SQLTypes get_dict_index_type (const SQLTypeInfo &ti)
 
SQLTypes get_physical_type (const SQLTypeInfo &ti)
 
template<typename TYPE , typename VALUE_ARRAY_TYPE >
void create_or_append_value (const ScalarTargetValue &val_cty, std::shared_ptr< ValueArray > &values, const size_t max_size)
 
template<typename TYPE >
void create_or_append_validity (const ScalarTargetValue &value, const SQLTypeInfo &col_type, std::shared_ptr< std::vector< bool >> &null_bitmap, const size_t max_size)
 
template<typename C_TYPE , typename ARROW_TYPE = typename CTypeTraits<C_TYPE>::ArrowType>
void convert_column (ResultSetPtr result, size_t col, size_t entry_count, std::shared_ptr< Array > &out)
 
std::pair< key_t, void * > get_shm (size_t shmsz)
 
std::pair< key_t,
std::shared_ptr< Buffer > > 
get_shm_buffer (size_t size)
 
std::shared_ptr< arrow::DataType > get_arrow_type (const SQLTypeInfo &sql_type, const ExecutorDeviceType device_type)
 
template<typename BUILDER_TYPE , typename VALUE_ARRAY_TYPE >
void appendToColumnBuilder (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 
template<>
void appendToColumnBuilder< arrow::StringDictionary32Builder, int32_t > (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 

Typedef Documentation

template<typename TYPE >
using anonymous_namespace{ArrowResultSetConverter.cpp}::null_type_t = typename null_type<TYPE>::type

Definition at line 176 of file ArrowResultSetConverter.cpp.

Function Documentation

template<typename BUILDER_TYPE , typename VALUE_ARRAY_TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToColumnBuilder ( ArrowResultSetConverter::ColumnBuilder &  column_builder,
const ValueArray &  values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 1057 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, CHECK, ArrowResultSetConverter::ColumnBuilder::field, DateConverters::get_epoch_days_from_seconds(), and kMilliSecsPerSec.

1059  {
1060  static_assert(!std::is_same<BUILDER_TYPE, arrow::StringDictionary32Builder>::value,
1061  "Dictionary encoded string builder requires function specialization.");
1062 
1063  std::vector<VALUE_ARRAY_TYPE> vals = boost::get<std::vector<VALUE_ARRAY_TYPE>>(values);
1064 
1065  if (scale_epoch_values<BUILDER_TYPE>()) {
1066  auto scale_sec_to_millisec = [](auto seconds) { return seconds * kMilliSecsPerSec; };
1067  auto scale_values = [&](auto epoch) {
1068  return std::is_same<BUILDER_TYPE, Date32Builder>::value
1069  ? DateConverters::get_epoch_days_from_seconds(epoch)
1070  : scale_sec_to_millisec(epoch);
1071  };
1072  std::transform(vals.begin(), vals.end(), vals.begin(), scale_values);
1073  }
1074 
1075  auto typed_builder = dynamic_cast<BUILDER_TYPE*>(column_builder.builder.get());
1076  CHECK(typed_builder);
1077  if (column_builder.field->nullable()) {
1078  CHECK(is_valid.get());
1079  ARROW_THROW_NOT_OK(typed_builder->AppendValues(vals, *is_valid));
1080  } else {
1081  ARROW_THROW_NOT_OK(typed_builder->AppendValues(vals));
1082  }
1083 }
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
std::shared_ptr< arrow::Field > field
static constexpr int64_t kMilliSecsPerSec
#define CHECK(condition)
Definition: Logger.h:197
int64_t get_epoch_days_from_seconds(const int64_t seconds)

+ Here is the call graph for this function:

template<>
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToColumnBuilder< arrow::StringDictionary32Builder, int32_t > ( ArrowResultSetConverter::ColumnBuilder &  column_builder,
const ValueArray &  values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 1086 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, and CHECK.

1089  {
1090  auto typed_builder =
1091  dynamic_cast<arrow::StringDictionary32Builder*>(column_builder.builder.get());
1092  CHECK(typed_builder);
1093 
1094  std::vector<int32_t> vals = boost::get<std::vector<int32_t>>(values);
1095 
1096  if (column_builder.field->nullable()) {
1097  CHECK(is_valid.get());
1098  // TODO(adb): Generate this instead of the boolean bitmap
1099  std::vector<uint8_t> transformed_bitmap;
1100  transformed_bitmap.reserve(is_valid->size());
1101  std::for_each(
1102  is_valid->begin(), is_valid->end(), [&transformed_bitmap](const bool is_valid) {
1103  transformed_bitmap.push_back(is_valid ? 1 : 0);
1104  });
1105 
1106  ARROW_THROW_NOT_OK(typed_builder->AppendIndices(
1107  vals.data(), static_cast<int64_t>(vals.size()), transformed_bitmap.data()));
1108  } else {
1109  ARROW_THROW_NOT_OK(
1110  typed_builder->AppendIndices(vals.data(), static_cast<int64_t>(vals.size())));
1111  }
1112 }
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
std::shared_ptr< arrow::Field > field
#define CHECK(condition)
Definition: Logger.h:197
template<typename C_TYPE , typename ARROW_TYPE = typename CTypeTraits<C_TYPE>::ArrowType>
void anonymous_namespace{ArrowResultSetConverter.cpp}::convert_column ( ResultSetPtr  result,
size_t  col,
size_t  entry_count,
std::shared_ptr< Array > &  out 
)

Definition at line 179 of file ArrowResultSetConverter.cpp.

References CHECK, i, and run_benchmark_import::res.

182  {
183  CHECK(sizeof(C_TYPE) == result->getColType(col).get_size());
184 
185  std::shared_ptr<arrow::Buffer> values;
186  std::shared_ptr<arrow::Buffer> is_valid;
187  const int64_t buf_size = entry_count * sizeof(C_TYPE);
188  if (result->isZeroCopyColumnarConversionPossible(col)) {
189  values.reset(new ResultSetBuffer(
190  reinterpret_cast<const uint8_t*>(result->getColumnarBuffer(col)),
191  buf_size,
192  result));
193  } else {
194  auto res = arrow::AllocateBuffer(buf_size);
195  CHECK(res.ok());
196  values = std::move(res).ValueOrDie();
197  result->copyColumnIntoBuffer(
198  col, reinterpret_cast<int8_t*>(values->mutable_data()), buf_size);
199  }
200 
201  int64_t null_count = 0;
202  auto res = arrow::AllocateBuffer((entry_count + 7) / 8);
203  CHECK(res.ok());
204  is_valid = std::move(res).ValueOrDie();
205 
206  auto is_valid_data = is_valid->mutable_data();
207 
208  const null_type_t<C_TYPE>* vals =
209  reinterpret_cast<const null_type_t<C_TYPE>*>(values->data());
210  null_type_t<C_TYPE> null_val = null_type<C_TYPE>::value;
211 
212  size_t unroll_count = entry_count & 0xFFFFFFFFFFFFFFF8ULL;
213  for (size_t i = 0; i < unroll_count; i += 8) {
214  uint8_t valid_byte = 0;
215  uint8_t valid;
216  valid = vals[i + 0] != null_val;
217  valid_byte |= valid << 0;
218  null_count += !valid;
219  valid = vals[i + 1] != null_val;
220  valid_byte |= valid << 1;
221  null_count += !valid;
222  valid = vals[i + 2] != null_val;
223  valid_byte |= valid << 2;
224  null_count += !valid;
225  valid = vals[i + 3] != null_val;
226  valid_byte |= valid << 3;
227  null_count += !valid;
228  valid = vals[i + 4] != null_val;
229  valid_byte |= valid << 4;
230  null_count += !valid;
231  valid = vals[i + 5] != null_val;
232  valid_byte |= valid << 5;
233  null_count += !valid;
234  valid = vals[i + 6] != null_val;
235  valid_byte |= valid << 6;
236  null_count += !valid;
237  valid = vals[i + 7] != null_val;
238  valid_byte |= valid << 7;
239  null_count += !valid;
240  is_valid_data[i >> 3] = valid_byte;
241  }
242  if (unroll_count != entry_count) {
243  uint8_t valid_byte = 0;
244  for (size_t i = unroll_count; i < entry_count; ++i) {
245  bool valid = vals[i] != null_val;
246  valid_byte |= valid << (i & 7);
247  null_count += !valid;
248  }
249  is_valid_data[unroll_count >> 3] = valid_byte;
250  }
251 
252  if (!null_count) {
253  is_valid.reset();
254  }
255 
256  // TODO: support date/time + scaling
257  // TODO: support booleans
258  if (null_count) {
259  out.reset(new NumericArray<ARROW_TYPE>(entry_count, values, is_valid, null_count));
260  } else {
261  out.reset(new NumericArray<ARROW_TYPE>(entry_count, values));
262  }
263 }
#define CHECK(condition)
Definition: Logger.h:197
template<typename TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_validity ( const ScalarTargetValue &  value,
const SQLTypeInfo &  col_type,
std::shared_ptr< std::vector< bool >> &  null_bitmap,
const size_t  max_size 
)

Definition at line 129 of file ArrowResultSetConverter.cpp.

References CHECK, SQLTypeInfo::get_notnull(), inline_fp_null_val(), inline_int_null_val(), SQLTypeInfo::is_boolean(), SQLTypeInfo::is_dict_encoded_string(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_integer(), SQLTypeInfo::is_time(), and UNREACHABLE.

132  {
133  if (col_type.get_notnull()) {
134  CHECK(!null_bitmap);
135  return;
136  }
137  auto pvalue = boost::get<TYPE>(&value);
138  CHECK(pvalue);
139  bool is_valid = false;
140  if (col_type.is_boolean()) {
141  is_valid = inline_int_null_val(col_type) != static_cast<int8_t>(*pvalue);
142  } else if (col_type.is_dict_encoded_string()) {
143  is_valid = inline_int_null_val(col_type) != static_cast<int32_t>(*pvalue);
144  } else if (col_type.is_integer() || col_type.is_time()) {
145  is_valid = inline_int_null_val(col_type) != static_cast<int64_t>(*pvalue);
146  } else if (col_type.is_fp()) {
147  is_valid = inline_fp_null_val(col_type) != static_cast<double>(*pvalue);
148  } else {
149  UNREACHABLE();
150  }
151 
152  if (!null_bitmap) {
153  null_bitmap = std::make_shared<std::vector<bool>>();
154  null_bitmap->reserve(max_size);
155  }
156  CHECK(null_bitmap);
157  null_bitmap->push_back(is_valid);
158 }
bool is_fp() const
Definition: sqltypes.h:492
#define UNREACHABLE()
Definition: Logger.h:241
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
bool is_time() const
Definition: sqltypes.h:494
bool is_integer() const
Definition: sqltypes.h:490
bool is_boolean() const
Definition: sqltypes.h:495
#define CHECK(condition)
Definition: Logger.h:197
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
bool is_dict_encoded_string() const
Definition: sqltypes.h:525
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321

+ Here is the call graph for this function:

template<typename TYPE , typename VALUE_ARRAY_TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_value ( const ScalarTargetValue &  val_cty,
std::shared_ptr< ValueArray > &  values,
const size_t  max_size 
)

Definition at line 112 of file ArrowResultSetConverter.cpp.

References CHECK.

114  {
115  auto pval_cty = boost::get<VALUE_ARRAY_TYPE>(&val_cty);
116  CHECK(pval_cty);
117  auto val_ty = static_cast<TYPE>(*pval_cty);
118  if (!values) {
119  values = std::make_shared<ValueArray>(std::vector<TYPE>());
120  boost::get<std::vector<TYPE>>(*values).reserve(max_size);
121  }
122  CHECK(values);
123  auto values_ty = boost::get<std::vector<TYPE>>(values.get());
124  CHECK(values_ty);
125  values_ty->push_back(val_ty);
126 }
#define CHECK(condition)
Definition: Logger.h:197
std::shared_ptr<arrow::DataType> anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type ( const SQLTypeInfo &  sql_type,
const ExecutorDeviceType  device_type 
)

Definition at line 900 of file ArrowResultSetConverter.cpp.

References foreign_storage::get_physical_type(), SQLTypeInfo::get_precision(), SQLTypeInfo::get_scale(), SQLTypeInfo::get_type_name(), GPU, SQLTypeInfo::is_dict_encoded_string(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and to_string().

Referenced by ArrowResultSetConverter::makeField().

901  {
902  switch (get_physical_type(sql_type)) {
903  case kBOOLEAN:
904  return arrow::boolean();
905  case kTINYINT:
906  return arrow::int8();
907  case kSMALLINT:
908  return arrow::int16();
909  case kINT:
910  return arrow::int32();
911  case kBIGINT:
912  return arrow::int64();
913  case kFLOAT:
914  return arrow::float32();
915  case kDOUBLE:
916  return arrow::float64();
917  case kCHAR:
918  case kVARCHAR:
919  case kTEXT:
920  if (sql_type.is_dict_encoded_string()) {
921  auto value_type = std::make_shared<StringType>();
922  return dictionary(int32(), value_type, false);
923  }
924  return utf8();
925  case kDECIMAL:
926  case kNUMERIC:
927  return decimal(sql_type.get_precision(), sql_type.get_scale());
928  case kTIME:
929  return time32(TimeUnit::SECOND);
930  case kDATE: {
931  // TODO(wamsi) : Remove date64() once date32() support is added in cuDF. date32()
932  // Currently support for date32() is missing in cuDF.Hence, if client requests for
933  // date on GPU, return date64() for the time being, till support is added.
934  if (device_type == ExecutorDeviceType::GPU) {
935  return date64();
936  } else {
937  return date32();
938  }
939  }
940  case kTIMESTAMP:
941  switch (sql_type.get_precision()) {
942  case 0:
943  return timestamp(TimeUnit::SECOND);
944  case 3:
945  return timestamp(TimeUnit::MILLI);
946  case 6:
947  return timestamp(TimeUnit::MICRO);
948  case 9:
949  return timestamp(TimeUnit::NANO);
950  default:
951  throw std::runtime_error(
952  "Unsupported timestamp precision for Arrow result sets: " +
953  std::to_string(sql_type.get_precision()));
954  }
955  case kARRAY:
956  case kINTERVAL_DAY_TIME:
957  case kINTERVAL_YEAR_MONTH:
958  default:
959  throw std::runtime_error(sql_type.get_type_name() +
960  " is not supported in Arrow result sets.");
961  }
962  return nullptr;
963 }
Definition: sqltypes.h:48
HOST DEVICE int get_scale() const
Definition: sqltypes.h:319
parquet::Type::type get_physical_type(ReaderPtr &reader, const int logical_column_index)
std::string to_string(char const *&&v)
int get_precision() const
Definition: sqltypes.h:317
Definition: sqltypes.h:51
Definition: sqltypes.h:52
std::string get_type_name() const
Definition: sqltypes.h:417
Definition: sqltypes.h:40
bool is_dict_encoded_string() const
Definition: sqltypes.h:525
Definition: sqltypes.h:44

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypes anonymous_namespace{ArrowResultSetConverter.cpp}::get_dict_index_type ( const SQLTypeInfo &  ti)
inline

Definition at line 75 of file ArrowResultSetConverter.cpp.

References CHECK, SQLTypeInfo::get_size(), SQLTypeInfo::get_type(), SQLTypeInfo::is_dict_encoded_string(), kBIGINT, kINT, kSMALLINT, and kTINYINT.

Referenced by ArrowResultSetConverter::initializeColumnBuilder().

75  {
76  CHECK(ti.is_dict_encoded_string());
77  switch (ti.get_size()) {
78  case 1:
79  return kTINYINT;
80  case 2:
81  return kSMALLINT;
82  case 4:
83  return kINT;
84  case 8:
85  return kBIGINT;
86  default:
87  CHECK(false);
88  }
89  return ti.get_type();
90 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
#define CHECK(condition)
Definition: Logger.h:197
bool is_dict_encoded_string() const
Definition: sqltypes.h:525
Definition: sqltypes.h:44

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypes anonymous_namespace{ArrowResultSetConverter.cpp}::get_physical_type ( const SQLTypeInfo &  ti)
inline

Definition at line 92 of file ArrowResultSetConverter.cpp.

References CHECK, SQLTypeInfo::get_size(), SQLTypeInfo::get_type(), IS_INTEGER, kBIGINT, kINT, kSMALLINT, and kTINYINT.

92  {
93  auto logical_type = ti.get_type();
94  if (IS_INTEGER(logical_type)) {
95  switch (ti.get_size()) {
96  case 1:
97  return kTINYINT;
98  case 2:
99  return kSMALLINT;
100  case 4:
101  return kINT;
102  case 8:
103  return kBIGINT;
104  default:
105  CHECK(false);
106  }
107  }
108  return logical_type;
109 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
#define IS_INTEGER(T)
Definition: sqltypes.h:239
#define CHECK(condition)
Definition: Logger.h:197
Definition: sqltypes.h:44

+ Here is the call graph for this function:

std::pair<key_t, void*> anonymous_namespace{ArrowResultSetConverter.cpp}::get_shm ( size_t  shmsz)

Definition at line 266 of file ArrowResultSetConverter.cpp.

Referenced by arrow::get_and_copy_to_shm(), and get_shm_buffer().

266  {
267  if (!shmsz) {
268  return std::make_pair(IPC_PRIVATE, nullptr);
269  }
270  // Generate a new key for a shared memory segment. Keys to shared memory segments
271  // are OS global, so we need to try a new key if we encounter a collision. It seems
272  // incremental keygen would be deterministically worst-case. If we use a hash
273  // (like djb2) + nonce, we could still get collisions if multiple clients specify
274  // the same nonce, so using rand() in lieu of a better approach
275  // TODO(ptaylor): Is this common? Are these assumptions true?
276  auto key = static_cast<key_t>(rand());
277  int shmid = -1;
278  // IPC_CREAT - indicates we want to create a new segment for this key if it doesn't
279  // exist IPC_EXCL - ensures failure if a segment already exists for this key
280  while ((shmid = shmget(key, shmsz, IPC_CREAT | IPC_EXCL | 0666)) < 0) {
281  // If shmget fails and errno is one of these four values, try a new key.
282  // TODO(ptaylor): is checking for the last three values really necessary? Checking
283  // them by default to be safe. EEXIST - a shared memory segment is already associated
284  // with this key EACCES - a shared memory segment is already associated with this key,
285  // but we don't have permission to access it EINVAL - a shared memory segment is
286  // already associated with this key, but the size is less than shmsz ENOENT -
287  // IPC_CREAT was not set in shmflg and no shared memory segment associated with key
288  // was found
289  if (!(errno & (EEXIST | EACCES | EINVAL | ENOENT))) {
290  throw std::runtime_error("failed to create a shared memory");
291  }
292  key = static_cast<key_t>(rand());
293  }
294  // get a pointer to the shared memory segment
295  auto ipc_ptr = shmat(shmid, NULL, 0);
296  if (reinterpret_cast<int64_t>(ipc_ptr) == -1) {
297  throw std::runtime_error("failed to attach a shared memory");
298  }
299 
300  return std::make_pair(key, ipc_ptr);
301 }

+ Here is the caller graph for this function:

std::pair<key_t, std::shared_ptr<Buffer> > anonymous_namespace{ArrowResultSetConverter.cpp}::get_shm_buffer ( size_t  size)

Definition at line 304 of file ArrowResultSetConverter.cpp.

References get_shm().

Referenced by ArrowResultSetConverter::getArrowResult().

304  {
305 #ifdef _MSC_VER
306  throw std::runtime_error("Arrow IPC not yet supported on Windows.");
307  return std::make_pair(0, nullptr);
308 #else
309  auto [key, ipc_ptr] = get_shm(size);
310  std::shared_ptr<Buffer> buffer(new MutableBuffer(static_cast<uint8_t*>(ipc_ptr), size));
311  return std::make_pair<key_t, std::shared_ptr<Buffer>>(std::move(key),
312  std::move(buffer));
313 #endif
314 }
std::pair< key_t, void * > get_shm(size_t shmsz)

+ Here is the call graph for this function:

+ Here is the caller graph for this function: