OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
anonymous_namespace{ArrowResultSetConverter.cpp} Namespace Reference

Functions

SQLTypes get_dict_index_type (const SQLTypeInfo &ti)
 
SQLTypes get_physical_type (const SQLTypeInfo &ti)
 
template<typename TYPE , typename VALUE_ARRAY_TYPE >
void create_or_append_value (const ScalarTargetValue &val_cty, std::shared_ptr< ValueArray > &values, const size_t max_size)
 
template<typename TYPE >
void create_or_append_validity (const ScalarTargetValue &value, const SQLTypeInfo &col_type, std::shared_ptr< std::vector< bool >> &null_bitmap, const size_t max_size)
 
std::pair< key_t, void * > get_shm (size_t shmsz)
 
std::pair< key_t,
std::shared_ptr< Buffer > > 
get_shm_buffer (size_t size)
 
std::shared_ptr< arrow::DataType > get_arrow_type (const SQLTypeInfo &sql_type, const ExecutorDeviceType device_type)
 
template<typename BUILDER_TYPE , typename VALUE_ARRAY_TYPE >
void appendToColumnBuilder (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 
template<>
void appendToColumnBuilder< arrow::StringDictionary32Builder, int32_t > (ArrowResultSetConverter::ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid)
 

Function Documentation

template<typename BUILDER_TYPE , typename VALUE_ARRAY_TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToColumnBuilder ( ArrowResultSetConverter::ColumnBuilder &  column_builder,
const ValueArray &  values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 700 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, CHECK(), ArrowResultSetConverter::ColumnBuilder::field, DateConverters::get_epoch_days_from_seconds(), and kMilliSecsPerSec.

702  {
703  static_assert(!std::is_same<BUILDER_TYPE, arrow::StringDictionary32Builder>::value,
704  "Dictionary encoded string builder requires function specialization.");
705 
706  std::vector<VALUE_ARRAY_TYPE> vals = boost::get<std::vector<VALUE_ARRAY_TYPE>>(values);
707 
708  if (scale_epoch_values<BUILDER_TYPE>()) {
709  auto scale_sec_to_millisec = [](auto seconds) { return seconds * kMilliSecsPerSec; };
710  auto scale_values = [&](auto epoch) {
711  return std::is_same<BUILDER_TYPE, Date32Builder>::value
712  ? DateConverters::get_epoch_days_from_seconds(epoch)
713  : scale_sec_to_millisec(epoch);
714  };
715  std::transform(vals.begin(), vals.end(), vals.begin(), scale_values);
716  }
717 
718  auto typed_builder = dynamic_cast<BUILDER_TYPE*>(column_builder.builder.get());
719  CHECK(typed_builder);
720  if (column_builder.field->nullable()) {
721  CHECK(is_valid.get());
722  ARROW_THROW_NOT_OK(typed_builder->AppendValues(vals, *is_valid));
723  } else {
724  ARROW_THROW_NOT_OK(typed_builder->AppendValues(vals));
725  }
726 }
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:37
std::shared_ptr< arrow::Field > field
static constexpr int64_t kMilliSecsPerSec
CHECK(cgen_state)
int64_t get_epoch_days_from_seconds(const int64_t seconds)

+ Here is the call graph for this function:

template<>
void anonymous_namespace{ArrowResultSetConverter.cpp}::appendToColumnBuilder< arrow::StringDictionary32Builder, int32_t > ( ArrowResultSetConverter::ColumnBuilder &  column_builder,
const ValueArray &  values,
const std::shared_ptr< std::vector< bool >> &  is_valid 
)

Definition at line 729 of file ArrowResultSetConverter.cpp.

References ARROW_THROW_NOT_OK, and CHECK().

732  {
733  auto typed_builder =
734  dynamic_cast<arrow::StringDictionary32Builder*>(column_builder.builder.get());
735  CHECK(typed_builder);
736 
737  std::vector<int32_t> vals = boost::get<std::vector<int32_t>>(values);
738 
739  if (column_builder.field->nullable()) {
740  CHECK(is_valid.get());
741  // TODO(adb): Generate this instead of the boolean bitmap
742  std::vector<uint8_t> transformed_bitmap;
743  transformed_bitmap.reserve(is_valid->size());
744  std::for_each(
745  is_valid->begin(), is_valid->end(), [&transformed_bitmap](const bool is_valid) {
746  transformed_bitmap.push_back(is_valid ? 1 : 0);
747  });
748 
749  ARROW_THROW_NOT_OK(typed_builder->AppendIndices(
750  vals.data(), static_cast<int64_t>(vals.size()), transformed_bitmap.data()));
751  } else {
752  ARROW_THROW_NOT_OK(
753  typed_builder->AppendIndices(vals.data(), static_cast<int64_t>(vals.size())));
754  }
755 }
std::unique_ptr< arrow::ArrayBuilder > builder
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:37
std::shared_ptr< arrow::Field > field
CHECK(cgen_state)

+ Here is the call graph for this function:

template<typename TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_validity ( const ScalarTargetValue &  value,
const SQLTypeInfo &  col_type,
std::shared_ptr< std::vector< bool >> &  null_bitmap,
const size_t  max_size 
)

Definition at line 102 of file ArrowResultSetConverter.cpp.

References CHECK(), SQLTypeInfo::get_notnull(), inline_fp_null_val(), inline_int_null_val(), SQLTypeInfo::is_boolean(), SQLTypeInfo::is_dict_encoded_string(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_integer(), SQLTypeInfo::is_time(), and UNREACHABLE.

105  {
106  if (col_type.get_notnull()) {
107  CHECK(!null_bitmap);
108  return;
109  }
110  auto pvalue = boost::get<TYPE>(&value);
111  CHECK(pvalue);
112  bool is_valid = false;
113  if (col_type.is_boolean()) {
114  is_valid = inline_int_null_val(col_type) != static_cast<int8_t>(*pvalue);
115  } else if (col_type.is_dict_encoded_string()) {
116  is_valid = inline_int_null_val(col_type) != static_cast<int32_t>(*pvalue);
117  } else if (col_type.is_integer() || col_type.is_time()) {
118  is_valid = inline_int_null_val(col_type) != static_cast<int64_t>(*pvalue);
119  } else if (col_type.is_fp()) {
120  is_valid = inline_fp_null_val(col_type) != static_cast<double>(*pvalue);
121  } else {
122  UNREACHABLE();
123  }
124 
125  if (!null_bitmap) {
126  null_bitmap = std::make_shared<std::vector<bool>>();
127  null_bitmap->reserve(max_size);
128  }
129  CHECK(null_bitmap);
130  null_bitmap->push_back(is_valid);
131 }
bool is_fp() const
Definition: sqltypes.h:419
#define UNREACHABLE()
Definition: Logger.h:241
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
bool is_time() const
Definition: sqltypes.h:421
CHECK(cgen_state)
bool is_integer() const
Definition: sqltypes.h:417
bool is_boolean() const
Definition: sqltypes.h:422
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
bool is_dict_encoded_string() const
Definition: sqltypes.h:442
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:265

+ Here is the call graph for this function:

template<typename TYPE , typename VALUE_ARRAY_TYPE >
void anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_value ( const ScalarTargetValue &  val_cty,
std::shared_ptr< ValueArray > &  values,
const size_t  max_size 
)

Definition at line 85 of file ArrowResultSetConverter.cpp.

References CHECK().

87  {
88  auto pval_cty = boost::get<VALUE_ARRAY_TYPE>(&val_cty);
89  CHECK(pval_cty);
90  auto val_ty = static_cast<TYPE>(*pval_cty);
91  if (!values) {
92  values = std::make_shared<ValueArray>(std::vector<TYPE>());
93  boost::get<std::vector<TYPE>>(*values).reserve(max_size);
94  }
95  CHECK(values);
96  auto values_ty = boost::get<std::vector<TYPE>>(values.get());
97  CHECK(values_ty);
98  values_ty->push_back(val_ty);
99 }
CHECK(cgen_state)

+ Here is the call graph for this function:

std::shared_ptr<arrow::DataType> anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type ( const SQLTypeInfo &  sql_type,
const ExecutorDeviceType  device_type 
)

Definition at line 550 of file ArrowResultSetConverter.cpp.

References get_physical_type(), SQLTypeInfo::get_precision(), SQLTypeInfo::get_scale(), SQLTypeInfo::get_type_name(), GPU, SQLTypeInfo::is_dict_encoded_string(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and to_string().

Referenced by ArrowResultSetConverter::makeField().

551  {
552  switch (get_physical_type(sql_type)) {
553  case kBOOLEAN:
554  return boolean();
555  case kTINYINT:
556  return int8();
557  case kSMALLINT:
558  return int16();
559  case kINT:
560  return int32();
561  case kBIGINT:
562  return int64();
563  case kFLOAT:
564  return float32();
565  case kDOUBLE:
566  return float64();
567  case kCHAR:
568  case kVARCHAR:
569  case kTEXT:
570  if (sql_type.is_dict_encoded_string()) {
571  auto value_type = std::make_shared<StringType>();
572  return dictionary(int32(), value_type, false);
573  }
574  return utf8();
575  case kDECIMAL:
576  case kNUMERIC:
577  return decimal(sql_type.get_precision(), sql_type.get_scale());
578  case kTIME:
579  return time32(TimeUnit::SECOND);
580  case kDATE:
581  // TODO(wamsi) : Remove date64() once date32() support is added in cuDF. date32()
582  // Currently support for date32() is missing in cuDF.Hence, if client requests for
583  // date on GPU, return date64() for the time being, till support is added.
584  return device_type == ExecutorDeviceType::GPU ? date64() : date32();
585  case kTIMESTAMP:
586  switch (sql_type.get_precision()) {
587  case 0:
588  return timestamp(TimeUnit::SECOND);
589  case 3:
590  return timestamp(TimeUnit::MILLI);
591  case 6:
592  return timestamp(TimeUnit::MICRO);
593  case 9:
594  return timestamp(TimeUnit::NANO);
595  default:
596  throw std::runtime_error(
597  "Unsupported timestamp precision for Arrow result sets: " +
598  std::to_string(sql_type.get_precision()));
599  }
600  case kARRAY:
601  case kINTERVAL_DAY_TIME:
602  case kINTERVAL_YEAR_MONTH:
603  default:
604  throw std::runtime_error(sql_type.get_type_name() +
605  " is not supported in Arrow result sets.");
606  }
607  return nullptr;
608 }
Definition: sqltypes.h:50
HOST DEVICE int get_scale() const
Definition: sqltypes.h:263
std::string to_string(char const *&&v)
int get_precision() const
Definition: sqltypes.h:261
Definition: sqltypes.h:53
Definition: sqltypes.h:54
std::string get_type_name() const
Definition: sqltypes.h:361
Definition: sqltypes.h:42
bool is_dict_encoded_string() const
Definition: sqltypes.h:442
Definition: sqltypes.h:46

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypes anonymous_namespace{ArrowResultSetConverter.cpp}::get_dict_index_type ( const SQLTypeInfo &  ti)
inline

Definition at line 48 of file ArrowResultSetConverter.cpp.

References CHECK(), SQLTypeInfo::get_size(), SQLTypeInfo::get_type(), SQLTypeInfo::is_dict_encoded_string(), kBIGINT, kINT, kSMALLINT, and kTINYINT.

Referenced by ArrowResultSetConverter::initializeColumnBuilder().

48  {
49  CHECK(ti.is_dict_encoded_string());
50  switch (ti.get_size()) {
51  case 1:
52  return kTINYINT;
53  case 2:
54  return kSMALLINT;
55  case 4:
56  return kINT;
57  case 8:
58  return kBIGINT;
59  default:
60  CHECK(false);
61  }
62  return ti.get_type();
63 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:258
CHECK(cgen_state)
bool is_dict_encoded_string() const
Definition: sqltypes.h:442
Definition: sqltypes.h:46

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypes anonymous_namespace{ArrowResultSetConverter.cpp}::get_physical_type ( const SQLTypeInfo &  ti)
inline

Definition at line 65 of file ArrowResultSetConverter.cpp.

References CHECK(), SQLTypeInfo::get_size(), SQLTypeInfo::get_type(), IS_INTEGER, kBIGINT, kINT, kSMALLINT, and kTINYINT.

Referenced by get_arrow_type(), and ArrowResultSetConverter::initializeColumnBuilder().

65  {
66  auto logical_type = ti.get_type();
67  if (IS_INTEGER(logical_type)) {
68  switch (ti.get_size()) {
69  case 1:
70  return kTINYINT;
71  case 2:
72  return kSMALLINT;
73  case 4:
74  return kINT;
75  case 8:
76  return kBIGINT;
77  default:
78  CHECK(false);
79  }
80  }
81  return logical_type;
82 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:258
CHECK(cgen_state)
#define IS_INTEGER(T)
Definition: sqltypes.h:167
Definition: sqltypes.h:46

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair<key_t, void*> anonymous_namespace{ArrowResultSetConverter.cpp}::get_shm ( size_t  shmsz)

Definition at line 133 of file ArrowResultSetConverter.cpp.

Referenced by arrow::get_and_copy_to_shm(), and get_shm_buffer().

133  {
134  if (!shmsz) {
135  return std::make_pair(IPC_PRIVATE, nullptr);
136  }
137  // Generate a new key for a shared memory segment. Keys to shared memory segments
138  // are OS global, so we need to try a new key if we encounter a collision. It seems
139  // incremental keygen would be deterministically worst-case. If we use a hash
140  // (like djb2) + nonce, we could still get collisions if multiple clients specify
141  // the same nonce, so using rand() in lieu of a better approach
142  // TODO(ptaylor): Is this common? Are these assumptions true?
143  auto key = static_cast<key_t>(rand());
144  int shmid = -1;
145  // IPC_CREAT - indicates we want to create a new segment for this key if it doesn't
146  // exist IPC_EXCL - ensures failure if a segment already exists for this key
147  while ((shmid = shmget(key, shmsz, IPC_CREAT | IPC_EXCL | 0666)) < 0) {
148  // If shmget fails and errno is one of these four values, try a new key.
149  // TODO(ptaylor): is checking for the last three values really necessary? Checking
150  // them by default to be safe. EEXIST - a shared memory segment is already associated
151  // with this key EACCES - a shared memory segment is already associated with this key,
152  // but we don't have permission to access it EINVAL - a shared memory segment is
153  // already associated with this key, but the size is less than shmsz ENOENT -
154  // IPC_CREAT was not set in shmflg and no shared memory segment associated with key
155  // was found
156  if (!(errno & (EEXIST | EACCES | EINVAL | ENOENT))) {
157  throw std::runtime_error("failed to create a shared memory");
158  }
159  key = static_cast<key_t>(rand());
160  }
161  // get a pointer to the shared memory segment
162  auto ipc_ptr = shmat(shmid, NULL, 0);
163  if (reinterpret_cast<int64_t>(ipc_ptr) == -1) {
164  throw std::runtime_error("failed to attach a shared memory");
165  }
166 
167  return std::make_pair(key, ipc_ptr);
168 }

+ Here is the caller graph for this function:

std::pair<key_t, std::shared_ptr<Buffer> > anonymous_namespace{ArrowResultSetConverter.cpp}::get_shm_buffer ( size_t  size)

Definition at line 170 of file ArrowResultSetConverter.cpp.

References get_shm().

Referenced by ArrowResultSetConverter::getArrowResult().

170  {
171  auto [key, ipc_ptr] = get_shm(size);
172  std::shared_ptr<Buffer> buffer(new MutableBuffer(static_cast<uint8_t*>(ipc_ptr), size));
173  return std::make_pair<key_t, std::shared_ptr<Buffer>>(std::move(key),
174  std::move(buffer));
175 }
std::pair< key_t, void * > get_shm(size_t shmsz)

+ Here is the call graph for this function:

+ Here is the caller graph for this function: