OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExtensionFunctionsText.hpp File Reference
#include <cstring>
#include "Shared/toString.h"
#include "heavydbTypes.h"
+ Include dependency graph for ExtensionFunctionsText.hpp:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

std::vector< std::string > __strtok_to_array (const std::string &text, const std::string &delimiters)
 
EXTENSION_NOINLINE Array< TextEncodingDict > strtok_to_array (RowFunctionManager &mgr, TextEncodingNone &text, TextEncodingNone &delimiters)
 
EXTENSION_NOINLINE Array< TextEncodingDict > strtok_to_array__1 (RowFunctionManager &mgr, TextEncodingDict text, TextEncodingNone &delimiters)
 

Function Documentation

std::vector<std::string> __strtok_to_array ( const std::string &  text,
const std::string &  delimiters 
)

Definition at line 24 of file ExtensionFunctionsText.hpp.

Referenced by strtok_to_array(), and strtok_to_array__1().

/// Splits @p text into tokens separated by any character found in @p delimiters.
///
/// Tokenization follows the classic `strtok` rules: runs of consecutive
/// delimiter characters are collapsed, leading and trailing delimiters are
/// ignored, and empty tokens are never produced. Scanning stops at the first
/// NUL byte of @p text (and only the characters of @p delimiters before its
/// first NUL byte are used), exactly as a C-string based tokenizer would.
///
/// Unlike `strtok`, this implementation never writes to the input buffer and
/// keeps no hidden static state, so it neither modifies the caller's string
/// nor interferes with concurrent calls from other threads.
///
/// @param text        The string to tokenize; left unmodified.
/// @param delimiters  The set of single-character delimiters. When empty, the
///                    whole of @p text is returned as a single token.
/// @return The tokens in order of appearance; empty if @p text contains no
///         non-delimiter characters.
std::vector<std::string> __strtok_to_array(const std::string& text,
                                           const std::string& delimiters) {
  std::vector<std::string> tokens;

  const char* const delims = delimiters.c_str();
  const char* cursor = text.c_str();

  while (true) {
    // Skip any run of delimiter characters preceding the next token.
    cursor += std::strspn(cursor, delims);
    if (*cursor == '\0') {
      break;
    }
    // The token extends up to the next delimiter or the end of the string.
    const std::size_t token_length = std::strcspn(cursor, delims);
    tokens.emplace_back(cursor, token_length);
    cursor += token_length;
  }

  return tokens;
}

+ Here is the caller graph for this function:

EXTENSION_NOINLINE Array<TextEncodingDict> strtok_to_array ( RowFunctionManager &  mgr,
TextEncodingNone &  text,
TextEncodingNone &  delimiters 
)

Definition at line 42 of file ExtensionFunctionsText.hpp.

References __strtok_to_array(), RowFunctionManager::getOrAddTransient(), TextEncodingNone::getString(), TextEncodingNone::isNull(), TRANSIENT_DICT_DB_ID, and TRANSIENT_DICT_ID.

44  {
45  /*
46  Rules
47  -----
48  * If either parameter is NULL => NULL is returned
49  * An empty array is returned if tokenization produces no tokens
50 
51  Note
52  ----
53  The <delimiters> argument is optional on Snowflake, but HeavyDB doesn't
54  support default values on UDFs at the moment. See:
55  https://github.com/heavyai/heavydb-internal/pull/6651
56 
57  Examples
58  --------
59  > select strtok_to_array('a.b.c', '.');
60  {a, b, c}
61 
62  > select strtok_to_array('user@gmail.com', '.@')
63  {user, gmail, com}
64 
65  > select strtok_to_array('', '.')
66  NULL
67 
68  > select strtok_to_array('a.b.c', '')
69  NULL

    NOTE(review): the two empty-string examples above yield NULL rather than
    an empty array, presumably because isNull() reports an empty string as
    NULL -- confirm against TextEncodingNone::isNull() in heavydbTypes.h.
70  */
71 
    // NULL in, NULL out: the second constructor argument is presumably the
    // "is null" flag of Array -- TODO confirm against heavydbTypes.h.
72  if (text.isNull() || delimiters.isNull()) {
73  return Array<TextEncodingDict>(0, true);
74  }
75 
    // Tokenize the raw strings, then store one dictionary id per token,
    // obtained from the row function manager for the transient dictionary.
76  const auto& vec = __strtok_to_array(text.getString(), delimiters.getString());
77  Array<TextEncodingDict> out_arr(vec.size());
78  for (size_t i = 0; i < vec.size(); ++i) {
79  out_arr[i] = mgr.getOrAddTransient(TRANSIENT_DICT_DB_ID, TRANSIENT_DICT_ID, vec[i]);
80  }
81  return out_arr;
82 }
std::string getString() const
Definition: heavydbTypes.h:311
#define TRANSIENT_DICT_DB_ID
Definition: DbObjectKeys.h:25
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
int32_t getOrAddTransient(int32_t db_id, int32_t dict_id, std::string str)
DEVICE ALWAYS_INLINE bool isNull() const
Definition: heavydbTypes.h:335
std::vector< std::string > __strtok_to_array(const std::string &text, const std::string &delimiters)

+ Here is the call graph for this function:

EXTENSION_NOINLINE Array<TextEncodingDict> strtok_to_array__1 ( RowFunctionManager &  mgr,
TextEncodingDict  text,
TextEncodingNone &  delimiters 
)

Definition at line 85 of file ExtensionFunctionsText.hpp.

References __strtok_to_array(), GET_DICT_DB_ID, GET_DICT_ID, RowFunctionManager::getOrAddTransient(), TextEncodingNone::getString(), RowFunctionManager::getString(), TextEncodingDict::isNull(), TextEncodingNone::isNull(), TRANSIENT_DICT_DB_ID, and TRANSIENT_DICT_ID.

    // Overload of strtok_to_array for a dictionary-encoded <text> argument.
    // Same contract as the TextEncodingNone variant: a NULL <text> or
    // <delimiters> produces a NULL result; otherwise <text> is split on any
    // character in <delimiters> and the tokens are returned as an array of
    // dictionary ids.
87  {
88  if (text.isNull() || delimiters.isNull()) {
89  return Array<TextEncodingDict>(0, true);
90  }
91 
    // Resolve the encoded <text> id to its string through the row function
    // manager. The index 0 passed to GET_DICT_DB_ID / GET_DICT_ID is the
    // macros' arg_idx, presumably selecting the first UDF argument (<text>)
    // -- TODO confirm against heavydbTypes.h.
92  std::string str = mgr.getString(GET_DICT_DB_ID(mgr, 0), GET_DICT_ID(mgr, 0), text);
    // Tokenize, then store one id per token obtained from getOrAddTransient
    // using the transient dictionary identifiers.
93  const auto& vec = __strtok_to_array(str, delimiters.getString());
94  Array<TextEncodingDict> out_arr(vec.size());
95  for (size_t i = 0; i < vec.size(); ++i) {
96  out_arr[i] = mgr.getOrAddTransient(TRANSIENT_DICT_DB_ID, TRANSIENT_DICT_ID, vec[i]);
97  }
98  return out_arr;
99 }
std::string getString() const
Definition: heavydbTypes.h:311
#define TRANSIENT_DICT_DB_ID
Definition: DbObjectKeys.h:25
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
#define GET_DICT_ID(mgr, arg_idx)
Definition: heavydbTypes.h:128
DEVICE ALWAYS_INLINE bool isNull() const
Definition: heavydbTypes.h:165
#define GET_DICT_DB_ID(mgr, arg_idx)
Definition: heavydbTypes.h:126
int32_t getOrAddTransient(int32_t db_id, int32_t dict_id, std::string str)
DEVICE ALWAYS_INLINE bool isNull() const
Definition: heavydbTypes.h:335
std::string getString(int32_t db_id, int32_t dict_id, int32_t string_id)
std::vector< std::string > __strtok_to_array(const std::string &text, const std::string &delimiters)

+ Here is the call graph for this function: