OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{StringDictionary.cpp} Namespace Reference

Classes

struct  ThreadInfo
 
class  MapMaker
 

Functions

int checked_open (const char *path, const bool recover)
 
const uint64_t round_up_p2 (const uint64_t num)
 
string_dict_hash_t hash_string (const std::string_view &str)
 
template<class T >
void throw_encoding_error (std::string_view str, const shared::StringDictKey &dict_key)
 
void throw_string_too_long_error (std::string_view str, const shared::StringDictKey &dict_key)
 
bool is_like (const std::string &str, const std::string &pattern, const bool icase, const bool is_simple, const char escape)
 
bool is_regexp_like (const std::string &str, const std::string &pattern, const char escape)
 

Variables

const int SYSTEM_PAGE_SIZE = heavyai::get_page_size()
 

Function Documentation

int anonymous_namespace{StringDictionary.cpp}::checked_open ( const char *  path,
const bool  recover 
)

Definition at line 58 of file StringDictionary.cpp.

References logger::ERROR, LOG, and heavyai::open().

Referenced by StringDictionary::StringDictionary().

58  {
59  auto fd = heavyai::open(path, O_RDWR | O_CREAT | (recover ? O_APPEND : O_TRUNC), 0644);
60  if (fd > 0) {
61  return fd;
62  }
63  auto err = std::string("Dictionary path ") + std::string(path) +
64  std::string(" does not exist.");
65  LOG(ERROR) << err;
66  throw DictPayloadUnavailable(err);
67 }
#define LOG(tag)
Definition: Logger.h:285
int open(const char *path, int flags, int mode)
Definition: heavyai_fs.cpp:66

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

string_dict_hash_t anonymous_namespace{StringDictionary.cpp}::hash_string ( const std::string_view &  str)

Definition at line 87 of file StringDictionary.cpp.

Referenced by StringDictionary::buildDictionaryTranslationMap(), StringDictionary::getBulk(), StringDictionary::getOrAddBulk(), StringDictionary::getOrAddImpl(), StringDictionary::getUnlocked(), StringDictionary::hashStrings(), StringDictionary::increaseHashTableCapacity(), StringDictionary::increaseHashTableCapacityFromStorageAndMemory(), and StringDictionary::StringDictionary().

87  {
88  string_dict_hash_t str_hash = 1;
89  // rely on fact that unsigned overflow is defined and wraps
90  for (size_t i = 0; i < str.size(); ++i) {
91  str_hash = str_hash * 997 + str[i];
92  }
93  return str_hash;
94 }
uint32_t string_dict_hash_t

+ Here is the caller graph for this function:

bool anonymous_namespace{StringDictionary.cpp}::is_like ( const std::string &  str,
const std::string &  pattern,
const bool  icase,
const bool  is_simple,
const char  escape 
)

Definition at line 800 of file StringDictionary.cpp.

References string_ilike(), string_ilike_simple(), string_like(), and string_like_simple().

Referenced by StringDictionary::getLike(), and StringDictionaryProxy::getLike().

804  {
805  return icase
806  ? (is_simple ? string_ilike_simple(
807  str.c_str(), str.size(), pattern.c_str(), pattern.size())
808  : string_ilike(str.c_str(),
809  str.size(),
810  pattern.c_str(),
811  pattern.size(),
812  escape))
813  : (is_simple ? string_like_simple(
814  str.c_str(), str.size(), pattern.c_str(), pattern.size())
815  : string_like(str.c_str(),
816  str.size(),
817  pattern.c_str(),
818  pattern.size(),
819  escape));
820 }
RUNTIME_EXPORT DEVICE bool string_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:244
RUNTIME_EXPORT DEVICE bool string_like_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:41
RUNTIME_EXPORT DEVICE bool string_ilike_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:57
RUNTIME_EXPORT DEVICE bool string_ilike(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:255

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{StringDictionary.cpp}::is_regexp_like ( const std::string &  str,
const std::string &  pattern,
const char  escape 
)

Definition at line 1090 of file StringDictionary.cpp.

References regexp_like().

Referenced by StringDictionary::getRegexpLike(), and StringDictionaryProxy::getRegexpLike().

1092  {
1093  return regexp_like(str.c_str(), str.size(), pattern.c_str(), pattern.size(), escape);
1094 }
RUNTIME_EXPORT DEVICE bool regexp_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: Regexp.cpp:39

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const uint64_t anonymous_namespace{StringDictionary.cpp}::round_up_p2 ( const uint64_t  num)

Definition at line 69 of file StringDictionary.cpp.

Referenced by StringDictionary::StringDictionary().

69  {
70  uint64_t in = num;
71  in--;
72  in |= in >> 1;
73  in |= in >> 2;
74  in |= in >> 4;
75  in |= in >> 8;
76  in |= in >> 16;
77  in++;
78  // TODO MAT deal with case where filesize has been increased but reality is
79  // we are constrained to 2^31.
80  // In that situation this calculation will wrap to zero
81  if (in == 0 || (in > (UINT32_MAX))) {
82  in = UINT32_MAX;
83  }
84  return in;
85 }

+ Here is the caller graph for this function:

template<class T >
void anonymous_namespace{StringDictionary.cpp}::throw_encoding_error ( std::string_view  str,
const shared::StringDictKey &  dict_key
)

Definition at line 393 of file StringDictionary.cpp.

References logger::ERROR, LOG, StringDictionary::MAX_STRCOUNT, and heavydb.dtypes::T.

393  {
394  std::ostringstream oss;
395  oss << "The text encoded column using dictionary " << dict_key
396  << " has exceeded it's limit of " << sizeof(T) * 8 << " bits ("
397  << static_cast<size_t>(max_valid_int_value<T>() + 1) << " unique values) "
398  << "while attempting to add the new string '" << str << "'. ";
399 
400  if (sizeof(T) < 4) {
401  // Todo: Implement automatic type widening for dictionary-encoded text
402  // columns/all fixed length columm types (at least if not defined
403  // with fixed encoding size), or short of that, ALTER TABLE
404  // COLUMN TYPE to at least allow the user to do this manually
405  // without re-creating the table
406 
407  oss << "To load more data, please re-create the table with "
408  << "this column as type TEXT ENCODING DICT(" << sizeof(T) * 2 * 8 << ") ";
409  if (sizeof(T) == 1) {
410  oss << "or TEXT ENCODING DICT(32) ";
411  }
412  oss << "and reload your data.";
413  } else {
414  // Todo: Implement TEXT ENCODING DICT(64) type which should essentially
415  // preclude overflows.
416  oss << "Currently dictionary-encoded text columns support a maximum of "
417  << StringDictionary::MAX_STRCOUNT
418  << " strings. Consider recreating the table with "
419  << "this column as type TEXT ENCODING NONE and reloading your data.";
420  }
421  LOG(ERROR) << oss.str();
422  throw std::runtime_error(oss.str());
423 }
#define LOG(tag)
Definition: Logger.h:285
static constexpr size_t MAX_STRCOUNT
void anonymous_namespace{StringDictionary.cpp}::throw_string_too_long_error ( std::string_view  str,
const shared::StringDictKey &  dict_key
)

Definition at line 425 of file StringDictionary.cpp.

References logger::ERROR, LOG, and StringDictionary::MAX_STRLEN.

Referenced by StringDictionary::getBulk().

426  {
427  std::ostringstream oss;
428  oss << "The string '" << str << " could not be inserted into the dictionary "
429  << dict_key << " because it exceeded the maximum allowable "
430  << "length of " << StringDictionary::MAX_STRLEN << " characters (string was "
431  << str.size() << " characters).";
432  LOG(ERROR) << oss.str();
433  throw std::runtime_error(oss.str());
434 }
#define LOG(tag)
Definition: Logger.h:285
static constexpr size_t MAX_STRLEN

+ Here is the caller graph for this function:

Variable Documentation

const int anonymous_namespace{StringDictionary.cpp}::SYSTEM_PAGE_SIZE = heavyai::get_page_size()