OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{StringDictionary.cpp} Namespace Reference

Classes

struct  ThreadInfo
 
class  MapMaker
 

Functions

int checked_open (const char *path, const bool recover)
 
const uint64_t round_up_p2 (const uint64_t num)
 
string_dict_hash_t hash_string (const std::string_view &str)
 
template<class T >
void throw_encoding_error (std::string_view str, const shared::StringDictKey &dict_key)
 
void throw_string_too_long_error (std::string_view str, const shared::StringDictKey &dict_key)
 
bool is_like (const std::string &str, const std::string &pattern, const bool icase, const bool is_simple, const char escape)
 
bool is_regexp_like (const std::string &str, const std::string &pattern, const char escape)
 

Variables

const int SYSTEM_PAGE_SIZE = heavyai::get_page_size()
 

Function Documentation

int anonymous_namespace{StringDictionary.cpp}::checked_open ( const char *  path,
const bool  recover 
)

Definition at line 59 of file StringDictionary.cpp.

References logger::ERROR, LOG, and heavyai::open().

Referenced by StringDictionary::StringDictionary().

59  {
60  auto fd = heavyai::open(path, O_RDWR | O_CREAT | (recover ? O_APPEND : O_TRUNC), 0644);
61  if (fd > 0) {
62  return fd;
63  }
64  auto err = std::string("Dictionary path ") + std::string(path) +
65  std::string(" does not exist.");
66  LOG(ERROR) << err;
67  throw DictPayloadUnavailable(err);
68 }
#define LOG(tag)
Definition: Logger.h:285
int open(const char *path, int flags, int mode)
Definition: heavyai_fs.cpp:66

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

string_dict_hash_t anonymous_namespace{StringDictionary.cpp}::hash_string ( const std::string_view &  str)

Definition at line 88 of file StringDictionary.cpp.

Referenced by StringDictionary::buildDictionaryTranslationMap(), StringDictionary::getBulk(), StringDictionary::getOrAddBulk(), StringDictionary::getOrAddImpl(), StringDictionary::getUnlocked(), StringDictionary::hashStrings(), StringDictionary::increaseHashTableCapacity(), StringDictionary::increaseHashTableCapacityFromStorageAndMemory(), and StringDictionary::StringDictionary().

88  {
89  string_dict_hash_t str_hash = 1;
90  // rely on fact that unsigned overflow is defined and wraps
91  for (size_t i = 0; i < str.size(); ++i) {
92  str_hash = str_hash * 997 + str[i];
93  }
94  return str_hash;
95 }
uint32_t string_dict_hash_t

+ Here is the caller graph for this function:

bool anonymous_namespace{StringDictionary.cpp}::is_like ( const std::string &  str,
const std::string &  pattern,
const bool  icase,
const bool  is_simple,
const char  escape 
)

Definition at line 817 of file StringDictionary.cpp.

References string_ilike(), string_ilike_simple(), string_like(), and string_like_simple().

Referenced by StringDictionary::getLike(), and StringDictionaryProxy::getLike().

821  {
822  return icase
823  ? (is_simple ? string_ilike_simple(
824  str.c_str(), str.size(), pattern.c_str(), pattern.size())
825  : string_ilike(str.c_str(),
826  str.size(),
827  pattern.c_str(),
828  pattern.size(),
829  escape))
830  : (is_simple ? string_like_simple(
831  str.c_str(), str.size(), pattern.c_str(), pattern.size())
832  : string_like(str.c_str(),
833  str.size(),
834  pattern.c_str(),
835  pattern.size(),
836  escape));
837 }
RUNTIME_EXPORT DEVICE bool string_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:244
RUNTIME_EXPORT DEVICE bool string_like_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:41
RUNTIME_EXPORT DEVICE bool string_ilike_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:57
RUNTIME_EXPORT DEVICE bool string_ilike(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:255

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{StringDictionary.cpp}::is_regexp_like ( const std::string &  str,
const std::string &  pattern,
const char  escape 
)

Definition at line 1110 of file StringDictionary.cpp.

References regexp_like().

Referenced by StringDictionary::getRegexpLike(), and StringDictionaryProxy::getRegexpLike().

1112  {
1113  return regexp_like(str.c_str(), str.size(), pattern.c_str(), pattern.size(), escape);
1114 }
RUNTIME_EXPORT DEVICE bool regexp_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: Regexp.cpp:39

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const uint64_t anonymous_namespace{StringDictionary.cpp}::round_up_p2 ( const uint64_t  num)

Definition at line 70 of file StringDictionary.cpp.

Referenced by StringDictionary::StringDictionary().

70  {
71  uint64_t in = num;
72  in--;
73  in |= in >> 1;
74  in |= in >> 2;
75  in |= in >> 4;
76  in |= in >> 8;
77  in |= in >> 16;
78  in++;
79  // TODO MAT deal with case where filesize has been increased but reality is
80  // we are constrained to 2^31.
81  // In that situation this calculation will wrap to zero
82  if (in == 0 || (in > (UINT32_MAX))) {
83  in = UINT32_MAX;
84  }
85  return in;
86 }

+ Here is the caller graph for this function:

template<class T >
void anonymous_namespace{StringDictionary.cpp}::throw_encoding_error ( std::string_view  str,
const shared::StringDictKey &  dict_key 
)

Definition at line 399 of file StringDictionary.cpp.

References logger::ERROR, LOG, StringDictionary::MAX_STRCOUNT, and heavydb.dtypes::T.

399  {
400  std::ostringstream oss;
401  oss << "The text encoded column using dictionary " << dict_key
402  << " has exceeded it's limit of " << sizeof(T) * 8 << " bits ("
403  << static_cast<size_t>(max_valid_int_value<T>() + 1) << " unique values) "
404  << "while attempting to add the new string '" << str << "'. ";
405 
406  if (sizeof(T) < 4) {
407  // Todo: Implement automatic type widening for dictionary-encoded text
408  // columns/all fixed length columm types (at least if not defined
409  // with fixed encoding size), or short of that, ALTER TABLE
410  // COLUMN TYPE to at least allow the user to do this manually
411  // without re-creating the table
412 
413  oss << "To load more data, please re-create the table with "
414  << "this column as type TEXT ENCODING DICT(" << sizeof(T) * 2 * 8 << ") ";
415  if (sizeof(T) == 1) {
416  oss << "or TEXT ENCODING DICT(32) ";
417  }
418  oss << "and reload your data.";
419  } else {
420  // Todo: Implement TEXT ENCODING DICT(64) type which should essentially
421  // preclude overflows.
422  oss << "Currently dictionary-encoded text columns support a maximum of "
423  << StringDictionary::MAX_STRCOUNT
424  << " strings. Consider recreating the table with "
425  << "this column as type TEXT ENCODING NONE and reloading your data.";
426  }
427  LOG(ERROR) << oss.str();
428  throw std::runtime_error(oss.str());
429 }
#define LOG(tag)
Definition: Logger.h:285
static constexpr size_t MAX_STRCOUNT
void anonymous_namespace{StringDictionary.cpp}::throw_string_too_long_error ( std::string_view  str,
const shared::StringDictKey &  dict_key 
)

Definition at line 431 of file StringDictionary.cpp.

References logger::ERROR, LOG, and StringDictionary::MAX_STRLEN.

Referenced by StringDictionary::getBulk().

432  {
433  std::ostringstream oss;
434  oss << "The string '" << str << " could not be inserted into the dictionary "
435  << dict_key << " because it exceeded the maximum allowable "
436  << "length of " << StringDictionary::MAX_STRLEN << " characters (string was "
437  << str.size() << " characters).";
438  LOG(ERROR) << oss.str();
439  throw std::runtime_error(oss.str());
440 }
#define LOG(tag)
Definition: Logger.h:285
static constexpr size_t MAX_STRLEN

+ Here is the caller graph for this function:

Variable Documentation

const int anonymous_namespace{StringDictionary.cpp}::SYSTEM_PAGE_SIZE = heavyai::get_page_size()