OmniSciDB  ca0c39ec8f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{StringDictionary.cpp} Namespace Reference

Classes

struct  ThreadInfo
 
class  MapMaker
 

Functions

int checked_open (const char *path, const bool recover)
 
const uint64_t round_up_p2 (const uint64_t num)
 
string_dict_hash_t hash_string (const std::string_view &str)
 
template<class T >
void throw_encoding_error (std::string_view str, const DictRef &dict_ref)
 
void throw_string_too_long_error (std::string_view str, const DictRef &dict_ref)
 
bool is_like (const std::string &str, const std::string &pattern, const bool icase, const bool is_simple, const char escape)
 
bool is_regexp_like (const std::string &str, const std::string &pattern, const char escape)
 

Variables

const int SYSTEM_PAGE_SIZE = heavyai::get_page_size()
 

Function Documentation

int anonymous_namespace{StringDictionary.cpp}::checked_open ( const char *  path,
const bool  recover 
)

Definition at line 58 of file StringDictionary.cpp.

References logger::ERROR, LOG, and heavyai::open().

Referenced by StringDictionary::StringDictionary().

58  {
59  auto fd = heavyai::open(path, O_RDWR | O_CREAT | (recover ? O_APPEND : O_TRUNC), 0644);
60  if (fd > 0) {
61  return fd;
62  }
63  auto err = std::string("Dictionary path ") + std::string(path) +
64  std::string(" does not exist.");
65  LOG(ERROR) << err;
66  throw DictPayloadUnavailable(err);
67 }
#define LOG(tag)
Definition: Logger.h:216
int open(const char *path, int flags, int mode)
Definition: heavyai_fs.cpp:66

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

string_dict_hash_t anonymous_namespace{StringDictionary.cpp}::hash_string ( const std::string_view &  str)

Definition at line 87 of file StringDictionary.cpp.

Referenced by StringDictionary::buildDictionaryTranslationMap(), StringDictionary::getBulk(), StringDictionary::getOrAddBulk(), StringDictionary::getOrAddImpl(), StringDictionary::getUnlocked(), StringDictionary::hashStrings(), StringDictionary::increaseHashTableCapacity(), StringDictionary::increaseHashTableCapacityFromStorageAndMemory(), and StringDictionary::StringDictionary().

87  {
88  string_dict_hash_t str_hash = 1;
89  // rely on fact that unsigned overflow is defined and wraps
90  for (size_t i = 0; i < str.size(); ++i) {
91  str_hash = str_hash * 997 + str[i];
92  }
93  return str_hash;
94 }
uint32_t string_dict_hash_t

+ Here is the caller graph for this function:

bool anonymous_namespace{StringDictionary.cpp}::is_like ( const std::string &  str,
const std::string &  pattern,
const bool  icase,
const bool  is_simple,
const char  escape 
)

Definition at line 801 of file StringDictionary.cpp.

References string_ilike(), string_ilike_simple(), string_like(), and string_like_simple().

Referenced by StringDictionary::getLike(), and StringDictionaryProxy::getLike().

805  {
806  return icase
807  ? (is_simple ? string_ilike_simple(
808  str.c_str(), str.size(), pattern.c_str(), pattern.size())
809  : string_ilike(str.c_str(),
810  str.size(),
811  pattern.c_str(),
812  pattern.size(),
813  escape))
814  : (is_simple ? string_like_simple(
815  str.c_str(), str.size(), pattern.c_str(), pattern.size())
816  : string_like(str.c_str(),
817  str.size(),
818  pattern.c_str(),
819  pattern.size(),
820  escape));
821 }
RUNTIME_EXPORT DEVICE bool string_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:244
RUNTIME_EXPORT DEVICE bool string_like_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:41
RUNTIME_EXPORT DEVICE bool string_ilike_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:57
RUNTIME_EXPORT DEVICE bool string_ilike(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:255

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{StringDictionary.cpp}::is_regexp_like ( const std::string &  str,
const std::string &  pattern,
const char  escape 
)

Definition at line 1091 of file StringDictionary.cpp.

References regexp_like().

Referenced by StringDictionary::getRegexpLike(), and StringDictionaryProxy::getRegexpLike().

1093  {
1094  return regexp_like(str.c_str(), str.size(), pattern.c_str(), pattern.size(), escape);
1095 }
RUNTIME_EXPORT DEVICE bool regexp_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: Regexp.cpp:39

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const uint64_t anonymous_namespace{StringDictionary.cpp}::round_up_p2 ( const uint64_t  num)

Definition at line 69 of file StringDictionary.cpp.

Referenced by StringDictionary::StringDictionary().

69  {
70  uint64_t in = num;
71  in--;
72  in |= in >> 1;
73  in |= in >> 2;
74  in |= in >> 4;
75  in |= in >> 8;
76  in |= in >> 16;
77  in++;
78  // TODO MAT deal with case where filesize has been increased but reality is
79  // we are constrained to 2^31.
80  // In that situation this calculation will wrap to zero
81  if (in == 0 || (in > (UINT32_MAX))) {
82  in = UINT32_MAX;
83  }
84  return in;
85 }

+ Here is the caller graph for this function:

template<class T >
void anonymous_namespace{StringDictionary.cpp}::throw_encoding_error ( std::string_view  str,
const DictRef &  dict_ref 
)

Definition at line 395 of file StringDictionary.cpp.

References logger::ERROR, LOG, StringDictionary::MAX_STRCOUNT, heavydb.dtypes::T, and dict_ref_t::toString().

395  {
396  std::ostringstream oss;
397  oss << "The text encoded column using dictionary " << dict_ref.toString()
398  << " has exceeded it's limit of " << sizeof(T) * 8 << " bits ("
399  << static_cast<size_t>(max_valid_int_value<T>() + 1) << " unique values) "
400  << "while attempting to add the new string '" << str << "'. ";
401 
402  if (sizeof(T) < 4) {
403  // Todo: Implement automatic type widening for dictionary-encoded text
404  // columns/all fixed length columm types (at least if not defined
405  // with fixed encoding size), or short of that, ALTER TABLE
406  // COLUMN TYPE to at least allow the user to do this manually
407  // without re-creating the table
408 
409  oss << "To load more data, please re-create the table with "
410  << "this column as type TEXT ENCODING DICT(" << sizeof(T) * 2 * 8 << ") ";
411  if (sizeof(T) == 1) {
412  oss << "or TEXT ENCODING DICT(32) ";
413  }
414  oss << "and reload your data.";
415  } else {
416  // Todo: Implement TEXT ENCODING DICT(64) type which should essentially
417  // preclude overflows.
418  oss << "Currently dictionary-encoded text columns support a maximum of "
419  << StringDictionary::MAX_STRCOUNT
420  << " strings. Consider recreating the table with "
421  << "this column as type TEXT ENCODING NONE and reloading your data.";
422  }
423  LOG(ERROR) << oss.str();
424  throw std::runtime_error(oss.str());
425 }
#define LOG(tag)
Definition: Logger.h:216
static constexpr size_t MAX_STRCOUNT
std::string toString() const
Definition: DictRef.h:41

+ Here is the call graph for this function:

void anonymous_namespace{StringDictionary.cpp}::throw_string_too_long_error ( std::string_view  str,
const DictRef &  dict_ref 
)

Definition at line 427 of file StringDictionary.cpp.

References logger::ERROR, LOG, StringDictionary::MAX_STRLEN, and dict_ref_t::toString().

Referenced by StringDictionary::getBulk().

427  {
428  std::ostringstream oss;
429  oss << "The string '" << str << " could not be inserted into the dictionary "
430  << dict_ref.toString() << " because it exceeded the maximum allowable "
431  << "length of " << StringDictionary::MAX_STRLEN << " characters (string was "
432  << str.size() << " characters).";
433  LOG(ERROR) << oss.str();
434  throw std::runtime_error(oss.str());
435 }
#define LOG(tag)
Definition: Logger.h:216
std::string toString() const
Definition: DictRef.h:41
static constexpr size_t MAX_STRLEN

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Variable Documentation

const int anonymous_namespace{StringDictionary.cpp}::SYSTEM_PAGE_SIZE = heavyai::get_page_size()