OmniSciDB
c1a53651b2
|
#include <StringDictionary.h>
Classes | |
struct | compare_cache_value_t |
struct | PayloadString |
class | StringCallback |
struct | StringIdxEntry |
Public Member Functions | |
StringDictionary (const shared::StringDictKey &dict_key, const std::string &folder, const bool isTemp, const bool recover, const bool materializeHashes=false, size_t initial_capacity=256) | |
StringDictionary (const LeafHostInfo &host, const shared::StringDictKey &dict_key) | |
~StringDictionary () noexcept | |
const shared::StringDictKey & | getDictKey () const noexcept |
void | eachStringSerially (int64_t const generation, StringCallback &) const |
std::function< int32_t(std::string const &)> | makeLambdaStringToId () const |
int32_t | getOrAdd (const std::string &str) noexcept |
template<class T , class String > | |
size_t | getBulk (const std::vector< String > &string_vec, T *encoded_vec) const |
template<class T , class String > | |
size_t | getBulk (const std::vector< String > &string_vec, T *encoded_vec, const int64_t generation) const |
template<class T , class String > | |
void | getOrAddBulk (const std::vector< String > &string_vec, T *encoded_vec) |
template<class T , class String > | |
void | getOrAddBulkParallel (const std::vector< String > &string_vec, T *encoded_vec) |
template<class String > | |
void | getOrAddBulkArray (const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec) |
template<class String > | |
int32_t | getIdOfString (const String &) const |
std::string | getString (int32_t string_id) const |
std::pair< char *, size_t > | getStringBytes (int32_t string_id) const noexcept |
size_t | storageEntryCount () const |
std::vector< int32_t > | getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape, const size_t generation) const |
std::vector< int32_t > | getCompare (const std::string &pattern, const std::string &comp_operator, const size_t generation) |
std::vector< int32_t > | getRegexpLike (const std::string &pattern, const char escape, const size_t generation) const |
std::vector< std::string > | copyStrings () const |
std::vector< std::string_view > | getStringViews () const |
std::vector< std::string_view > | getStringViews (const size_t generation) const |
std::vector< int32_t > | buildDictionaryTranslationMap (const std::shared_ptr< StringDictionary > dest_dict, StringLookupCallback const &dest_transient_lookup_callback) const |
size_t | buildDictionaryTranslationMap (const StringDictionary *dest_dict, int32_t *translated_ids, const int64_t source_generation, const int64_t dest_generation, const bool dest_has_transients, StringLookupCallback const &dest_transient_lookup_callback, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
void | buildDictionaryNumericTranslationMap (Datum *translated_ids, const int64_t source_generation, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
bool | checkpoint () noexcept |
bool | isClient () const noexcept |
void | update_leaf (const LeafHostInfo &host_info) |
Static Public Member Functions | |
static void | populate_string_ids (std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict, const std::vector< std::string const * > &transient_string_vec={}) |
Populates provided dest_ids vector with string ids corresponding to given source strings. More... | |
static void | populate_string_array_ids (std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict) |
Static Public Attributes | |
static constexpr int32_t | INVALID_STR_ID = -1 |
static constexpr size_t | MAX_STRLEN = (1 << 15) - 1 |
static constexpr size_t | MAX_STRCOUNT = (1U << 31) - 1 |
Private Member Functions | |
void | processDictionaryFutures (std::vector< std::future< std::vector< std::pair< string_dict_hash_t, unsigned int >>>> &dictionary_futures) |
size_t | getNumStringsFromStorage (const size_t storage_slots) const noexcept |
bool | fillRateIsHigh (const size_t num_strings) const noexcept |
void | increaseHashTableCapacity () noexcept |
template<class String > | |
void | increaseHashTableCapacityFromStorageAndMemory (const size_t str_count, const size_t storage_high_water_mark, const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids, const std::vector< string_dict_hash_t > &input_strings_hashes) noexcept |
int32_t | getOrAddImpl (const std::string_view &str) noexcept |
template<class String > | |
void | hashStrings (const std::vector< String > &string_vec, std::vector< string_dict_hash_t > &hashes) const noexcept |
int32_t | getUnlocked (const std::string_view sv) const noexcept |
std::string | getStringUnlocked (int32_t string_id) const noexcept |
std::string | getStringChecked (const int string_id) const noexcept |
std::pair< char *, size_t > | getStringBytesChecked (const int string_id) const noexcept |
template<class String > | |
uint32_t | computeBucket (const string_dict_hash_t hash, const String &input_string, const std::vector< int32_t > &string_id_string_dict_hash_table) const noexcept |
template<class String > | |
uint32_t | computeBucketFromStorageAndMemory (const string_dict_hash_t input_string_hash, const String &input_string, const std::vector< int32_t > &string_id_string_dict_hash_table, const size_t storage_high_water_mark, const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids) const noexcept |
uint32_t | computeUniqueBucketWithHash (const string_dict_hash_t hash, const std::vector< int32_t > &string_id_string_dict_hash_table) noexcept |
void | checkAndConditionallyIncreasePayloadCapacity (const size_t write_length) |
void | checkAndConditionallyIncreaseOffsetCapacity (const size_t write_length) |
template<class String > | |
void | appendToStorage (const String str) noexcept |
template<class String > | |
void | appendToStorageBulk (const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids, const size_t sum_new_strings_lengths) noexcept |
PayloadString | getStringFromStorage (const int string_id) const noexcept |
std::string_view | getStringFromStorageFast (const int string_id) const noexcept |
void | addPayloadCapacity (const size_t min_capacity_requested=0) noexcept |
void | addOffsetCapacity (const size_t min_capacity_requested=0) noexcept |
size_t | addStorageCapacity (int fd, const size_t min_capacity_requested=0) noexcept |
void * | addMemoryCapacity (void *addr, size_t &mem_size, const size_t min_capacity_requested=0) noexcept |
void | invalidateInvertedIndex () noexcept |
std::vector< int32_t > | getEquals (std::string pattern, std::string comp_operator, size_t generation) |
void | buildSortedCache () |
void | insertInSortedCache (std::string str, int32_t str_id) |
void | sortCache (std::vector< int32_t > &cache) |
void | mergeSortedCache (std::vector< int32_t > &temp_sorted_cache) |
compare_cache_value_t * | binary_search_cache (const std::string &pattern) const |
Private Attributes | |
const shared::StringDictKey | dict_key_ |
const std::string | folder_ |
size_t | str_count_ |
size_t | collisions_ |
std::vector< int32_t > | string_id_string_dict_hash_table_ |
std::vector< string_dict_hash_t > | hash_cache_ |
std::vector< int32_t > | sorted_cache |
bool | isTemp_ |
bool | materialize_hashes_ |
std::string | offsets_path_ |
int | payload_fd_ |
int | offset_fd_ |
StringIdxEntry * | offset_map_ |
char * | payload_map_ |
size_t | offset_file_size_ |
size_t | payload_file_size_ |
size_t | payload_file_off_ |
std::shared_mutex | rw_mutex_ |
std::map< std::tuple < std::string, bool, bool, char >, std::vector< int32_t > > | like_cache_ |
std::map< std::pair < std::string, char > , std::vector< int32_t > > | regex_cache_ |
std::map< std::string, int32_t > | equal_cache_ |
DictionaryCache< std::string, compare_cache_value_t > | compare_cache_ |
std::shared_ptr< std::vector < std::string > > | strings_cache_ |
std::unique_ptr < StringDictionaryClient > | client_ |
std::unique_ptr < StringDictionaryClient > | client_no_timeout_ |
char * | CANARY_BUFFER {nullptr} |
size_t | canary_buffer_size = 0 |
Friends | |
class | StringLocalCallback |
Definition at line 54 of file StringDictionary.h.
StringDictionary::StringDictionary | ( | const shared::StringDictKey & | dict_key, |
const std::string & | folder, | ||
const bool | isTemp, | ||
const bool | recover, | ||
const bool | materializeHashes = false , |
||
size_t | initial_capacity = 256 |
||
) |
Definition at line 118 of file StringDictionary.cpp.
References addOffsetCapacity(), addPayloadCapacity(), threading_serial::async(), CHECK_EQ, heavyai::checked_mmap(), anonymous_namespace{StringDictionary.cpp}::checked_open(), collisions_, heavyai::file_size(), getNumStringsFromStorage(), getStringFromStorage(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, isTemp_, LOG, materialize_hashes_, offset_fd_, offset_file_size_, offset_map_, offsets_path_, payload_fd_, payload_file_size_, payload_map_, processDictionaryFutures(), anonymous_namespace{StringDictionary.cpp}::round_up_p2(), rw_mutex_, str_count_, string_id_string_dict_hash_table_, VLOG, and logger::WARNING.
StringDictionary::StringDictionary | ( | const LeafHostInfo & | host, |
const shared::StringDictKey & | dict_key | ||
) |
Definition at line 346 of file StringDictionary.cpp.
|
noexcept |
Definition at line 356 of file StringDictionary.cpp.
References CANARY_BUFFER, CHECK, CHECK_GE, heavyai::checked_munmap(), heavyai::close(), isClient(), isTemp_, offset_fd_, offset_file_size_, offset_map_, payload_fd_, payload_file_size_, and payload_map_.
|
private noexcept |
Definition at line 1542 of file StringDictionary.cpp.
References CHECK, and anonymous_namespace{StringDictionary.cpp}::SYSTEM_PAGE_SIZE.
|
private noexcept |
Definition at line 1512 of file StringDictionary.cpp.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), and StringDictionary().
|
private noexcept |
Definition at line 1503 of file StringDictionary.cpp.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), and StringDictionary().
|
private noexcept |
Definition at line 1521 of file StringDictionary.cpp.
References CHECK, CHECK_NE, anonymous_namespace{StringDictionary.cpp}::SYSTEM_PAGE_SIZE, and File_Namespace::write().
|
private noexcept |
Definition at line 1448 of file StringDictionary.cpp.
References StringDictionary::StringIdxEntry::size.
Referenced by getOrAddBulk().
|
private noexcept |
Definition at line 1462 of file StringDictionary.cpp.
Referenced by getOrAddBulkParallel().
|
private |
void StringDictionary::buildDictionaryNumericTranslationMap | ( | Datum * | translated_ids, |
const int64_t | source_generation, | ||
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_infos | ||
) | const |
Definition at line 1959 of file StringDictionary.cpp.
References CHECK, CHECK_GE, CHECK_GT, CHECK_LE, DEBUG_TIMER, getStringFromStorageFast(), ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, threading_serial::parallel_for(), rw_mutex_, and str_count_.
std::vector< int32_t > StringDictionary::buildDictionaryTranslationMap | ( | const std::shared_ptr< StringDictionary > | dest_dict, |
StringLookupCallback const & | dest_transient_lookup_callback | ||
) | const |
Definition at line 1774 of file StringDictionary.cpp.
References DEBUG_TIMER, and storageEntryCount().
size_t StringDictionary::buildDictionaryTranslationMap | ( | const StringDictionary * | dest_dict, |
int32_t * | translated_ids, | ||
const int64_t | source_generation, | ||
const int64_t | dest_generation, | ||
const bool | dest_has_transients, | ||
StringLookupCallback const & | dest_transient_lookup_callback, | ||
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_infos | ||
) | const |
Definition at line 1811 of file StringDictionary.cpp.
References CHECK_GE, CHECK_LE, client_no_timeout_, computeBucket(), DEBUG_TIMER, getDictKey(), getStringFromStorageFast(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, materialize_hashes_, ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, order_translation_locks(), threading_serial::parallel_for(), rw_mutex_, str_count_, and string_id_string_dict_hash_table_.
|
private |
Definition at line 1603 of file StringDictionary.cpp.
References mergeSortedCache(), sortCache(), sorted_cache, and str_count_.
Referenced by getCompare().
|
private |
Definition at line 1427 of file StringDictionary.cpp.
References addOffsetCapacity(), CHECK, CHECK_GE, heavyai::checked_mmap(), heavyai::checked_munmap(), isTemp_, offset_fd_, offset_file_size_, offset_map_, and str_count_.
|
private |
Definition at line 1408 of file StringDictionary.cpp.
References addPayloadCapacity(), CHECK, CHECK_GE, heavyai::checked_mmap(), heavyai::checked_munmap(), isTemp_, payload_fd_, payload_file_off_, payload_file_size_, and payload_map_.
|
noexcept |
Definition at line 1580 of file StringDictionary.cpp.
References CHECK, client_, heavyai::fsync(), isClient(), isTemp_, heavyai::msync(), offset_fd_, offset_file_size_, offset_map_, payload_fd_, payload_file_size_, and payload_map_.
Referenced by import_export::TypedImportBuffer::stringDictCheckpoint().
|
private noexcept |
Definition at line 1310 of file StringDictionary.cpp.
Referenced by buildDictionaryTranslationMap(), getBulk(), and getOrAddBulk().
|
private noexcept |
memcmp(input_string.data(), candidate_storage_string.c_str_ptr, input_string.size())) {
Definition at line 1340 of file StringDictionary.cpp.
Referenced by getOrAddBulkParallel().
|
private noexcept |
Definition at line 1389 of file StringDictionary.cpp.
Referenced by increaseHashTableCapacity(), and processDictionaryFutures().
std::vector< std::string > StringDictionary::copyStrings | ( | ) | const |
Definition at line 1145 of file StringDictionary.cpp.
References threading_serial::async(), CHECK_EQ, CHECK_GT, CHECK_LE, gpu_enabled::copy(), cpu_threads(), getStringUnlocked(), isClient(), rw_mutex_, str_count_, and strings_cache_.
void StringDictionary::eachStringSerially | ( | int64_t const | generation, |
StringCallback & | serial_callback | ||
) | const |
Definition at line 268 of file StringDictionary.cpp.
References CHECK_LE, client_, getStringFromStorageFast(), isClient(), anonymous_namespace{Utm.h}::n, rw_mutex_, storageEntryCount(), and str_count_.
Referenced by makeLambdaStringToId().
|
private noexcept |
Definition at line 1197 of file StringDictionary.cpp.
Referenced by getOrAddBulk(), and getOrAddBulkParallel().
size_t StringDictionary::getBulk | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec | ||
) | const |
Definition at line 487 of file StringDictionary.cpp.
size_t StringDictionary::getBulk | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec, | ||
const int64_t | generation | ||
) | const |
Definition at line 500 of file StringDictionary.cpp.
References CHECK_GE, CHECK_LE, computeBucket(), dict_key_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, MAX_STRLEN, ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, threading_serial::parallel_for(), rw_mutex_, storageEntryCount(), string_id_string_dict_hash_table_, and anonymous_namespace{StringDictionary.cpp}::throw_string_too_long_error().
std::vector< int32_t > StringDictionary::getCompare | ( | const std::string & | pattern, |
const std::string & | comp_operator, | ||
const size_t | generation | ||
) |
Definition at line 937 of file StringDictionary.cpp.
References anonymous_namespace{Utm.h}::a, buildSortedCache(), client_, compare_cache_, getEquals(), getStringFromStorage(), isClient(), gpu_enabled::lower_bound(), rw_mutex_, sorted_cache, str_count_, string_eq(), and string_lt().
|
noexcept |
Definition at line 312 of file StringDictionary.cpp.
References dict_key_.
Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap(), RowSetMemoryOwner::addStringProxyNumericTranslationMap(), RowSetMemoryOwner::addStringProxyUnionTranslationMap(), and buildDictionaryTranslationMap().
|
private |
Definition at line 877 of file StringDictionary.cpp.
References CHECK, CHECK_GT, CHECK_LE, cpu_threads(), equal_cache_, getStringUnlocked(), MAX_STRLEN, run_benchmark_import::result, and str_count_.
Referenced by getCompare().
template int32_t StringDictionary::getIdOfString | ( | const String & | ) | const |
Definition at line 744 of file StringDictionary.cpp.
References client_, getUnlocked(), isClient(), and rw_mutex_.
std::vector< int32_t > StringDictionary::getLike | ( | const std::string & | pattern, |
const bool | icase, | ||
const bool | is_simple, | ||
const char | escape, | ||
const size_t | generation | ||
) | const |
Definition at line 824 of file StringDictionary.cpp.
References CHECK, CHECK_GT, CHECK_LE, client_, cpu_threads(), getStringUnlocked(), anonymous_namespace{StringDictionary.cpp}::is_like(), isClient(), like_cache_, run_benchmark_import::result, rw_mutex_, and str_count_.
|
private noexcept |
Retrieves the number of strings in storage via a binary search for the first canary.
storage_slots | number of storage entries we should search to find the minimum canary |
Definition at line 323 of file StringDictionary.cpp.
References CHECK_GE.
Referenced by StringDictionary().
|
noexcept |
Definition at line 380 of file StringDictionary.cpp.
References CHECK_EQ.
template void StringDictionary::getOrAddBulk | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec | ||
) |
Definition at line 590 of file StringDictionary.cpp.
References appendToStorage(), CHECK, CHECK_LT, computeBucket(), dict_key_, fillRateIsHigh(), g_enable_stringdict_parallel, getOrAddBulkParallel(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), increaseHashTableCapacity(), INVALID_STR_ID, invalidateInvertedIndex(), materialize_hashes_, MAX_STRCOUNT, MAX_STRLEN, offsets_path_, rw_mutex_, str_count_, and string_id_string_dict_hash_table_.
Referenced by import_export::TypedImportBuffer::addDictEncodedString(), ArrowForeignStorageBase::convertArrowDictionary(), ArrowForeignStorageBase::createDictionaryEncodedColumn(), foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous(), getOrAddBulkArray(), and populate_string_ids().
template void StringDictionary::getOrAddBulkArray | ( | const std::vector< std::vector< String >> & | string_array_vec, |
std::vector< std::vector< int32_t >> & | ids_array_vec | ||
) |
Definition at line 439 of file StringDictionary.cpp.
References client_no_timeout_, and getOrAddBulk().
Referenced by import_export::TypedImportBuffer::addDictEncodedStringArray().
void StringDictionary::getOrAddBulkParallel | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec | ||
) |
Definition at line 648 of file StringDictionary.cpp.
References appendToStorageBulk(), CHECK, CHECK_LT, computeBucketFromStorageAndMemory(), dict_key_, fillRateIsHigh(), hash_cache_, hashStrings(), increaseHashTableCapacityFromStorageAndMemory(), INVALID_STR_ID, invalidateInvertedIndex(), materialize_hashes_, MAX_STRCOUNT, MAX_STRLEN, offsets_path_, rw_mutex_, str_count_, and string_id_string_dict_hash_table_.
Referenced by getOrAddBulk().
|
private noexcept |
Definition at line 1257 of file StringDictionary.cpp.
References CHECK, CHECK_LT, and anonymous_namespace{StringDictionary.cpp}::hash_string().
std::vector< int32_t > StringDictionary::getRegexpLike | ( | const std::string & | pattern, |
const char | escape, | ||
const size_t | generation | ||
) | const |
Definition at line 1098 of file StringDictionary.cpp.
References CHECK, CHECK_GT, CHECK_LE, client_, cpu_threads(), getStringUnlocked(), anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), isClient(), regex_cache_, run_benchmark_import::result, rw_mutex_, and str_count_.
std::string StringDictionary::getString | ( | int32_t | string_id | ) | const |
Definition at line 766 of file StringDictionary.cpp.
References client_, getStringUnlocked(), isClient(), and rw_mutex_.
Referenced by StringValueConverter::convertToColumnarFormatFromDict(), and populate_string_ids().
|
noexcept |
Definition at line 781 of file StringDictionary.cpp.
References CHECK, CHECK_LE, and CHECK_LT.
|
private noexcept |
Definition at line 1302 of file StringDictionary.cpp.
References CHECK.
|
private noexcept |
Definition at line 1296 of file StringDictionary.cpp.
References CHECK.
Referenced by increaseHashTableCapacity().
|
private noexcept |
Definition at line 1488 of file StringDictionary.cpp.
References CHECK_GE, StringDictionary::StringIdxEntry::off, and StringDictionary::StringIdxEntry::size.
Referenced by getCompare(), mergeSortedCache(), sortCache(), and StringDictionary().
|
private noexcept |
Definition at line 1482 of file StringDictionary.cpp.
References StringDictionary::StringIdxEntry::off, and StringDictionary::StringIdxEntry::size.
Referenced by buildDictionaryNumericTranslationMap(), buildDictionaryTranslationMap(), eachStringSerially(), and getStringViews().
|
private noexcept |
Definition at line 776 of file StringDictionary.cpp.
References CHECK_LT.
Referenced by copyStrings(), getEquals(), getLike(), getRegexpLike(), and getString().
std::vector< std::string_view > StringDictionary::getStringViews | ( | ) | const |
Definition at line 1770 of file StringDictionary.cpp.
References storageEntryCount().
std::vector< std::string_view > StringDictionary::getStringViews | ( | const size_t | generation | ) | const |
Definition at line 1716 of file StringDictionary.cpp.
References CHECK_GE, CHECK_LE, DEBUG_TIMER, getStringFromStorageFast(), MAX_STRCOUNT, ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, threading_serial::parallel_for(), rw_mutex_, and storageEntryCount().
|
private noexcept |
Definition at line 759 of file StringDictionary.cpp.
References anonymous_namespace{StringDictionary.cpp}::hash_string().
Referenced by getIdOfString().
|
private noexcept |
Method to hash a vector of strings in parallel.
string_vec | input vector of strings to be hashed |
hashes | space for the output - should be pre-sized to match string_vec size |
Definition at line 470 of file StringDictionary.cpp.
References CHECK_EQ, anonymous_namespace{StringDictionary.cpp}::hash_string(), and threading_serial::parallel_for().
Referenced by getOrAddBulkParallel().
|
private noexcept |
Definition at line 1201 of file StringDictionary.cpp.
References computeUniqueBucketWithHash(), getStringChecked(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, materialize_hashes_, str_count_, and string_id_string_dict_hash_table_.
Referenced by getOrAddBulk().
|
private noexcept |
Definition at line 1224 of file StringDictionary.cpp.
References anonymous_namespace{StringDictionary.cpp}::hash_string().
Referenced by getOrAddBulkParallel().
|
private |
|
private noexcept |
Definition at line 1563 of file StringDictionary.cpp.
References compare_cache_, equal_cache_, like_cache_, regex_cache_, and gpu_enabled::swap().
Referenced by getOrAddBulk(), and getOrAddBulkParallel().
|
noexcept |
Definition at line 1599 of file StringDictionary.cpp.
References client_.
Referenced by checkpoint(), copyStrings(), eachStringSerially(), getCompare(), getIdOfString(), getLike(), getRegexpLike(), getString(), makeLambdaStringToId(), storageEntryCount(), and ~StringDictionary().
std::function< int32_t(std::string const &)> StringDictionary::makeLambdaStringToId | ( | ) | const |
Definition at line 255 of file StringDictionary.cpp.
References CHECK, eachStringSerially(), INVALID_STR_ID, and isClient().
|
private |
Definition at line 1627 of file StringDictionary.cpp.
References getStringFromStorage(), sorted_cache, and string_lt().
Referenced by buildSortedCache().
|
static |
Definition at line 1675 of file StringDictionary.cpp.
References threading_serial::async(), populate_string_ids(), and logger::thread_id().
Referenced by DictionaryValueConverter< TARGET_TYPE >::processArrayBuffer().
|
static |
Populates provided dest_ids vector with string ids corresponding to given source strings.
Given a vector of source string ids and corresponding source dictionary, this method populates a vector of destination string ids by either returning the string id of matching strings in the destination dictionary or creating new entries in the dictionary. Source string ids can also be transient if they were created by a function (e.g., LOWER/UPPER functions). A map of transient string ids to string values is provided in order to handle this use case.
dest_ids | - vector of destination string ids to be populated |
dest_dict | - destination dictionary |
source_ids | - vector of source string ids for which destination ids are needed |
source_dict | - source dictionary |
transient_string_vec | - ordered vector of string value pointers |
Definition at line 1651 of file StringDictionary.cpp.
References CHECK_LT, getOrAddBulk(), getString(), and StringDictionaryProxy::transientIdToIndex().
Referenced by populate_string_array_ids(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().
|
private |
Definition at line 292 of file StringDictionary.cpp.
References computeUniqueBucketWithHash(), hash_cache_, materialize_hashes_, payload_file_off_, str_count_, and string_id_string_dict_hash_table_.
Referenced by StringDictionary().
|
private |
Definition at line 1614 of file StringDictionary.cpp.
References anonymous_namespace{Utm.h}::a, getStringFromStorage(), gpu_enabled::sort(), and string_lt().
Referenced by buildSortedCache().
size_t StringDictionary::storageEntryCount | ( | ) | const |
Definition at line 790 of file StringDictionary.cpp.
References client_, isClient(), rw_mutex_, and str_count_.
Referenced by buildDictionaryTranslationMap(), eachStringSerially(), getBulk(), and getStringViews().
void StringDictionary::update_leaf | ( | const LeafHostInfo & | host_info | ) |
Definition at line 378 of file StringDictionary.cpp.
|
friend |
Definition at line 79 of file StringDictionary.h.
|
private |
Definition at line 291 of file StringDictionary.h.
Referenced by ~StringDictionary().
|
private |
Definition at line 292 of file StringDictionary.h.
|
mutable private |
Definition at line 288 of file StringDictionary.h.
Referenced by checkpoint(), eachStringSerially(), getCompare(), getIdOfString(), getLike(), getRegexpLike(), getString(), isClient(), and storageEntryCount().
|
mutable private |
Definition at line 289 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), and getOrAddBulkArray().
|
private |
Definition at line 267 of file StringDictionary.h.
Referenced by StringDictionary().
|
mutable private |
Definition at line 286 of file StringDictionary.h.
Referenced by getCompare(), and invalidateInvertedIndex().
|
private |
Definition at line 264 of file StringDictionary.h.
Referenced by getBulk(), getDictKey(), getOrAddBulk(), and getOrAddBulkParallel().
|
mutable private |
Definition at line 285 of file StringDictionary.h.
Referenced by getEquals(), and invalidateInvertedIndex().
|
private |
Definition at line 265 of file StringDictionary.h.
|
private |
Definition at line 269 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), getOrAddBulk(), getOrAddBulkParallel(), increaseHashTableCapacity(), processDictionaryFutures(), and StringDictionary().
|
static |
Definition at line 172 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxyUnlocked(), StringDictionaryProxy::buildUnionTranslationMapToOtherProxy(), count_matches_impl(), count_matches_sharded(), CodeGenerator::createInValuesBitmap(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), fill_hash_join_buff_impl(), fill_hash_join_buff_sharded_impl(), fill_row_ids_impl(), fill_row_ids_sharded_impl(), getBulk(), StringDictionaryProxy::getIdOfString(), StringDictionaryProxy::getIdOfStringNoGeneration(), getOrAddBulk(), getOrAddBulkParallel(), StringDictionaryProxy::getOrAddTransientBulk(), StringDictionaryProxy::getOrAddTransientImpl(), increaseHashTableCapacity(), StringDictionaryProxy::initIdMap(), StringDictionaryProxy::lookupTransientStringUnlocked(), makeLambdaStringToId(), anonymous_namespace{HashJoinRuntime.cpp}::map_str_id_to_outer_dict(), GenericKeyHandler::operator()(), StringLocalCallback::operator()(), StringNetworkCallback::operator()(), StringDictionary(), StringDictionaryProxy::transientLookupBulkParallelUnlocked(), StringDictionaryProxy::transientLookupBulkUnlocked(), and truncate_to_generation().
|
private |
Definition at line 271 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
mutable private |
Definition at line 283 of file StringDictionary.h.
Referenced by getLike(), and invalidateInvertedIndex().
|
private |
Definition at line 272 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), getOrAddBulk(), getOrAddBulkParallel(), increaseHashTableCapacity(), processDictionaryFutures(), and StringDictionary().
|
static |
Definition at line 174 of file StringDictionary.h.
Referenced by getOrAddBulk(), getOrAddBulkParallel(), getStringViews(), and anonymous_namespace{StringDictionary.cpp}::throw_encoding_error().
|
static |
Definition at line 173 of file StringDictionary.h.
Referenced by import_export::TypedImportBuffer::add_value(), import_export::TypedImportBuffer::addDefaultValues(), import_export::TypedImportBuffer::addDictEncodedString(), import_export::TypedImportBuffer::addDictEncodedStringArray(), foreign_storage::ParquetStringNoneEncoder::appendData(), foreign_storage::ParquetDetectStringEncoder::appendData(), foreign_storage::ParquetStringEncoder< V >::appendDataTrackErrors(), foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous(), getBulk(), getEquals(), getOrAddBulk(), getOrAddBulkParallel(), import_export::delimited_parser::parse_string_array(), foreign_storage::RegexFileBufferParser::parseBuffer(), anonymous_namespace{StringDictionary.cpp}::throw_string_too_long_error(), foreign_storage::ParquetDetectStringEncoder::validate(), ddl_utils::anonymous_namespace{DdlUtils.cpp}::validate_literal(), and foreign_storage::ParquetStringEncoder< V >::validateAndAppendData().
|
private |
Definition at line 275 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 278 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 276 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 273 of file StringDictionary.h.
Referenced by getOrAddBulk(), getOrAddBulkParallel(), and StringDictionary().
|
private |
Definition at line 274 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 280 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), and processDictionaryFutures().
|
private |
Definition at line 279 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 277 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
mutable private |
Definition at line 284 of file StringDictionary.h.
Referenced by getRegexpLike(), and invalidateInvertedIndex().
|
mutable private |
Definition at line 281 of file StringDictionary.h.
Referenced by buildDictionaryNumericTranslationMap(), buildDictionaryTranslationMap(), copyStrings(), eachStringSerially(), getBulk(), getCompare(), getIdOfString(), getLike(), getOrAddBulk(), getOrAddBulkParallel(), getRegexpLike(), getString(), getStringViews(), storageEntryCount(), and StringDictionary().
|
private |
Definition at line 270 of file StringDictionary.h.
Referenced by buildSortedCache(), getCompare(), and mergeSortedCache().
|
private |
Definition at line 266 of file StringDictionary.h.
Referenced by buildDictionaryNumericTranslationMap(), buildDictionaryTranslationMap(), buildSortedCache(), checkAndConditionallyIncreaseOffsetCapacity(), copyStrings(), eachStringSerially(), getCompare(), getEquals(), getLike(), getOrAddBulk(), getOrAddBulkParallel(), getRegexpLike(), increaseHashTableCapacity(), processDictionaryFutures(), storageEntryCount(), and StringDictionary().
|
private |
Definition at line 268 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), getBulk(), getOrAddBulk(), getOrAddBulkParallel(), increaseHashTableCapacity(), processDictionaryFutures(), and StringDictionary().
|
mutable private |
Definition at line 287 of file StringDictionary.h.
Referenced by copyStrings().