OmniSciDB
cde582ebc3
|
#include <StringDictionary.h>
Classes | |
struct | compare_cache_value_t |
struct | PayloadString |
class | StringCallback |
struct | StringIdxEntry |
Public Member Functions | |
StringDictionary (const DictRef &dict_ref, const std::string &folder, const bool isTemp, const bool recover, const bool materializeHashes=false, size_t initial_capacity=256) | |
StringDictionary (const LeafHostInfo &host, const DictRef dict_ref) | |
~StringDictionary () noexcept | |
int32_t | getDbId () const noexcept |
int32_t | getDictId () const noexcept |
void | eachStringSerially (int64_t const generation, StringCallback &) const |
std::function< int32_t(std::string const &)> | makeLambdaStringToId () const |
int32_t | getOrAdd (const std::string &str) noexcept |
template<class T , class String > | |
size_t | getBulk (const std::vector< String > &string_vec, T *encoded_vec) const |
template<class T , class String > | |
size_t | getBulk (const std::vector< String > &string_vec, T *encoded_vec, const int64_t generation) const |
template<class T , class String > | |
void | getOrAddBulk (const std::vector< String > &string_vec, T *encoded_vec) |
template<class T , class String > | |
void | getOrAddBulkParallel (const std::vector< String > &string_vec, T *encoded_vec) |
template<class String > | |
void | getOrAddBulkArray (const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec) |
template<class String > | |
int32_t | getIdOfString (const String &) const |
std::string | getString (int32_t string_id) const |
std::pair< char *, size_t > | getStringBytes (int32_t string_id) const noexcept |
size_t | storageEntryCount () const |
std::vector< int32_t > | getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape, const size_t generation) const |
std::vector< int32_t > | getCompare (const std::string &pattern, const std::string &comp_operator, const size_t generation) |
std::vector< int32_t > | getRegexpLike (const std::string &pattern, const char escape, const size_t generation) const |
std::vector< std::string > | copyStrings () const |
std::vector< std::string_view > | getStringViews () const |
std::vector< std::string_view > | getStringViews (const size_t generation) const |
std::vector< int32_t > | buildDictionaryTranslationMap (const std::shared_ptr< StringDictionary > dest_dict, StringLookupCallback const &dest_transient_lookup_callback) const |
size_t | buildDictionaryTranslationMap (const StringDictionary *dest_dict, int32_t *translated_ids, const int64_t source_generation, const int64_t dest_generation, const bool dest_has_transients, StringLookupCallback const &dest_transient_lookup_callback, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
bool | checkpoint () noexcept |
bool | isClient () const noexcept |
void | update_leaf (const LeafHostInfo &host_info) |
Static Public Member Functions | |
static void | populate_string_ids (std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict, const std::vector< std::string const * > &transient_string_vec={}) |
Populates provided dest_ids vector with string ids corresponding to given source strings. More... | |
static void | populate_string_array_ids (std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict) |
Static Public Attributes | |
static constexpr int32_t | INVALID_STR_ID = -1 |
static constexpr size_t | MAX_STRLEN = (1 << 15) - 1 |
static constexpr size_t | MAX_STRCOUNT = (1U << 31) - 1 |
Private Member Functions | |
void | processDictionaryFutures (std::vector< std::future< std::vector< std::pair< string_dict_hash_t, unsigned int >>>> &dictionary_futures) |
size_t | getNumStringsFromStorage (const size_t storage_slots) const noexcept |
bool | fillRateIsHigh (const size_t num_strings) const noexcept |
void | increaseHashTableCapacity () noexcept |
template<class String > | |
void | increaseHashTableCapacityFromStorageAndMemory (const size_t str_count, const size_t storage_high_water_mark, const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids, const std::vector< string_dict_hash_t > &input_strings_hashes) noexcept |
int32_t | getOrAddImpl (const std::string_view &str) noexcept |
template<class String > | |
void | hashStrings (const std::vector< String > &string_vec, std::vector< string_dict_hash_t > &hashes) const noexcept |
int32_t | getUnlocked (const std::string_view sv) const noexcept |
std::string | getStringUnlocked (int32_t string_id) const noexcept |
std::string | getStringChecked (const int string_id) const noexcept |
std::pair< char *, size_t > | getStringBytesChecked (const int string_id) const noexcept |
template<class String > | |
uint32_t | computeBucket (const string_dict_hash_t hash, const String &input_string, const std::vector< int32_t > &string_id_string_dict_hash_table) const noexcept |
template<class String > | |
uint32_t | computeBucketFromStorageAndMemory (const string_dict_hash_t input_string_hash, const String &input_string, const std::vector< int32_t > &string_id_string_dict_hash_table, const size_t storage_high_water_mark, const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids) const noexcept |
uint32_t | computeUniqueBucketWithHash (const string_dict_hash_t hash, const std::vector< int32_t > &string_id_string_dict_hash_table) noexcept |
void | checkAndConditionallyIncreasePayloadCapacity (const size_t write_length) |
void | checkAndConditionallyIncreaseOffsetCapacity (const size_t write_length) |
template<class String > | |
void | appendToStorage (const String str) noexcept |
template<class String > | |
void | appendToStorageBulk (const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids, const size_t sum_new_strings_lengths) noexcept |
PayloadString | getStringFromStorage (const int string_id) const noexcept |
std::string_view | getStringFromStorageFast (const int string_id) const noexcept |
void | addPayloadCapacity (const size_t min_capacity_requested=0) noexcept |
void | addOffsetCapacity (const size_t min_capacity_requested=0) noexcept |
size_t | addStorageCapacity (int fd, const size_t min_capacity_requested=0) noexcept |
void * | addMemoryCapacity (void *addr, size_t &mem_size, const size_t min_capacity_requested=0) noexcept |
void | invalidateInvertedIndex () noexcept |
std::vector< int32_t > | getEquals (std::string pattern, std::string comp_operator, size_t generation) |
void | buildSortedCache () |
void | insertInSortedCache (std::string str, int32_t str_id) |
void | sortCache (std::vector< int32_t > &cache) |
void | mergeSortedCache (std::vector< int32_t > &temp_sorted_cache) |
compare_cache_value_t * | binary_search_cache (const std::string &pattern) const |
Private Attributes | |
const DictRef | dict_ref_ |
const std::string | folder_ |
size_t | str_count_ |
size_t | collisions_ |
std::vector< int32_t > | string_id_string_dict_hash_table_ |
std::vector< string_dict_hash_t > | hash_cache_ |
std::vector< int32_t > | sorted_cache |
bool | isTemp_ |
bool | materialize_hashes_ |
std::string | offsets_path_ |
int | payload_fd_ |
int | offset_fd_ |
StringIdxEntry * | offset_map_ |
char * | payload_map_ |
size_t | offset_file_size_ |
size_t | payload_file_size_ |
size_t | payload_file_off_ |
std::shared_mutex | rw_mutex_ |
std::map< std::tuple < std::string, bool, bool, char >, std::vector< int32_t > > | like_cache_ |
std::map< std::pair < std::string, char > , std::vector< int32_t > > | regex_cache_ |
std::map< std::string, int32_t > | equal_cache_ |
DictionaryCache< std::string, compare_cache_value_t > | compare_cache_ |
std::shared_ptr< std::vector < std::string > > | strings_cache_ |
std::unique_ptr < StringDictionaryClient > | client_ |
std::unique_ptr < StringDictionaryClient > | client_no_timeout_ |
char * | CANARY_BUFFER {nullptr} |
size_t | canary_buffer_size = 0 |
Friends | |
class | StringLocalCallback |
Definition at line 54 of file StringDictionary.h.
StringDictionary::StringDictionary | ( | const DictRef & | dict_ref, |
const std::string & | folder, | ||
const bool | isTemp, | ||
const bool | recover, | ||
const bool | materializeHashes = false , |
||
size_t | initial_capacity = 256 |
||
) |
Definition at line 117 of file StringDictionary.cpp.
References addOffsetCapacity(), addPayloadCapacity(), threading_serial::async(), CHECK_EQ, heavyai::checked_mmap(), anonymous_namespace{StringDictionary.cpp}::checked_open(), collisions_, heavyai::file_size(), getNumStringsFromStorage(), getStringFromStorage(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, isTemp_, LOG, materialize_hashes_, offset_fd_, offset_file_size_, offset_map_, offsets_path_, payload_fd_, payload_file_size_, payload_map_, processDictionaryFutures(), anonymous_namespace{StringDictionary.cpp}::round_up_p2(), rw_mutex_, str_count_, string_id_string_dict_hash_table_, VLOG, and logger::WARNING.
StringDictionary::StringDictionary | ( | const LeafHostInfo & | host, |
const DictRef | dict_ref | ||
) |
Definition at line 349 of file StringDictionary.cpp.
|
noexcept |
Definition at line 357 of file StringDictionary.cpp.
References CANARY_BUFFER, CHECK, CHECK_GE, heavyai::checked_munmap(), heavyai::close(), isClient(), isTemp_, offset_fd_, offset_file_size_, offset_map_, payload_fd_, payload_file_size_, and payload_map_.
|
privatenoexcept |
Definition at line 1543 of file StringDictionary.cpp.
References CHECK, and anonymous_namespace{StringDictionary.cpp}::SYSTEM_PAGE_SIZE.
|
privatenoexcept |
Definition at line 1513 of file StringDictionary.cpp.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), and StringDictionary().
|
privatenoexcept |
Definition at line 1504 of file StringDictionary.cpp.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), and StringDictionary().
|
privatenoexcept |
Definition at line 1522 of file StringDictionary.cpp.
References CHECK, CHECK_NE, anonymous_namespace{StringDictionary.cpp}::SYSTEM_PAGE_SIZE, and File_Namespace::write().
|
privatenoexcept |
Definition at line 1449 of file StringDictionary.cpp.
References StringDictionary::StringIdxEntry::size.
Referenced by getOrAddBulk().
|
privatenoexcept |
Definition at line 1463 of file StringDictionary.cpp.
Referenced by getOrAddBulkParallel().
|
private |
std::vector< int32_t > StringDictionary::buildDictionaryTranslationMap | ( | const std::shared_ptr< StringDictionary > | dest_dict, |
StringLookupCallback const & | dest_transient_lookup_callback | ||
) | const |
Definition at line 1775 of file StringDictionary.cpp.
References DEBUG_TIMER, and storageEntryCount().
size_t StringDictionary::buildDictionaryTranslationMap | ( | const StringDictionary * | dest_dict, |
int32_t * | translated_ids, | ||
const int64_t | source_generation, | ||
const int64_t | dest_generation, | ||
const bool | dest_has_transients, | ||
StringLookupCallback const & | dest_transient_lookup_callback, | ||
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_infos | ||
) | const |
Definition at line 1817 of file StringDictionary.cpp.
References CHECK_GE, CHECK_LE, client_no_timeout_, computeBucket(), DEBUG_TIMER, getDbId(), getDictId(), getStringFromStorageFast(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, materialize_hashes_, ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, order_translation_locks(), threading_serial::parallel_for(), rw_mutex_, str_count_, and string_id_string_dict_hash_table_.
|
private |
Definition at line 1604 of file StringDictionary.cpp.
References mergeSortedCache(), sortCache(), sorted_cache, and str_count_.
Referenced by getCompare().
|
private |
Definition at line 1428 of file StringDictionary.cpp.
References addOffsetCapacity(), CHECK, CHECK_GE, heavyai::checked_mmap(), heavyai::checked_munmap(), isTemp_, offset_fd_, offset_file_size_, offset_map_, and str_count_.
|
private |
Definition at line 1409 of file StringDictionary.cpp.
References addPayloadCapacity(), CHECK, CHECK_GE, heavyai::checked_mmap(), heavyai::checked_munmap(), isTemp_, payload_fd_, payload_file_off_, payload_file_size_, and payload_map_.
|
noexcept |
Definition at line 1581 of file StringDictionary.cpp.
References CHECK, client_, heavyai::fsync(), isClient(), isTemp_, heavyai::msync(), offset_fd_, offset_file_size_, offset_map_, payload_fd_, payload_file_size_, and payload_map_.
Referenced by import_export::TypedImportBuffer::stringDictCheckpoint().
|
privatenoexcept |
Definition at line 1311 of file StringDictionary.cpp.
Referenced by buildDictionaryTranslationMap(), getBulk(), and getOrAddBulk().
|
privatenoexcept |
memcmp(input_string.data(), candidate_storage_string.c_str_ptr, input_string.size())) {
Definition at line 1341 of file StringDictionary.cpp.
Referenced by getOrAddBulkParallel().
|
privatenoexcept |
Definition at line 1390 of file StringDictionary.cpp.
Referenced by increaseHashTableCapacity(), and processDictionaryFutures().
std::vector< std::string > StringDictionary::copyStrings | ( | ) | const |
Definition at line 1146 of file StringDictionary.cpp.
References threading_serial::async(), CHECK_EQ, CHECK_GT, CHECK_LE, gpu_enabled::copy(), cpu_threads(), getStringUnlocked(), isClient(), rw_mutex_, str_count_, and strings_cache_.
void StringDictionary::eachStringSerially | ( | int64_t const | generation, |
StringCallback & | serial_callback | ||
) | const |
Definition at line 267 of file StringDictionary.cpp.
References CHECK_LE, client_, getStringFromStorageFast(), isClient(), anonymous_namespace{Utm.h}::n, rw_mutex_, storageEntryCount(), and str_count_.
Referenced by makeLambdaStringToId().
|
privatenoexcept |
Definition at line 1198 of file StringDictionary.cpp.
Referenced by getOrAddBulk(), and getOrAddBulkParallel().
size_t StringDictionary::getBulk | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec | ||
) | const |
Definition at line 488 of file StringDictionary.cpp.
size_t StringDictionary::getBulk | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec, | ||
const int64_t | generation | ||
) | const |
Definition at line 501 of file StringDictionary.cpp.
References CHECK_GE, CHECK_LE, computeBucket(), dict_ref_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, MAX_STRLEN, ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, threading_serial::parallel_for(), rw_mutex_, storageEntryCount(), string_id_string_dict_hash_table_, and anonymous_namespace{StringDictionary.cpp}::throw_string_too_long_error().
std::vector< int32_t > StringDictionary::getCompare | ( | const std::string & | pattern, |
const std::string & | comp_operator, | ||
const size_t | generation | ||
) |
Definition at line 938 of file StringDictionary.cpp.
References anonymous_namespace{Utm.h}::a, buildSortedCache(), client_, compare_cache_, getEquals(), getStringFromStorage(), isClient(), gpu_enabled::lower_bound(), rw_mutex_, sorted_cache, str_count_, string_eq(), and string_lt().
|
noexcept |
Definition at line 311 of file StringDictionary.cpp.
References dict_ref_t::dbId, and dict_ref_.
Referenced by buildDictionaryTranslationMap().
|
noexcept |
Definition at line 315 of file StringDictionary.cpp.
References dict_ref_, and dict_ref_t::dictId.
Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap(), RowSetMemoryOwner::addStringProxyUnionTranslationMap(), and buildDictionaryTranslationMap().
|
private |
Definition at line 878 of file StringDictionary.cpp.
References CHECK, CHECK_GT, CHECK_LE, cpu_threads(), equal_cache_, getStringUnlocked(), MAX_STRLEN, run_benchmark_import::result, and str_count_.
Referenced by getCompare().
template int32_t StringDictionary::getIdOfString | ( | const String & | ) | const |
Definition at line 745 of file StringDictionary.cpp.
References client_, getUnlocked(), isClient(), and rw_mutex_.
std::vector< int32_t > StringDictionary::getLike | ( | const std::string & | pattern, |
const bool | icase, | ||
const bool | is_simple, | ||
const char | escape, | ||
const size_t | generation | ||
) | const |
Definition at line 825 of file StringDictionary.cpp.
References CHECK, CHECK_GT, CHECK_LE, client_, cpu_threads(), getStringUnlocked(), anonymous_namespace{StringDictionary.cpp}::is_like(), isClient(), like_cache_, run_benchmark_import::result, rw_mutex_, and str_count_.
|
privatenoexcept |
Method to retrieve the number of strings in storage via a binary search for the first canary.
storage_slots | number of storage entries we should search to find the minimum canary |
Definition at line 326 of file StringDictionary.cpp.
References CHECK_GE.
Referenced by StringDictionary().
|
noexcept |
Definition at line 382 of file StringDictionary.cpp.
References CHECK_EQ.
template void StringDictionary::getOrAddBulk | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec | ||
) |
Definition at line 591 of file StringDictionary.cpp.
References appendToStorage(), CHECK, CHECK_LT, computeBucket(), dict_ref_, fillRateIsHigh(), g_enable_stringdict_parallel, getOrAddBulkParallel(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), increaseHashTableCapacity(), INVALID_STR_ID, invalidateInvertedIndex(), materialize_hashes_, MAX_STRCOUNT, MAX_STRLEN, offsets_path_, rw_mutex_, str_count_, and string_id_string_dict_hash_table_.
Referenced by import_export::TypedImportBuffer::addDictEncodedString(), ArrowForeignStorageBase::convertArrowDictionary(), ArrowForeignStorageBase::createDictionaryEncodedColumn(), foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous(), getOrAddBulkArray(), and populate_string_ids().
template void StringDictionary::getOrAddBulkArray | ( | const std::vector< std::vector< String >> & | string_array_vec, |
std::vector< std::vector< int32_t >> & | ids_array_vec | ||
) |
Definition at line 440 of file StringDictionary.cpp.
References client_no_timeout_, and getOrAddBulk().
Referenced by import_export::TypedImportBuffer::addDictEncodedStringArray().
void StringDictionary::getOrAddBulkParallel | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec | ||
) |
Definition at line 649 of file StringDictionary.cpp.
References appendToStorageBulk(), CHECK, CHECK_LT, computeBucketFromStorageAndMemory(), dict_ref_, fillRateIsHigh(), hash_cache_, hashStrings(), increaseHashTableCapacityFromStorageAndMemory(), INVALID_STR_ID, invalidateInvertedIndex(), materialize_hashes_, MAX_STRCOUNT, MAX_STRLEN, offsets_path_, rw_mutex_, str_count_, and string_id_string_dict_hash_table_.
Referenced by getOrAddBulk().
|
privatenoexcept |
Definition at line 1258 of file StringDictionary.cpp.
References CHECK, CHECK_LT, and anonymous_namespace{StringDictionary.cpp}::hash_string().
std::vector< int32_t > StringDictionary::getRegexpLike | ( | const std::string & | pattern, |
const char | escape, | ||
const size_t | generation | ||
) | const |
Definition at line 1099 of file StringDictionary.cpp.
References CHECK, CHECK_GT, CHECK_LE, client_, cpu_threads(), getStringUnlocked(), anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), isClient(), regex_cache_, run_benchmark_import::result, rw_mutex_, and str_count_.
std::string StringDictionary::getString | ( | int32_t | string_id | ) | const |
Definition at line 767 of file StringDictionary.cpp.
References client_, getStringUnlocked(), isClient(), and rw_mutex_.
Referenced by StringValueConverter::convertToColumnarFormatFromDict(), and populate_string_ids().
|
noexcept |
Definition at line 782 of file StringDictionary.cpp.
References CHECK, CHECK_LE, and CHECK_LT.
|
privatenoexcept |
Definition at line 1303 of file StringDictionary.cpp.
References CHECK.
|
privatenoexcept |
Definition at line 1297 of file StringDictionary.cpp.
References CHECK.
Referenced by increaseHashTableCapacity().
|
privatenoexcept |
Definition at line 1489 of file StringDictionary.cpp.
References CHECK_GE, StringDictionary::StringIdxEntry::off, and StringDictionary::StringIdxEntry::size.
Referenced by getCompare(), mergeSortedCache(), sortCache(), and StringDictionary().
|
privatenoexcept |
Definition at line 1483 of file StringDictionary.cpp.
References StringDictionary::StringIdxEntry::off, and StringDictionary::StringIdxEntry::size.
Referenced by buildDictionaryTranslationMap(), eachStringSerially(), and getStringViews().
|
privatenoexcept |
Definition at line 777 of file StringDictionary.cpp.
References CHECK_LT.
Referenced by copyStrings(), getEquals(), getLike(), getRegexpLike(), and getString().
std::vector< std::string_view > StringDictionary::getStringViews | ( | ) | const |
Definition at line 1771 of file StringDictionary.cpp.
References storageEntryCount().
std::vector< std::string_view > StringDictionary::getStringViews | ( | const size_t | generation | ) | const |
Definition at line 1717 of file StringDictionary.cpp.
References CHECK_GE, CHECK_LE, DEBUG_TIMER, getStringFromStorageFast(), MAX_STRCOUNT, ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, threading_serial::parallel_for(), rw_mutex_, and storageEntryCount().
|
privatenoexcept |
Definition at line 760 of file StringDictionary.cpp.
References anonymous_namespace{StringDictionary.cpp}::hash_string().
Referenced by getIdOfString().
|
privatenoexcept |
Method to hash a vector of strings in parallel.
string_vec | input vector of strings to be hashed |
hashes | space for the output - should be pre-sized to match string_vec size |
Definition at line 471 of file StringDictionary.cpp.
References CHECK_EQ, anonymous_namespace{StringDictionary.cpp}::hash_string(), and threading_serial::parallel_for().
Referenced by getOrAddBulkParallel().
|
privatenoexcept |
Definition at line 1202 of file StringDictionary.cpp.
References computeUniqueBucketWithHash(), getStringChecked(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, materialize_hashes_, str_count_, and string_id_string_dict_hash_table_.
Referenced by getOrAddBulk().
|
privatenoexcept |
Definition at line 1225 of file StringDictionary.cpp.
References anonymous_namespace{StringDictionary.cpp}::hash_string().
Referenced by getOrAddBulkParallel().
|
private |
|
privatenoexcept |
Definition at line 1564 of file StringDictionary.cpp.
References compare_cache_, equal_cache_, like_cache_, regex_cache_, and gpu_enabled::swap().
Referenced by getOrAddBulk(), and getOrAddBulkParallel().
|
noexcept |
Definition at line 1600 of file StringDictionary.cpp.
References client_.
Referenced by checkpoint(), copyStrings(), eachStringSerially(), getCompare(), getIdOfString(), getLike(), getRegexpLike(), getString(), makeLambdaStringToId(), storageEntryCount(), and ~StringDictionary().
std::function< int32_t(std::string const &)> StringDictionary::makeLambdaStringToId | ( | ) | const |
Definition at line 254 of file StringDictionary.cpp.
References CHECK, eachStringSerially(), INVALID_STR_ID, and isClient().
|
private |
Definition at line 1628 of file StringDictionary.cpp.
References getStringFromStorage(), sorted_cache, and string_lt().
Referenced by buildSortedCache().
|
static |
Definition at line 1676 of file StringDictionary.cpp.
References threading_serial::async(), populate_string_ids(), and logger::thread_id().
Referenced by DictionaryValueConverter< TARGET_TYPE >::processArrayBuffer().
|
static |
Populates provided dest_ids vector with string ids corresponding to given source strings.
Given a vector of source string ids and the corresponding source dictionary, this method populates a vector of destination string ids by either returning the string id of matching strings in the destination dictionary or creating new entries in the dictionary. Source string ids can also be transient if they were created by a function (e.g., LOWER/UPPER functions). A map of transient string ids to string values is provided in order to handle this use case.
dest_ids | - vector of destination string ids to be populated |
dest_dict | - destination dictionary |
source_ids | - vector of source string ids for which destination ids are needed |
source_dict | - source dictionary |
transient_string_vec | - ordered vector of string value pointers |
Definition at line 1652 of file StringDictionary.cpp.
References CHECK_LT, getOrAddBulk(), getString(), and StringDictionaryProxy::transientIdToIndex().
Referenced by populate_string_array_ids(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().
|
private |
Definition at line 291 of file StringDictionary.cpp.
References computeUniqueBucketWithHash(), hash_cache_, materialize_hashes_, payload_file_off_, str_count_, and string_id_string_dict_hash_table_.
Referenced by StringDictionary().
|
private |
Definition at line 1615 of file StringDictionary.cpp.
References anonymous_namespace{Utm.h}::a, getStringFromStorage(), gpu_enabled::sort(), and string_lt().
Referenced by buildSortedCache().
size_t StringDictionary::storageEntryCount | ( | ) | const |
Definition at line 791 of file StringDictionary.cpp.
References client_, isClient(), rw_mutex_, and str_count_.
Referenced by buildDictionaryTranslationMap(), eachStringSerially(), getBulk(), and getStringViews().
void StringDictionary::update_leaf | ( | const LeafHostInfo & | host_info | ) |
Definition at line 379 of file StringDictionary.cpp.
|
friend |
Definition at line 80 of file StringDictionary.h.
|
private |
Definition at line 287 of file StringDictionary.h.
Referenced by ~StringDictionary().
|
private |
Definition at line 288 of file StringDictionary.h.
|
mutableprivate |
Definition at line 284 of file StringDictionary.h.
Referenced by checkpoint(), eachStringSerially(), getCompare(), getIdOfString(), getLike(), getRegexpLike(), getString(), isClient(), and storageEntryCount().
|
mutableprivate |
Definition at line 285 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), and getOrAddBulkArray().
|
private |
Definition at line 263 of file StringDictionary.h.
Referenced by StringDictionary().
|
mutableprivate |
Definition at line 282 of file StringDictionary.h.
Referenced by getCompare(), and invalidateInvertedIndex().
|
private |
Definition at line 260 of file StringDictionary.h.
Referenced by getBulk(), getDbId(), getDictId(), getOrAddBulk(), and getOrAddBulkParallel().
|
mutableprivate |
Definition at line 281 of file StringDictionary.h.
Referenced by getEquals(), and invalidateInvertedIndex().
|
private |
Definition at line 261 of file StringDictionary.h.
|
private |
Definition at line 265 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), getOrAddBulk(), getOrAddBulkParallel(), increaseHashTableCapacity(), processDictionaryFutures(), and StringDictionary().
|
static |
Definition at line 168 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxyUnlocked(), StringDictionaryProxy::buildUnionTranslationMapToOtherProxy(), count_matches_impl(), count_matches_sharded(), CodeGenerator::createInValuesBitmap(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), fill_hash_join_buff_impl(), fill_hash_join_buff_sharded_impl(), fill_row_ids_impl(), fill_row_ids_sharded_impl(), getBulk(), StringDictionaryProxy::getIdOfString(), StringDictionaryProxy::getIdOfStringNoGeneration(), getOrAddBulk(), getOrAddBulkParallel(), StringDictionaryProxy::getOrAddTransient(), StringDictionaryProxy::getOrAddTransientBulk(), increaseHashTableCapacity(), StringDictionaryProxy::lookupTransientStringUnlocked(), makeLambdaStringToId(), anonymous_namespace{HashJoinRuntime.cpp}::map_str_id_to_outer_dict(), GenericKeyHandler::operator()(), StringLocalCallback::operator()(), StringNetworkCallback::operator()(), StringDictionary(), StringDictionaryProxy::transientLookupBulkParallelUnlocked(), StringDictionaryProxy::transientLookupBulkUnlocked(), and truncate_to_generation().
|
private |
Definition at line 267 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
mutableprivate |
Definition at line 279 of file StringDictionary.h.
Referenced by getLike(), and invalidateInvertedIndex().
|
private |
Definition at line 268 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), getOrAddBulk(), getOrAddBulkParallel(), increaseHashTableCapacity(), processDictionaryFutures(), and StringDictionary().
|
static |
Definition at line 170 of file StringDictionary.h.
Referenced by getOrAddBulk(), getOrAddBulkParallel(), getStringViews(), and anonymous_namespace{StringDictionary.cpp}::throw_encoding_error().
|
static |
Definition at line 169 of file StringDictionary.h.
Referenced by import_export::TypedImportBuffer::add_value(), import_export::TypedImportBuffer::addDefaultValues(), import_export::TypedImportBuffer::addDictEncodedString(), import_export::TypedImportBuffer::addDictEncodedStringArray(), foreign_storage::ParquetStringNoneEncoder::appendData(), foreign_storage::ParquetDetectStringEncoder::appendData(), foreign_storage::ParquetStringEncoder< V >::appendDataTrackErrors(), foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous(), getBulk(), getEquals(), getOrAddBulk(), getOrAddBulkParallel(), import_export::delimited_parser::parse_string_array(), foreign_storage::RegexFileBufferParser::parseBuffer(), anonymous_namespace{StringDictionary.cpp}::throw_string_too_long_error(), foreign_storage::ParquetDetectStringEncoder::validate(), ddl_utils::anonymous_namespace{DdlUtils.cpp}::validate_literal(), and foreign_storage::ParquetStringEncoder< V >::validateAndAppendData().
|
private |
Definition at line 271 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 274 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 272 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 269 of file StringDictionary.h.
Referenced by getOrAddBulk(), getOrAddBulkParallel(), and StringDictionary().
|
private |
Definition at line 270 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 276 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), and processDictionaryFutures().
|
private |
Definition at line 275 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 273 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
mutableprivate |
Definition at line 280 of file StringDictionary.h.
Referenced by getRegexpLike(), and invalidateInvertedIndex().
|
mutableprivate |
Definition at line 277 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), copyStrings(), eachStringSerially(), getBulk(), getCompare(), getIdOfString(), getLike(), getOrAddBulk(), getOrAddBulkParallel(), getRegexpLike(), getString(), getStringViews(), storageEntryCount(), and StringDictionary().
|
private |
Definition at line 266 of file StringDictionary.h.
Referenced by buildSortedCache(), getCompare(), and mergeSortedCache().
|
private |
Definition at line 262 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), buildSortedCache(), checkAndConditionallyIncreaseOffsetCapacity(), copyStrings(), eachStringSerially(), getCompare(), getEquals(), getLike(), getOrAddBulk(), getOrAddBulkParallel(), getRegexpLike(), increaseHashTableCapacity(), processDictionaryFutures(), storageEntryCount(), and StringDictionary().
|
private |
Definition at line 264 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), getBulk(), getOrAddBulk(), getOrAddBulkParallel(), increaseHashTableCapacity(), processDictionaryFutures(), and StringDictionary().
|
mutableprivate |
Definition at line 283 of file StringDictionary.h.
Referenced by copyStrings().