OmniSciDB
cde582ebc3
|
#include <StringDictionaryProxy.h>
Classes | |
struct | HeterogeneousStringEqual |
struct | HeterogeneousStringHash |
class | IdMap |
Public Types | |
using | TransientMap = robin_hood::unordered_node_map< std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual > |
Public Member Functions | |
StringDictionaryProxy (StringDictionaryProxy const &)=delete | |
StringDictionaryProxy const & | operator= (StringDictionaryProxy const &)=delete |
StringDictionaryProxy (std::shared_ptr< StringDictionary > sd, const int32_t string_dict_id, const int64_t generation) | |
int32_t | getDictId () const noexcept |
bool | operator== (StringDictionaryProxy const &) const |
bool | operator!= (StringDictionaryProxy const &) const |
int32_t | getOrAdd (const std::string &str) noexcept |
StringDictionary * | getDictionary () const noexcept |
int64_t | getGeneration () const noexcept |
std::vector< int32_t > | getTransientBulk (const std::vector< std::string > &strings) const |
Executes read-only lookup of a vector of strings and returns a vector of their integer ids. More... | |
int32_t | getOrAddTransient (const std::string &str) |
std::vector< int32_t > | getOrAddTransientBulk (const std::vector< std::string > &strings) |
int32_t | getIdOfString (const std::string &str) const |
int32_t | getIdOfStringNoGeneration (const std::string &str) const |
std::string | getString (int32_t string_id) const |
std::vector< std::string > | getStrings (const std::vector< int32_t > &string_ids) const |
std::pair< const char *, size_t > | getStringBytes (int32_t string_id) const noexcept |
IdMap | initIdMap () const |
IdMap | buildIntersectionTranslationMapToOtherProxy (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
Builds a vectorized string_id translation map from this proxy to dest_proxy. More... | |
IdMap | buildUnionTranslationMapToOtherProxy (StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_types) const |
size_t | storageEntryCount () const |
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary. More... | |
size_t | transientEntryCount () const |
Returns the number of transient string entries for this proxy. More... | |
size_t | entryCount () const |
Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount(). More... | |
void | updateGeneration (const int64_t generation) noexcept |
std::vector< int32_t > | getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape) const |
std::vector< int32_t > | getCompare (const std::string &pattern, const std::string &comp_operator) const |
std::vector< int32_t > | getRegexpLike (const std::string &pattern, const char escape) const |
const std::vector< std::string const * > & | getTransientVector () const |
void | eachStringSerially (StringDictionary::StringCallback &) const |
IdMap | transientUnion (StringDictionaryProxy const &) |
Static Public Member Functions | |
static unsigned | transientIdToIndex (int32_t const id) |
static int32_t | transientIndexToId (unsigned const index) |
Private Member Functions | |
std::string | getStringUnlocked (const int32_t string_id) const |
size_t | transientEntryCountUnlocked () const |
size_t | entryCountUnlocked () const |
size_t | persistedC () const |
template<typename String > | |
int32_t | lookupTransientStringUnlocked (const String &lookup_string) const |
size_t | getTransientBulkImpl (const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const |
template<typename String > | |
size_t | transientLookupBulk (const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const |
template<typename String > | |
size_t | transientLookupBulkUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const |
template<typename String > | |
size_t | transientLookupBulkParallelUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const |
IdMap | buildIntersectionTranslationMapToOtherProxyUnlocked (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
template<typename String > | |
int32_t | getIdOfStringFromClient (String const &) const |
template<typename String > | |
int32_t | getOrAddTransientUnlocked (String const &) |
Private Attributes | |
std::shared_ptr< StringDictionary > | string_dict_ |
const int32_t | string_dict_id_ |
TransientMap | transient_str_to_int_ |
std::vector< std::string const * > | transient_string_vec_ |
int64_t | generation_ |
std::shared_mutex | rw_mutex_ |
Friends | |
class | StringLocalCallback |
class | StringNetworkCallback |
Definition at line 38 of file StringDictionaryProxy.h.
using StringDictionaryProxy::TransientMap = robin_hood::unordered_node_map<std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual> |
Definition at line 226 of file StringDictionaryProxy.h.
|
delete |
StringDictionaryProxy::StringDictionaryProxy | ( | std::shared_ptr< StringDictionary > | sd, |
const int32_t | string_dict_id, | ||
const int64_t | generation | ||
) |
Definition at line 39 of file StringDictionaryProxy.cpp.
StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxy | ( | const StringDictionaryProxy * | dest_proxy, |
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_infos | ||
) | const |
Builds a vectorized string_id translation map from this proxy to dest_proxy.
dest_proxy | StringDictionaryProxy that we are to map this proxy's string ids to |
Definition at line 302 of file StringDictionaryProxy.cpp.
References buildIntersectionTranslationMapToOtherProxyUnlocked(), getDictId(), order_translation_locks(), and rw_mutex_.
Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap().
|
private |
Definition at line 183 of file StringDictionaryProxy.cpp.
References CHECK_GT, CHECK_LE, StringDictionaryProxy::IdMap::data(), DEBUG_TIMER, StringDictionaryProxy::IdMap::empty(), entryCountUnlocked(), generation_, getTransientBulkImpl(), StringDictionaryProxy::IdMap::getVectorMap(), initIdMap(), StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), StringDictionaryProxy::IdMap::numTransients(), StringDictionaryProxy::IdMap::setNumUntranslatedStrings(), StringDictionaryProxy::IdMap::setRangeEnd(), StringDictionaryProxy::IdMap::setRangeStart(), StringDictionaryProxy::IdMap::storageData(), storageEntryCount(), string_dict_, shared::transform(), transient_string_vec_, transientEntryCountUnlocked(), and VLOG.
Referenced by buildIntersectionTranslationMapToOtherProxy(), and buildUnionTranslationMapToOtherProxy().
StringDictionaryProxy::IdMap StringDictionaryProxy::buildUnionTranslationMapToOtherProxy | ( | StringDictionaryProxy * | dest_proxy, |
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_types | ||
) | const |
Definition at line 316 of file StringDictionaryProxy.cpp.
References buildIntersectionTranslationMapToOtherProxyUnlocked(), DEBUG_TIMER, getDictId(), getOrAddTransientUnlocked(), getStringUnlocked(), StringDictionary::INVALID_STR_ID, order_translation_locks(), rw_mutex_, string_dict_, to_string(), and transientEntryCountUnlocked().
Referenced by RowSetMemoryOwner::addStringProxyUnionTranslationMap().
void StringDictionaryProxy::eachStringSerially | ( | StringDictionary::StringCallback & | serial_callback | ) | const |
Definition at line 532 of file StringDictionaryProxy.cpp.
References generation_, string_dict_, and transient_string_vec_.
Referenced by transientUnion().
size_t StringDictionaryProxy::entryCount | ( | ) | const |
Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount().
Definition at line 526 of file StringDictionaryProxy.cpp.
References entryCountUnlocked(), and rw_mutex_.
|
private |
Definition at line 522 of file StringDictionaryProxy.cpp.
References storageEntryCount(), and transientEntryCountUnlocked().
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and entryCount().
std::vector< int32_t > StringDictionaryProxy::getCompare | ( | const std::string & | pattern, |
const std::string & | comp_operator | ||
) | const |
Definition at line 451 of file StringDictionaryProxy.cpp.
References CHECK_GE, anonymous_namespace{StringDictionaryProxy.cpp}::do_compare(), generation_, run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().
Referenced by anonymous_namespace{StringOpsIR.cpp}::get_compared_ids().
|
inlinenoexcept |
Definition at line 46 of file StringDictionaryProxy.h.
References string_dict_id_.
Referenced by buildIntersectionTranslationMapToOtherProxy(), buildUnionTranslationMapToOtherProxy(), TransientStringLiteralsVisitor::visitStringOper(), and TransientStringLiteralsVisitor::visitUOper().
|
noexcept |
Definition at line 730 of file StringDictionaryProxy.cpp.
References string_dict_.
Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap(), and RowSetMemoryOwner::addStringProxyUnionTranslationMap().
|
noexcept |
int32_t StringDictionaryProxy::getIdOfString | ( | const std::string & | str | ) | const |
Definition at line 110 of file StringDictionaryProxy.cpp.
References getIdOfStringFromClient(), StringDictionary::INVALID_STR_ID, rw_mutex_, and transient_str_to_int_.
Referenced by anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), and Executor::serializeLiterals().
|
private |
Definition at line 122 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, string_dict_, and truncate_to_generation().
Referenced by getIdOfString(), and getOrAddTransient().
int32_t StringDictionaryProxy::getIdOfStringNoGeneration | ( | const std::string & | str | ) | const |
Definition at line 127 of file StringDictionaryProxy.cpp.
References StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, and transient_str_to_int_.
std::vector< int32_t > StringDictionaryProxy::getLike | ( | const std::string & | pattern, |
const bool | icase, | ||
const bool | is_simple, | ||
const char | escape | ||
) | const |
Definition at line 413 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().
|
noexcept |
Definition at line 486 of file StringDictionaryProxy.cpp.
Referenced by DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId().
int32_t StringDictionaryProxy::getOrAddTransient | ( | const std::string & | str | ) |
Definition at line 101 of file StringDictionaryProxy.cpp.
References getIdOfStringFromClient(), getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, and rw_mutex_.
Referenced by apply_string_ops_and_encode(), Executor::serializeLiterals(), string_compress(), TransientStringLiteralsVisitor::visitConstant(), and TransientStringLiteralsVisitor::visitStringOper().
std::vector< int32_t > StringDictionaryProxy::getOrAddTransientBulk | ( | const std::vector< std::string > & | strings | ) |
Definition at line 60 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, rw_mutex_, and string_dict_.
Referenced by supported_ml_frameworks__cpu_().
|
private |
Definition at line 89 of file StringDictionaryProxy.cpp.
References transient_str_to_int_, transient_string_vec_, and transientIndexToId().
Referenced by buildUnionTranslationMapToOtherProxy(), getOrAddTransient(), getOrAddTransientBulk(), StringLocalCallback::operator()(), and StringNetworkCallback::operator()().
std::vector< int32_t > StringDictionaryProxy::getRegexpLike | ( | const std::string & | pattern, |
const char | escape | ||
) | const |
Definition at line 474 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().
std::string StringDictionaryProxy::getString | ( | int32_t | string_id | ) | const |
Definition at line 138 of file StringDictionaryProxy.cpp.
References getStringUnlocked(), and rw_mutex_.
Referenced by anonymous_namespace{ResultSetIteration.cpp}::build_string_array_target_value(), StringValueConverter::convertToColumnarFormatFromDict(), DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), intersect_translate_string_id_to_other_dict(), and union_translate_string_id_to_other_dict().
|
noexcept |
Definition at line 490 of file StringDictionaryProxy.cpp.
References CHECK_LT.
Referenced by anonymous_namespace{ExternalExecutor.cpp}::decode_string(), and string_decompress().
std::vector< std::string > StringDictionaryProxy::getStrings | ( | const std::vector< int32_t > & | string_ids | ) | const |
Definition at line 155 of file StringDictionaryProxy.cpp.
References string_dict_, transient_string_vec_, and transientIdToIndex().
|
private |
Definition at line 146 of file StringDictionaryProxy.cpp.
References CHECK_LT, storageEntryCount(), string_dict_, transient_string_vec_, and transientIdToIndex().
Referenced by buildUnionTranslationMapToOtherProxy(), and getString().
std::vector< int32_t > StringDictionaryProxy::getTransientBulk | ( | const std::vector< std::string > & | strings | ) | const |
Executes read-only lookup of a vector of strings and returns a vector of their integer ids.
This function, unlike getOrAddTransientBulk, will not add strings to the dictionary. Use this function if strings that don't currently exist in the StringDictionaryProxy should not be added to the proxy as transient entries. This method also has performance advantages over getOrAddTransientBulk for read-only use cases, in that it can: 1) take a read lock instead of a write lock for the transient lookups; and 2) use a tbb::parallel_for implementation of the transient string lookups, as we are guaranteed that the underlying map of strings to int ids cannot change.
strings | - Vector of strings to perform string id lookups on |
Definition at line 52 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, and getTransientBulkImpl().
|
private |
Definition at line 627 of file StringDictionaryProxy.cpp.
References generation_, string_dict_, and transientLookupBulk().
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and getTransientBulk().
|
inline |
Definition at line 228 of file StringDictionaryProxy.h.
References transient_string_vec_.
Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().
|
inline |
Definition at line 134 of file StringDictionaryProxy.h.
References generation_, and transient_string_vec_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and transientUnion().
|
private |
Definition at line 175 of file StringDictionaryProxy.cpp.
References StringDictionary::INVALID_STR_ID, and transient_str_to_int_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().
bool StringDictionaryProxy::operator!= | ( | StringDictionaryProxy const & | rhs | ) | const |
Definition at line 743 of file StringDictionaryProxy.cpp.
References operator==().
|
delete |
bool StringDictionaryProxy::operator== | ( | StringDictionaryProxy const & | rhs | ) | const |
Definition at line 738 of file StringDictionaryProxy.cpp.
References string_dict_id_, and transient_str_to_int_.
Referenced by operator!=().
|
private |
size_t StringDictionaryProxy::storageEntryCount | ( | ) | const |
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary.
Definition at line 501 of file StringDictionaryProxy.cpp.
References CHECK_LE, generation_, and string_dict_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), entryCountUnlocked(), and getStringUnlocked().
size_t StringDictionaryProxy::transientEntryCount | ( | ) | const |
Returns the number of transient string entries for this proxy.
Definition at line 517 of file StringDictionaryProxy.cpp.
References rw_mutex_, and transientEntryCountUnlocked().
|
private |
Definition at line 508 of file StringDictionaryProxy.cpp.
References CHECK_LE, and transient_str_to_int_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildUnionTranslationMapToOtherProxy(), entryCountUnlocked(), and transientEntryCount().
|
inlinestatic |
Definition at line 234 of file StringDictionaryProxy.h.
Referenced by getStrings(), getStringUnlocked(), and StringDictionary::populate_string_ids().
|
inlinestatic |
Definition at line 239 of file StringDictionaryProxy.h.
Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), getCompare(), getLike(), getOrAddTransientUnlocked(), getRegexpLike(), and ArrowResultSetConverter::initializeColumnBuilder().
|
private |
Definition at line 647 of file StringDictionaryProxy.cpp.
References rw_mutex_, transient_str_to_int_, transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().
Referenced by getTransientBulkImpl().
|
private |
Definition at line 687 of file StringDictionaryProxy.cpp.
References CHECK_GE, StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, and threading_serial::parallel_for().
Referenced by transientLookupBulk().
|
private |
Definition at line 667 of file StringDictionaryProxy.cpp.
References StringDictionary::INVALID_STR_ID, and lookupTransientStringUnlocked().
Referenced by transientLookupBulk().
StringDictionaryProxy::IdMap StringDictionaryProxy::transientUnion | ( | StringDictionaryProxy const & | sdp_rhs | ) |
Definition at line 596 of file StringDictionaryProxy.cpp.
References eachStringSerially(), initIdMap(), and string_dict_.
|
noexcept |
|
friend |
Definition at line 290 of file StringDictionaryProxy.h.
|
friend |
Definition at line 291 of file StringDictionaryProxy.h.
|
private |
Definition at line 281 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), eachStringSerially(), getCompare(), getGeneration(), getIdOfStringFromClient(), getLike(), getOrAddTransientBulk(), getRegexpLike(), getTransientBulk(), getTransientBulkImpl(), initIdMap(), and storageEntryCount().
|
mutableprivate |
Definition at line 282 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxy(), buildUnionTranslationMapToOtherProxy(), entryCount(), getIdOfString(), getIdOfStringNoGeneration(), getOrAddTransient(), getOrAddTransientBulk(), getString(), transientEntryCount(), and transientLookupBulk().
|
private |
Definition at line 276 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildUnionTranslationMapToOtherProxy(), eachStringSerially(), getCompare(), getDictionary(), getIdOfStringFromClient(), getIdOfStringNoGeneration(), getLike(), getOrAddTransientBulk(), getRegexpLike(), getStrings(), getStringUnlocked(), getTransientBulkImpl(), StringLocalCallback::operator()(), storageEntryCount(), and transientUnion().
|
private |
Definition at line 277 of file StringDictionaryProxy.h.
Referenced by getDictId(), and operator==().
|
private |
Definition at line 278 of file StringDictionaryProxy.h.
Referenced by getIdOfString(), getIdOfStringNoGeneration(), getOrAddTransientUnlocked(), lookupTransientStringUnlocked(), operator==(), transientEntryCountUnlocked(), and transientLookupBulk().
|
private |
Definition at line 280 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), eachStringSerially(), getCompare(), getLike(), getOrAddTransientUnlocked(), getRegexpLike(), getStrings(), getStringUnlocked(), getTransientVector(), and initIdMap().