OmniSciDB
c1a53651b2
|
#include <StringDictionaryProxy.h>
Classes | |
struct | HeterogeneousStringEqual |
struct | HeterogeneousStringHash |
class | TranslationMap |
Public Types | |
using | IdMap = TranslationMap< int32_t > |
using | TransientMap = robin_hood::unordered_node_map< std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual > |
Public Member Functions | |
StringDictionaryProxy (StringDictionaryProxy const &)=delete | |
StringDictionaryProxy const & | operator= (StringDictionaryProxy const &)=delete |
StringDictionaryProxy (std::shared_ptr< StringDictionary > sd, const shared::StringDictKey &string_dict_key, const int64_t generation) | |
const shared::StringDictKey & | getDictKey () const noexcept |
bool | operator== (StringDictionaryProxy const &) const |
bool | operator!= (StringDictionaryProxy const &) const |
int32_t | getOrAdd (const std::string &str) noexcept |
StringDictionary * | getDictionary () const noexcept |
int64_t | getGeneration () const noexcept |
std::vector< int32_t > | getTransientBulk (const std::vector< std::string > &strings) const |
Executes read-only lookup of a vector of strings and returns a vector of their integer ids. More... | |
int32_t | getOrAddTransient (const std::string &) |
int32_t | getOrAddTransient (const std::string_view) |
std::vector< int32_t > | getOrAddTransientBulk (const std::vector< std::string > &strings) |
int32_t | getIdOfString (const std::string &str) const |
int32_t | getIdOfStringNoGeneration (const std::string &str) const |
std::string | getString (int32_t string_id) const |
std::vector< std::string > | getStrings (const std::vector< int32_t > &string_ids) const |
std::pair< const char *, size_t > | getStringBytes (int32_t string_id) const noexcept |
IdMap | initIdMap () const |
TranslationMap< Datum > | buildNumericTranslationMap (const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
Builds a vectorized string_id translation map from this proxy to dest_proxy. More... | |
IdMap | buildIntersectionTranslationMapToOtherProxy (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
IdMap | buildUnionTranslationMapToOtherProxy (StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_types) const |
size_t | storageEntryCount () const |
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary. More... | |
size_t | transientEntryCount () const |
Returns the number of transient string entries for this proxy. More... | |
size_t | entryCount () const |
Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount(). More... | |
void | updateGeneration (const int64_t generation) noexcept |
std::vector< int32_t > | getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape) const |
std::vector< int32_t > | getCompare (const std::string &pattern, const std::string &comp_operator) const |
std::vector< int32_t > | getRegexpLike (const std::string &pattern, const char escape) const |
const std::vector< std::string const * > & | getTransientVector () const |
void | eachStringSerially (StringDictionary::StringCallback &) const |
IdMap | transientUnion (StringDictionaryProxy const &) |
Static Public Member Functions | |
static unsigned | transientIdToIndex (int32_t const id) |
static int32_t | transientIndexToId (unsigned const index) |
Private Member Functions | |
std::string | getStringUnlocked (const int32_t string_id) const |
size_t | transientEntryCountUnlocked () const |
size_t | entryCountUnlocked () const |
size_t | persistedC () const |
template<typename String > | |
int32_t | getOrAddTransientImpl (String) |
template<typename String > | |
int32_t | lookupTransientStringUnlocked (const String &lookup_string) const |
size_t | getTransientBulkImpl (const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const |
template<typename String > | |
size_t | transientLookupBulk (const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const |
template<typename String > | |
size_t | transientLookupBulkUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const |
template<typename String > | |
size_t | transientLookupBulkParallelUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const |
IdMap | buildIntersectionTranslationMapToOtherProxyUnlocked (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
template<typename String > | |
int32_t | getIdOfStringFromClient (String const &) const |
template<typename String > | |
int32_t | getOrAddTransientUnlocked (String const &) |
Private Attributes | |
std::shared_ptr< StringDictionary > | string_dict_ |
const shared::StringDictKey | string_dict_key_ |
TransientMap | transient_str_to_int_ |
std::vector< std::string const * > | transient_string_vec_ |
int64_t | generation_ |
std::shared_mutex | rw_mutex_ |
Friends | |
class | StringLocalCallback |
class | StringNetworkCallback |
Definition at line 39 of file StringDictionaryProxy.h.
using StringDictionaryProxy::IdMap = TranslationMap<int32_t> |
Definition at line 141 of file StringDictionaryProxy.h.
using StringDictionaryProxy::TransientMap = robin_hood::unordered_node_map<std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual> |
Definition at line 242 of file StringDictionaryProxy.h.
|
delete |
StringDictionaryProxy::StringDictionaryProxy | ( | std::shared_ptr< StringDictionary > | sd, |
const shared::StringDictKey & | string_dict_key, | ||
const int64_t | generation | ||
) |
Definition at line 39 of file StringDictionaryProxy.cpp.
StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxy | ( | const StringDictionaryProxy * | dest_proxy, |
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_infos | ||
) | const |
Definition at line 382 of file StringDictionaryProxy.cpp.
References buildIntersectionTranslationMapToOtherProxyUnlocked(), getDictKey(), order_translation_locks(), and rw_mutex_.
Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap().
|
private |
Definition at line 265 of file StringDictionaryProxy.cpp.
References CHECK_GT, CHECK_LE, StringDictionaryProxy::TranslationMap< T >::data(), DEBUG_TIMER, StringDictionaryProxy::TranslationMap< T >::empty(), entryCountUnlocked(), generation_, getTransientBulkImpl(), StringDictionaryProxy::TranslationMap< T >::getVectorMap(), initIdMap(), StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), StringDictionaryProxy::TranslationMap< T >::numTransients(), StringDictionaryProxy::TranslationMap< T >::setNumUntranslatedStrings(), StringDictionaryProxy::TranslationMap< T >::setRangeEnd(), StringDictionaryProxy::TranslationMap< T >::setRangeStart(), StringDictionaryProxy::TranslationMap< T >::storageData(), storageEntryCount(), string_dict_, shared::transform(), transient_string_vec_, transientEntryCountUnlocked(), and VLOG.
Referenced by buildIntersectionTranslationMapToOtherProxy(), and buildUnionTranslationMapToOtherProxy().
StringDictionaryProxy::TranslationMap< Datum > StringDictionaryProxy::buildNumericTranslationMap | ( | const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_infos | ) | const |
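Builds a vectorized string_id translation map from this proxy to dest_proxy. (Note: this function's signature takes only string_op_infos and has no dest_proxy parameter; this description and the dest_proxy parameter entry that follows appear to have been written for one of the ...TranslationMapToOtherProxy() functions — confirm against the source comment.)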
dest_proxy | StringDictionaryProxy that we are to map this proxy's string ids to |
Definition at line 217 of file StringDictionaryProxy.cpp.
References CHECK, DEBUG_TIMER, generation_, getStringUnlocked(), threading_serial::parallel_for(), string_dict_, and transient_string_vec_.
Referenced by RowSetMemoryOwner::addStringProxyNumericTranslationMap().
StringDictionaryProxy::IdMap StringDictionaryProxy::buildUnionTranslationMapToOtherProxy | ( | StringDictionaryProxy * | dest_proxy, |
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_types | ||
) | const |
Definition at line 396 of file StringDictionaryProxy.cpp.
References buildIntersectionTranslationMapToOtherProxyUnlocked(), DEBUG_TIMER, getDictKey(), getOrAddTransientUnlocked(), getStringUnlocked(), StringDictionary::INVALID_STR_ID, order_translation_locks(), rw_mutex_, string_dict_, and transientEntryCountUnlocked().
Referenced by RowSetMemoryOwner::addStringProxyUnionTranslationMap().
void StringDictionaryProxy::eachStringSerially | ( | StringDictionary::StringCallback & | serial_callback | ) | const |
Definition at line 613 of file StringDictionaryProxy.cpp.
References generation_, string_dict_, and transient_string_vec_.
Referenced by transientUnion().
size_t StringDictionaryProxy::entryCount | ( | ) | const |
Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount().
Definition at line 607 of file StringDictionaryProxy.cpp.
References entryCountUnlocked(), and rw_mutex_.
|
private |
Definition at line 603 of file StringDictionaryProxy.cpp.
References storageEntryCount(), and transientEntryCountUnlocked().
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and entryCount().
std::vector< int32_t > StringDictionaryProxy::getCompare | ( | const std::string & | pattern, |
const std::string & | comp_operator | ||
) | const |
Definition at line 532 of file StringDictionaryProxy.cpp.
References CHECK_GE, anonymous_namespace{StringDictionaryProxy.cpp}::do_compare(), generation_, run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().
Referenced by anonymous_namespace{StringOpsIR.cpp}::get_compared_ids().
|
noexcept |
Definition at line 806 of file StringDictionaryProxy.cpp.
References string_dict_.
Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap(), RowSetMemoryOwner::addStringProxyNumericTranslationMap(), and RowSetMemoryOwner::addStringProxyUnionTranslationMap().
|
inline noexcept |
Definition at line 47 of file StringDictionaryProxy.h.
References string_dict_key_.
Referenced by buildIntersectionTranslationMapToOtherProxy(), buildUnionTranslationMapToOtherProxy(), TransientStringLiteralsVisitor::visitStringOper(), and TransientStringLiteralsVisitor::visitUOper().
|
noexcept |
int32_t StringDictionaryProxy::getIdOfString | ( | const std::string & | str | ) | const |
Definition at line 119 of file StringDictionaryProxy.cpp.
References getIdOfStringFromClient(), StringDictionary::INVALID_STR_ID, rw_mutex_, and transient_str_to_int_.
Referenced by anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), and Executor::serializeLiterals().
|
private |
Definition at line 131 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, string_dict_, and truncate_to_generation().
Referenced by getIdOfString(), and getOrAddTransientImpl().
int32_t StringDictionaryProxy::getIdOfStringNoGeneration | ( | const std::string & | str | ) | const |
Definition at line 136 of file StringDictionaryProxy.cpp.
References StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, and transient_str_to_int_.
std::vector< int32_t > StringDictionaryProxy::getLike | ( | const std::string & | pattern, |
const bool | icase, | ||
const bool | is_simple, | ||
const char | escape | ||
) | const |
Definition at line 494 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().
|
noexcept |
Definition at line 567 of file StringDictionaryProxy.cpp.
Referenced by DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId().
int32_t StringDictionaryProxy::getOrAddTransient | ( | const std::string & | str | ) |
Definition at line 111 of file StringDictionaryProxy.cpp.
Referenced by apply_multi_input_string_ops_and_encode(), apply_string_ops_and_encode(), populate_output_stats_cols(), Executor::serializeLiterals(), string_compress(), TransientStringLiteralsVisitor::visitConstant(), TransientStringLiteralsVisitor::visitStringOper(), and write_string_to_proxy().
int32_t StringDictionaryProxy::getOrAddTransient | ( | const std::string_view | sv | ) |
Definition at line 115 of file StringDictionaryProxy.cpp.
std::vector< int32_t > StringDictionaryProxy::getOrAddTransientBulk | ( | const std::vector< std::string > & | strings | ) |
Definition at line 60 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, rw_mutex_, and string_dict_.
Referenced by supported_ml_frameworks__cpu_().
|
private |
Definition at line 102 of file StringDictionaryProxy.cpp.
References getIdOfStringFromClient(), getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, and rw_mutex_.
|
private |
Definition at line 89 of file StringDictionaryProxy.cpp.
References transient_str_to_int_, transient_string_vec_, and transientIndexToId().
Referenced by buildUnionTranslationMapToOtherProxy(), getOrAddTransientBulk(), getOrAddTransientImpl(), StringLocalCallback::operator()(), and StringNetworkCallback::operator()().
std::vector< int32_t > StringDictionaryProxy::getRegexpLike | ( | const std::string & | pattern, |
const char | escape | ||
) | const |
Definition at line 555 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().
std::string StringDictionaryProxy::getString | ( | int32_t | string_id | ) | const |
Definition at line 172 of file StringDictionaryProxy.cpp.
References getStringUnlocked(), and rw_mutex_.
Referenced by anonymous_namespace{ResultSetIteration.cpp}::build_string_array_target_value(), StringValueConverter::convertToColumnarFormatFromDict(), DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), ResultSet::getString(), intersect_translate_string_id_to_other_dict(), and union_translate_string_id_to_other_dict().
|
noexcept |
Definition at line 571 of file StringDictionaryProxy.cpp.
References CHECK_LT.
Referenced by anonymous_namespace{ExternalExecutor.cpp}::decode_string(), string_decompress(), StringDictionaryProxy_getStringBytes(), and StringDictionaryProxy_getStringLength().
std::vector< std::string > StringDictionaryProxy::getStrings | ( | const std::vector< int32_t > & | string_ids | ) | const |
Definition at line 189 of file StringDictionaryProxy.cpp.
References string_dict_, transient_string_vec_, and transientIdToIndex().
|
private |
Definition at line 180 of file StringDictionaryProxy.cpp.
References CHECK_LT, storageEntryCount(), string_dict_, transient_string_vec_, and transientIdToIndex().
Referenced by buildNumericTranslationMap(), buildUnionTranslationMapToOtherProxy(), and getString().
std::vector< int32_t > StringDictionaryProxy::getTransientBulk | ( | const std::vector< std::string > & | strings | ) | const |
Executes read-only lookup of a vector of strings and returns a vector of their integer ids.
This function, unlike getOrAddTransientBulk, will not add strings to the dictionary. Use this function if strings that don't currently exist in the StringDictionaryProxy should not be added to the proxy as transient entries. This method also has performance advantages over getOrAddTransientBulk for read-only use cases, in that it can: (1) take a read lock instead of a write lock for the transient lookups, and (2) use a tbb::parallel_for implementation of the transient string lookups, as we are guaranteed that the underlying map of strings to int ids cannot change.
strings | - Vector of strings to perform string id lookups on |
Definition at line 52 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, and getTransientBulkImpl().
|
private |
Definition at line 703 of file StringDictionaryProxy.cpp.
References generation_, string_dict_, and transientLookupBulk().
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and getTransientBulk().
|
inline |
Definition at line 244 of file StringDictionaryProxy.h.
References transient_string_vec_.
Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().
|
inline |
Definition at line 143 of file StringDictionaryProxy.h.
References generation_, StringDictionary::INVALID_STR_ID, and transient_string_vec_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and transientUnion().
|
private |
Definition at line 209 of file StringDictionaryProxy.cpp.
References StringDictionary::INVALID_STR_ID, and transient_str_to_int_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().
bool StringDictionaryProxy::operator!= | ( | StringDictionaryProxy const & | rhs | ) | const |
Definition at line 819 of file StringDictionaryProxy.cpp.
References operator==().
|
delete |
bool StringDictionaryProxy::operator== | ( | StringDictionaryProxy const & | rhs | ) | const |
Definition at line 814 of file StringDictionaryProxy.cpp.
References string_dict_key_, and transient_str_to_int_.
Referenced by operator!=().
|
private |
size_t StringDictionaryProxy::storageEntryCount | ( | ) | const |
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary.
Definition at line 582 of file StringDictionaryProxy.cpp.
References CHECK_LE, generation_, and string_dict_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), entryCountUnlocked(), and getStringUnlocked().
size_t StringDictionaryProxy::transientEntryCount | ( | ) | const |
Returns the number of transient string entries for this proxy.
Definition at line 598 of file StringDictionaryProxy.cpp.
References rw_mutex_, and transientEntryCountUnlocked().
|
private |
Definition at line 589 of file StringDictionaryProxy.cpp.
References CHECK_LE, and transient_str_to_int_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildUnionTranslationMapToOtherProxy(), entryCountUnlocked(), and transientEntryCount().
|
inline static |
Definition at line 250 of file StringDictionaryProxy.h.
Referenced by getStrings(), getStringUnlocked(), and StringDictionary::populate_string_ids().
|
inline static |
Definition at line 255 of file StringDictionaryProxy.h.
Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), getCompare(), getLike(), getOrAddTransientUnlocked(), getRegexpLike(), and ArrowResultSetConverter::initializeColumnBuilder().
|
private |
Definition at line 723 of file StringDictionaryProxy.cpp.
References rw_mutex_, transient_str_to_int_, transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().
Referenced by getTransientBulkImpl().
|
private |
Definition at line 763 of file StringDictionaryProxy.cpp.
References CHECK_GE, StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, and threading_serial::parallel_for().
Referenced by transientLookupBulk().
|
private |
Definition at line 743 of file StringDictionaryProxy.cpp.
References StringDictionary::INVALID_STR_ID, and lookupTransientStringUnlocked().
Referenced by transientLookupBulk().
StringDictionaryProxy::IdMap StringDictionaryProxy::transientUnion | ( | StringDictionaryProxy const & | sdp_rhs | ) |
Definition at line 677 of file StringDictionaryProxy.cpp.
References eachStringSerially(), initIdMap(), and string_dict_.
|
noexcept |
|
friend |
Definition at line 308 of file StringDictionaryProxy.h.
|
friend |
Definition at line 309 of file StringDictionaryProxy.h.
|
private |
Definition at line 299 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildNumericTranslationMap(), eachStringSerially(), getCompare(), getGeneration(), getIdOfStringFromClient(), getLike(), getOrAddTransientBulk(), getRegexpLike(), getTransientBulk(), getTransientBulkImpl(), initIdMap(), and storageEntryCount().
|
mutable private |
Definition at line 300 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxy(), buildUnionTranslationMapToOtherProxy(), entryCount(), getIdOfString(), getIdOfStringNoGeneration(), getOrAddTransientBulk(), getOrAddTransientImpl(), getString(), transientEntryCount(), and transientLookupBulk().
|
private |
Definition at line 294 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildNumericTranslationMap(), buildUnionTranslationMapToOtherProxy(), eachStringSerially(), getCompare(), getDictionary(), getIdOfStringFromClient(), getIdOfStringNoGeneration(), getLike(), getOrAddTransientBulk(), getRegexpLike(), getStrings(), getStringUnlocked(), getTransientBulkImpl(), StringLocalCallback::operator()(), storageEntryCount(), and transientUnion().
|
private |
Definition at line 295 of file StringDictionaryProxy.h.
Referenced by getDictKey(), and operator==().
|
private |
Definition at line 296 of file StringDictionaryProxy.h.
Referenced by getIdOfString(), getIdOfStringNoGeneration(), getOrAddTransientUnlocked(), lookupTransientStringUnlocked(), operator==(), transientEntryCountUnlocked(), and transientLookupBulk().
|
private |
Definition at line 298 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildNumericTranslationMap(), eachStringSerially(), getCompare(), getLike(), getOrAddTransientUnlocked(), getRegexpLike(), getStrings(), getStringUnlocked(), getTransientVector(), and initIdMap().