17 #ifndef STRINGDICTIONARY_STRINGDICTIONARYPROXY_H
18 #define STRINGDICTIONARY_STRINGDICTIONARYPROXY_H
23 #include "ThirdParty/robin_hood/robin_hood.h"
27 #include <shared_mutex>
29 #include <string_view>
33 namespace StringOps_Namespace {
43 const int32_t string_dict_id,
44 const int64_t generation);
/// Declaration of getOrAdd: takes a string by const reference and returns an
/// int32_t. The function is declared noexcept and is non-const.
/// NOTE(review): presumably returns the id of `str`, adding it first when it is
/// not yet known — inferred from the name only; confirm against the definition.
51 int32_t
getOrAdd(
const std::string& str) noexcept;
/// Executes a read-only lookup of a vector of strings and returns a vector of
/// their integer ids (per the brief description listed for this function).
/// Const member: takes `strings` by const reference.
/// NOTE(review): the id returned for a string that is not found is not visible
/// here — confirm against the definition.
75 std::vector<int32_t>
getTransientBulk(const std::vector<std::
string>& strings) const;
81 const std::
string& str) const;
/// Declaration of getString: const member that takes an int32_t string id and
/// returns a std::string by value.
/// NOTE(review): presumably the string stored under `string_id`; behavior for
/// an unknown id is not visible here — confirm.
82 std::
string getString(int32_t string_id) const;
/// Declaration of getStrings: const member that takes a vector of int32_t ids
/// by const reference and returns a vector of std::string by value.
/// NOTE(review): presumably one output string per input id, in the same
/// order — confirm against the definition.
83 std::vector<std::
string>
getStrings(const std::vector<int32_t>& string_ids) const;
/// Declaration of getStringBytes: const, noexcept member that takes an int32_t
/// string id and returns a (const char*, size_t) pair.
/// NOTE(review): presumably a pointer to the string's bytes plus their length;
/// ownership and lifetime of the pointed-to buffer are not visible here —
/// confirm before holding on to the pointer.
84 std::pair<const
char*,
size_t>
getStringBytes(int32_t string_id) const noexcept;
/// Count of untranslated strings. Starts at -1; the visible accessor CHECKs
/// that the value is >= 0 before returning it, so -1 presumably marks
/// "not yet set" — confirm.
89 int64_t num_untranslated_strings_{-1};
/// Range bounds, both default-initialized to 0.
/// NOTE(review): presumably the values written by setRangeStart()/setRangeEnd();
/// whether the end bound is inclusive is not visible here — confirm.
90 int32_t range_start_{0};
91 int32_t range_end_{0};
95 IdMap(uint32_t
const tran_size, uint32_t
const dict_size)
96 : offset_(tran_size + 1)
/// Copy construction is explicitly deleted: an IdMap cannot be copy-constructed.
98 IdMap(IdMap
const&) =
delete;
/// Move construction uses the compiler-generated (defaulted) member-wise move.
99 IdMap(IdMap&&) =
default;
100 bool empty()
const {
return vector_map_.size() == 1; }
101 inline size_t getIndex(int32_t
const id)
const {
return offset_ + id; }
102 std::vector<int32_t>
const&
getVectorMap()
const {
return vector_map_; }
103 size_t size()
const {
return vector_map_.size(); }
106 int32_t*
data() {
return vector_map_.data(); }
107 int32_t
const*
data()
const {
return vector_map_.data(); }
108 int32_t
domainStart()
const {
return -
static_cast<int32_t
>(offset_); }
109 int32_t
domainEnd()
const {
return static_cast<int32_t
>(numNonTransients()); }
122 CHECK_GE(num_untranslated_strings_, 0L);
123 return static_cast<size_t>(num_untranslated_strings_);
126 num_untranslated_strings_ =
static_cast<int64_t
>(num_untranslated_strings);
129 int32_t&
operator[](int32_t
const id) {
return vector_map_[getIndex(
id)]; }
130 int32_t
operator[](int32_t
const id)
const {
return vector_map_[getIndex(
id)]; }
/// Friend declaration of the stream-insertion operator for IdMap, which lets
/// that operator read IdMap's non-public members. The definition is not
/// visible in this header excerpt.
131 friend std::ostream&
operator<<(std::ostream&, IdMap
const&);
157 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos)
const;
161 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_types)
const;
194 std::vector<int32_t>
getLike(
const std::string& pattern,
196 const bool is_simple,
197 const char escape)
const;
/// Declaration of getCompare: const member that takes a pattern string and a
/// comparison-operator string, and returns a vector of int32_t.
/// NOTE(review): presumably the ids of strings that satisfy
/// `<string> comp_operator pattern`; the accepted operator spellings are not
/// visible here — confirm against the definition.
199 std::vector<int32_t>
getCompare(
const std::string& pattern,
200 const std::string& comp_operator)
const;
/// Declaration of getRegexpLike: const member that takes a pattern string and
/// an escape character, and returns a vector of int32_t.
/// NOTE(review): presumably the ids of strings matching the regular expression
/// `pattern`; the regex dialect and the exact role of `escape` are not visible
/// here — confirm against the definition.
202 std::vector<int32_t>
getRegexpLike(
const std::string& pattern,
const char escape)
const;
208 return robin_hood::hash_bytes(key.data(), key.size());
214 bool operator()(std::string_view
const lhs, std::string_view
const rhs)
const {
223 using TransientMap = robin_hood::unordered_node_map<std::string,
225 HeterogeneousStringHash,
235 constexpr
int max_transient_string_id = -2;
236 return static_cast<unsigned>(max_transient_string_id - id);
240 constexpr
int max_transient_string_id = -2;
241 return static_cast<int32_t
>(max_transient_string_id - index);
256 template <
typename String>
260 const bool take_read_lock)
const;
261 template <
typename String>
264 const bool take_read_lock)
const;
265 template <
typename String>
267 int32_t* string_ids)
const;
268 template <
typename String>
270 int32_t* string_ids)
const;
274 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos)
const;
285 template <
typename String>
287 template <
typename String>
293 #endif // STRINGDICTIONARY_STRINGDICTIONARYPROXY_H
void eachStringSerially(StringDictionary::StringCallback &) const
size_t numNonTransients() const
std::pair< const char *, size_t > getStringBytes(int32_t string_id) const noexcept
std::vector< int32_t > getLike(const std::string &pattern, const bool icase, const bool is_simple, const char escape) const
size_t transientEntryCountUnlocked() const
const std::vector< std::string const * > & getTransientVector() const
const int32_t string_dict_id_
size_t entryCount() const
Returns the number of total string entries for this proxy, both stored in the underlying dictionary a...
int32_t getIdOfStringNoGeneration(const std::string &str) const
int32_t domainStart() const
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
std::vector< int32_t > vector_map_
std::string getStringUnlocked(const int32_t string_id) const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
StringDictionary * getDictionary() const noexcept
size_t transientLookupBulkUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const
StringDictionaryProxy const & operator=(StringDictionaryProxy const &)=delete
size_t transientLookupBulk(const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const
std::string getString(int32_t string_id) const
void setNumUntranslatedStrings(const size_t num_untranslated_strings)
int32_t domainEnd() const
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
size_t transientLookupBulkParallelUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const
int32_t getIdOfStringFromClient(String const &) const
std::vector< int32_t > getTransientBulk(const std::vector< std::string > &strings) const
Executes read-only lookup of a vector of strings and returns a vector of their integer ids...
std::vector< int32_t > getCompare(const std::string &pattern, const std::string &comp_operator) const
std::shared_ptr< StringDictionary > string_dict_
IdMap transientUnion(StringDictionaryProxy const &)
std::vector< std::string const * > transient_string_vec_
int32_t rangeStart() const
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
void setRangeEnd(const int32_t range_end)
std::vector< std::string > getStrings(const std::vector< int32_t > &string_ids) const
size_t getTransientBulkImpl(const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const
size_t numTransients() const
size_t operator()(std::string_view const key) const
static int32_t transientIndexToId(unsigned const index)
size_t numUntranslatedStrings() const
void updateGeneration(const int64_t generation) noexcept
size_t transientEntryCount() const
Returns the number of transient string entries for this proxy.
IdMap buildUnionTranslationMapToOtherProxy(StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_types) const
TransientMap transient_str_to_int_
StringDictionaryProxy(StringDictionaryProxy const &)=delete
int32_t const * data() const
size_t entryCountUnlocked() const
std::vector< int32_t > const & getVectorMap() const
int32_t getOrAddTransientUnlocked(String const &)
int32_t operator[](int32_t const id) const
bool operator!=(StringDictionaryProxy const &) const
std::vector< int32_t > getRegexpLike(const std::string &pattern, const char escape) const
int32_t getOrAdd(const std::string &str) noexcept
bool operator==(StringDictionaryProxy const &) const
int32_t getDictId() const noexcept
std::vector< int32_t > getOrAddTransientBulk(const std::vector< std::string > &strings)
IdMap buildIntersectionTranslationMapToOtherProxy(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
Builds a vectorized string_id translation map from this proxy to dest_proxy.
std::shared_mutex rw_mutex_
int32_t getOrAddTransient(const std::string &str)
void setRangeStart(const int32_t range_start)
robin_hood::unordered_node_map< std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual > TransientMap
int32_t & operator[](int32_t const id)
nvtxRangeId_t range_start(const char *)
std::shared_timed_mutex shared_mutex
void range_end(nvtxRangeId_t)
bool operator()(std::string_view const lhs, std::string_view const rhs) const
size_t persistedC() const
int32_t getIdOfString(const std::string &str) const
static unsigned transientIdToIndex(int32_t const id)
int64_t getGeneration() const noexcept
size_t getIndex(int32_t const id) const
IdMap(uint32_t const tran_size, uint32_t const dict_size)