OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringDictionaryProxy Class Reference

#include <StringDictionaryProxy.h>

Classes

struct  HeterogeneousStringEqual
 
struct  HeterogeneousStringHash
 
class  IdMap
 

Public Types

using TransientMap = robin_hood::unordered_node_map< std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual >
 

Public Member Functions

 StringDictionaryProxy (StringDictionaryProxy const &)=delete
 
StringDictionaryProxy const & operator= (StringDictionaryProxy const &)=delete
 
 StringDictionaryProxy (std::shared_ptr< StringDictionary > sd, const int32_t string_dict_id, const int64_t generation)
 
int32_t getDictId () const noexcept
 
bool operator== (StringDictionaryProxy const &) const
 
bool operator!= (StringDictionaryProxy const &) const
 
int32_t getOrAdd (const std::string &str) noexcept
 
StringDictionary * getDictionary () const noexcept
 
int64_t getGeneration () const noexcept
 
std::vector< int32_t > getTransientBulk (const std::vector< std::string > &strings) const
 Executes read-only lookup of a vector of strings and returns a vector of their integer ids. More...
 
int32_t getOrAddTransient (const std::string &str)
 
std::vector< int32_t > getOrAddTransientBulk (const std::vector< std::string > &strings)
 
int32_t getIdOfString (const std::string &str) const
 
int32_t getIdOfStringNoGeneration (const std::string &str) const
 
std::string getString (int32_t string_id) const
 
std::vector< std::string > getStrings (const std::vector< int32_t > &string_ids) const
 
std::pair< const char *, size_t > getStringBytes (int32_t string_id) const noexcept
 
IdMap initIdMap () const
 
IdMap buildIntersectionTranslationMapToOtherProxy (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 Builds a vectorized string_id translation map from this proxy to dest_proxy. More...
 
IdMap buildUnionTranslationMapToOtherProxy (StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_types) const
 
size_t storageEntryCount () const
 Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary. More...
 
size_t transientEntryCount () const
 Returns the number of transient string entries for this proxy. More...
 
size_t entryCount () const
 Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount(). More...
 
void updateGeneration (const int64_t generation) noexcept
 
std::vector< int32_t > getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape) const
 
std::vector< int32_t > getCompare (const std::string &pattern, const std::string &comp_operator) const
 
std::vector< int32_t > getRegexpLike (const std::string &pattern, const char escape) const
 
const std::vector< std::string const * > & getTransientVector () const
 
void eachStringSerially (StringDictionary::StringCallback &) const
 
IdMap transientUnion (StringDictionaryProxy const &)
 

Static Public Member Functions

static unsigned transientIdToIndex (int32_t const id)
 
static int32_t transientIndexToId (unsigned const index)
 

Private Member Functions

std::string getStringUnlocked (const int32_t string_id) const
 
size_t transientEntryCountUnlocked () const
 
size_t entryCountUnlocked () const
 
size_t persistedC () const
 
template<typename String >
int32_t lookupTransientStringUnlocked (const String &lookup_string) const
 
size_t getTransientBulkImpl (const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const
 
template<typename String >
size_t transientLookupBulk (const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const
 
template<typename String >
size_t transientLookupBulkUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 
template<typename String >
size_t transientLookupBulkParallelUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 
template<typename String >
int32_t getIdOfStringFromClient (String const &) const
 
template<typename String >
int32_t getOrAddTransientUnlocked (String const &)
 

Private Attributes

std::shared_ptr< StringDictionary > string_dict_
 
const int32_t string_dict_id_
 
TransientMap transient_str_to_int_
 
std::vector< std::string const * > transient_string_vec_
 
int64_t generation_
 
std::shared_mutex rw_mutex_
 

Friends

class StringLocalCallback
 
class StringNetworkCallback
 

Detailed Description

Definition at line 38 of file StringDictionaryProxy.h.

Member Typedef Documentation

using StringDictionaryProxy::TransientMap = robin_hood::unordered_node_map<std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual>

Definition at line 226 of file StringDictionaryProxy.h.

Constructor & Destructor Documentation

StringDictionaryProxy::StringDictionaryProxy ( StringDictionaryProxy const &  )
delete
StringDictionaryProxy::StringDictionaryProxy ( std::shared_ptr< StringDictionary > sd,
const int32_t  string_dict_id,
const int64_t  generation 
)

Definition at line 39 of file StringDictionaryProxy.cpp.

42  : string_dict_(sd), string_dict_id_(string_dict_id), generation_(generation) {}
std::shared_ptr< StringDictionary > string_dict_

Member Function Documentation

StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxy ( const StringDictionaryProxy * dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos 
) const

Builds a vectorized string_id translation map from this proxy to dest_proxy.

Parameters
dest_proxy  StringDictionaryProxy that we are to map this proxy's string ids to
Returns
An IdMap which encapsulates a std::vector<int32_t> of string ids for both transient and non-transient strings, mapping to their translated string_ids. offset_ is defined to be the number of transient entries + 1. The ordering of values in the vector_map_ is:
  • the transient ids (there are offset_-1 of these)
  • INVALID_STR_ID (=-1)
  • the non-transient string ids
For example, if there are 3 transient entries in this proxy and 20 in the underlying string dictionary, then vector_map_ will be of size() == 24 and offset_ = 3 + 1. The formula to translate ids is new_id = vector_map_[offset_ + old_id]. It is always the case that vector_map_[offset_ - 1] == -1, so that INVALID_STR_ID maps to INVALID_STR_ID.

Definition at line 302 of file StringDictionaryProxy.cpp.

References buildIntersectionTranslationMapToOtherProxyUnlocked(), getDictId(), order_translation_locks(), and rw_mutex_.

Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap().

304  {
305  const auto source_dict_id = getDictId();
306  const auto dest_dict_id = dest_proxy->getDictId();
307 
308  std::shared_lock<std::shared_mutex> source_proxy_read_lock(rw_mutex_, std::defer_lock);
309  std::unique_lock<std::shared_mutex> dest_proxy_write_lock(dest_proxy->rw_mutex_,
310  std::defer_lock);
312  source_dict_id, dest_dict_id, source_proxy_read_lock, dest_proxy_write_lock);
313  return buildIntersectionTranslationMapToOtherProxyUnlocked(dest_proxy, string_op_infos);
314 }
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
void order_translation_locks(const int32_t source_db_id, const int32_t source_dict_id, const int32_t dest_db_id, const int32_t dest_dict_id, std::shared_lock< std::shared_mutex > &source_read_lock, std::shared_lock< std::shared_mutex > &dest_read_lock)
int32_t getDictId() const noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxyUnlocked ( const StringDictionaryProxy * dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos 
) const
private

Definition at line 183 of file StringDictionaryProxy.cpp.

References CHECK_GT, CHECK_LE, StringDictionaryProxy::IdMap::data(), DEBUG_TIMER, StringDictionaryProxy::IdMap::empty(), entryCountUnlocked(), generation_, getTransientBulkImpl(), StringDictionaryProxy::IdMap::getVectorMap(), initIdMap(), StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), StringDictionaryProxy::IdMap::numTransients(), StringDictionaryProxy::IdMap::setNumUntranslatedStrings(), StringDictionaryProxy::IdMap::setRangeEnd(), StringDictionaryProxy::IdMap::setRangeStart(), StringDictionaryProxy::IdMap::storageData(), storageEntryCount(), string_dict_, shared::transform(), transient_string_vec_, transientEntryCountUnlocked(), and VLOG.

Referenced by buildIntersectionTranslationMapToOtherProxy(), and buildUnionTranslationMapToOtherProxy().

185  {
186  auto timer = DEBUG_TIMER(__func__);
187  IdMap id_map = initIdMap();
188 
189  if (id_map.empty()) {
190  return id_map;
191  }
192 
193  const StringOps_Namespace::StringOps string_ops(string_op_infos);
194 
195  // First map transient strings, store at front of vector map
196  const size_t num_transient_entries = id_map.numTransients();
197  size_t num_transient_strings_not_translated = 0UL;
198  if (num_transient_entries) {
199  std::vector<std::string> transient_lookup_strings(num_transient_entries);
200  if (string_ops.size()) {
202  transient_string_vec_.cend(),
203  transient_lookup_strings.rbegin(),
204  [&](std::string const* ptr) { return string_ops(*ptr); });
205  } else {
207  transient_string_vec_.cend(),
208  transient_lookup_strings.rbegin(),
209  [](std::string const* ptr) { return *ptr; });
210  }
211 
212  // This lookup may have a different snapshot of
213  // dest_proxy transients and dictionary than what happens under
214  // the below dest_proxy_read_lock. We may need an unlocked version of
215  // getTransientBulk to ensure consistency (I don't believe
216  // current behavior would cause crashes/races, verify this though)
217 
218  // Todo(mattp): Consider variant of getTransientBulkImp that can take
219  // a vector of pointer-to-strings so we don't have to materialize
220  // transient_string_vec_ into transient_lookup_strings.
221 
222  num_transient_strings_not_translated =
223  dest_proxy->getTransientBulkImpl(transient_lookup_strings, id_map.data(), false);
224  }
225 
226  // Now map strings in dictionary
227  // We place non-transient strings after the transient strings
228  // if they exist, otherwise at index 0
229  int32_t* translation_map_stored_entries_ptr = id_map.storageData();
230 
231  auto dest_transient_lookup_callback = [dest_proxy, translation_map_stored_entries_ptr](
232  const std::string_view& source_string,
233  const int32_t source_string_id) {
234  translation_map_stored_entries_ptr[source_string_id] =
235  dest_proxy->lookupTransientStringUnlocked(source_string);
236  return translation_map_stored_entries_ptr[source_string_id] ==
238  };
239 
240  const size_t num_dest_transients = dest_proxy->transientEntryCountUnlocked();
241  const size_t num_persisted_strings_not_translated =
242  generation_ > 0 ? string_dict_->buildDictionaryTranslationMap(
243  dest_proxy->string_dict_.get(),
244  translation_map_stored_entries_ptr,
245  generation_,
246  dest_proxy->generation_,
247  num_dest_transients > 0UL,
248  dest_transient_lookup_callback,
249  string_op_infos)
250  : 0UL;
251 
252  const size_t num_dest_entries = dest_proxy->entryCountUnlocked();
253  const size_t num_total_entries =
254  id_map.getVectorMap().size() - 1UL /* account for skipped entry -1 */;
255  CHECK_GT(num_total_entries, 0UL);
256  const size_t num_strings_not_translated =
257  num_transient_strings_not_translated + num_persisted_strings_not_translated;
258  CHECK_LE(num_strings_not_translated, num_total_entries);
259  id_map.setNumUntranslatedStrings(num_strings_not_translated);
260 
261  // Below is a conservative setting of range based on the size of the destination proxy,
262  // but probably not worth a scan over the data (or inline computation as we translate)
263  // to compute the actual ranges
264 
265  id_map.setRangeStart(
266  num_dest_transients > 0 ? -1 - static_cast<int32_t>(num_dest_transients) : 0);
267  id_map.setRangeEnd(dest_proxy->storageEntryCount());
268 
269  const size_t num_entries_translated = num_total_entries - num_strings_not_translated;
270  const float match_pct =
271  100.0 * static_cast<float>(num_entries_translated) / num_total_entries;
272  VLOG(1) << std::fixed << std::setprecision(2) << match_pct << "% ("
273  << num_entries_translated << " entries) from dictionary ("
274  << string_dict_->getDbId() << ", " << string_dict_->getDictId() << ") with "
275  << num_total_entries << " total entries ( " << num_transient_entries
276  << " literals)"
277  << " translated to dictionary (" << dest_proxy->string_dict_->getDbId() << ", "
278  << dest_proxy->string_dict_->getDictId() << ") with " << num_dest_entries
279  << " total entries (" << dest_proxy->transientEntryCountUnlocked()
280  << " literals).";
281 
282  return id_map;
283 }
size_t transientEntryCountUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
#define CHECK_GT(x, y)
Definition: Logger.h:234
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
size_t getTransientBulkImpl(const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:296
#define CHECK_LE(x, y)
Definition: Logger.h:233
#define DEBUG_TIMER(name)
Definition: Logger.h:371
#define VLOG(n)
Definition: Logger.h:316

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::buildUnionTranslationMapToOtherProxy ( StringDictionaryProxy * dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_types 
) const

Definition at line 316 of file StringDictionaryProxy.cpp.

References buildIntersectionTranslationMapToOtherProxyUnlocked(), DEBUG_TIMER, getDictId(), getOrAddTransientUnlocked(), getStringUnlocked(), StringDictionary::INVALID_STR_ID, order_translation_locks(), rw_mutex_, string_dict_, to_string(), and transientEntryCountUnlocked().

Referenced by RowSetMemoryOwner::addStringProxyUnionTranslationMap().

318  {
319  auto timer = DEBUG_TIMER(__func__);
320 
321  const auto source_dict_id = getDictId();
322  const auto dest_dict_id = dest_proxy->getDictId();
323  std::shared_lock<std::shared_mutex> source_proxy_read_lock(rw_mutex_, std::defer_lock);
324  std::unique_lock<std::shared_mutex> dest_proxy_write_lock(dest_proxy->rw_mutex_,
325  std::defer_lock);
327  source_dict_id, dest_dict_id, source_proxy_read_lock, dest_proxy_write_lock);
328 
329  auto id_map =
330  buildIntersectionTranslationMapToOtherProxyUnlocked(dest_proxy, string_op_infos);
331  if (id_map.empty()) {
332  return id_map;
333  }
334  const auto num_untranslated_strings = id_map.numUntranslatedStrings();
335  if (num_untranslated_strings > 0) {
336  const size_t total_post_translation_dest_transients =
337  num_untranslated_strings + dest_proxy->transientEntryCountUnlocked();
338  constexpr size_t max_allowed_transients =
339  static_cast<size_t>(std::numeric_limits<int32_t>::max() -
340  2); /* -2 accounts for INVALID_STR_ID and NULL value */
341  if (total_post_translation_dest_transients > max_allowed_transients) {
342  throw std::runtime_error("Union translation to dictionary" +
343  std::to_string(getDictId()) + " would result in " +
344  std::to_string(total_post_translation_dest_transients) +
345  " transient entries, which is more than limit of " +
346  std::to_string(max_allowed_transients) + " transients.");
347  }
348  const int32_t map_domain_start = id_map.domainStart();
349  const int32_t map_domain_end = id_map.domainEnd();
350 
351  const StringOps_Namespace::StringOps string_ops(string_op_infos);
352  const bool has_string_ops = string_ops.size();
353 
354  // First iterate over transient strings and add to dest map
355  // Todo (todd): Add call to fetch string_views (local) or strings (distributed)
356  // for all non-translated ids to avoid string-by-string fetch
357 
358  for (int32_t source_string_id = map_domain_start; source_string_id < -1;
359  ++source_string_id) {
360  if (id_map[source_string_id] == StringDictionary::INVALID_STR_ID) {
361  const auto source_string = getStringUnlocked(source_string_id);
362  const auto dest_string_id = dest_proxy->getOrAddTransientUnlocked(
363  has_string_ops ? string_ops(source_string) : source_string);
364  id_map[source_string_id] = dest_string_id;
365  }
366  }
367  // Now iterate over stored strings
368  for (int32_t source_string_id = 0; source_string_id < map_domain_end;
369  ++source_string_id) {
370  if (id_map[source_string_id] == StringDictionary::INVALID_STR_ID) {
371  const auto source_string = string_dict_->getString(source_string_id);
372  const auto dest_string_id = dest_proxy->getOrAddTransientUnlocked(
373  has_string_ops ? string_ops(source_string) : source_string);
374  id_map[source_string_id] = dest_string_id;
375  }
376  }
377  }
378  // We may have added transients to the destination proxy, use this to update
379  // our id map range (used downstream for ExpressionRange)
380 
381  const size_t num_dest_transients = dest_proxy->transientEntryCountUnlocked();
382  id_map.setRangeStart(
383  num_dest_transients > 0 ? -1 - static_cast<int32_t>(num_dest_transients) : 0);
384  return id_map;
385 }
size_t transientEntryCountUnlocked() const
std::string getStringUnlocked(const int32_t string_id) const
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
std::string to_string(char const *&&v)
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
void order_translation_locks(const int32_t source_db_id, const int32_t source_dict_id, const int32_t dest_db_id, const int32_t dest_dict_id, std::shared_lock< std::shared_mutex > &source_read_lock, std::shared_lock< std::shared_mutex > &dest_read_lock)
int32_t getOrAddTransientUnlocked(String const &)
int32_t getDictId() const noexcept
#define DEBUG_TIMER(name)
Definition: Logger.h:371

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void StringDictionaryProxy::eachStringSerially ( StringDictionary::StringCallback & serial_callback) const

Definition at line 532 of file StringDictionaryProxy.cpp.

References generation_, string_dict_, and transient_string_vec_.

Referenced by transientUnion().

533  {
534  constexpr int32_t max_transient_id = -2;
535  // Iterate over transient strings.
536  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
537  std::string const& str = *transient_string_vec_[index];
538  int32_t const string_id = max_transient_id - index;
539  serial_callback(str, string_id);
540  }
541  // Iterate over non-transient strings.
542  string_dict_->eachStringSerially(generation_, serial_callback);
543 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::entryCount ( ) const

Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount().

Returns
size_t Number of total string entries for this proxy

Definition at line 526 of file StringDictionaryProxy.cpp.

References entryCountUnlocked(), and rw_mutex_.

526  {
527  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
528  return entryCountUnlocked();
529 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock

+ Here is the call graph for this function:

size_t StringDictionaryProxy::entryCountUnlocked ( ) const
private

Definition at line 522 of file StringDictionaryProxy.cpp.

References storageEntryCount(), and transientEntryCountUnlocked().

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and entryCount().

522  {
524 }
size_t transientEntryCountUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getCompare ( const std::string &  pattern,
const std::string &  comp_operator 
) const

Definition at line 451 of file StringDictionaryProxy.cpp.

References CHECK_GE, anonymous_namespace{StringDictionaryProxy.cpp}::do_compare(), generation_, run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

Referenced by anonymous_namespace{StringOpsIR.cpp}::get_compared_ids().

453  {
454  CHECK_GE(generation_, 0);
455  auto result = string_dict_->getCompare(pattern, comp_operator, generation_);
456  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
457  if (do_compare(*transient_string_vec_[index], pattern, comp_operator)) {
458  result.push_back(transientIndexToId(index));
459  }
460  }
461  return result;
462 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)
bool do_compare(const std::string &str, const std::string &pattern, const std::string &comp_operator)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getDictId ( ) const
inlinenoexcept

Definition at line 46 of file StringDictionaryProxy.h.

References string_dict_id_.

Referenced by buildIntersectionTranslationMapToOtherProxy(), buildUnionTranslationMapToOtherProxy(), TransientStringLiteralsVisitor::visitStringOper(), and TransientStringLiteralsVisitor::visitUOper().

46 { return string_dict_id_; };

+ Here is the caller graph for this function:

StringDictionary * StringDictionaryProxy::getDictionary ( ) const
noexcept

Definition at line 730 of file StringDictionaryProxy.cpp.

References string_dict_.

Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap(), and RowSetMemoryOwner::addStringProxyUnionTranslationMap().

730  {
731  return string_dict_.get();
732 }
std::shared_ptr< StringDictionary > string_dict_

+ Here is the caller graph for this function:

int64_t StringDictionaryProxy::getGeneration ( ) const
noexcept

Definition at line 734 of file StringDictionaryProxy.cpp.

References generation_.

734  {
735  return generation_;
736 }
int32_t StringDictionaryProxy::getIdOfString ( const std::string &  str) const

Definition at line 110 of file StringDictionaryProxy.cpp.

References getIdOfStringFromClient(), StringDictionary::INVALID_STR_ID, rw_mutex_, and transient_str_to_int_.

Referenced by anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), and Executor::serializeLiterals().

110  {
111  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
112  auto const str_id = getIdOfStringFromClient(str);
113  if (str_id != StringDictionary::INVALID_STR_ID || transient_str_to_int_.empty()) {
114  return str_id;
115  }
116  auto it = transient_str_to_int_.find(str);
117  return it != transient_str_to_int_.end() ? it->second
119 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
int32_t getIdOfStringFromClient(String const &) const
static constexpr int32_t INVALID_STR_ID

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::getIdOfStringFromClient ( String const &  str) const
private

Definition at line 122 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, string_dict_, and truncate_to_generation().

Referenced by getIdOfString(), and getOrAddTransient().

122  {
123  CHECK_GE(generation_, 0);
124  return truncate_to_generation(string_dict_->getIdOfString(str), generation_);
125 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
std::shared_ptr< StringDictionary > string_dict_
int32_t truncate_to_generation(const int32_t id, const size_t generation)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getIdOfStringNoGeneration ( const std::string &  str) const

Definition at line 127 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, and transient_str_to_int_.

127  {
128  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
129  auto str_id = string_dict_->getIdOfString(str);
130  if (str_id != StringDictionary::INVALID_STR_ID || transient_str_to_int_.empty()) {
131  return str_id;
132  }
133  auto it = transient_str_to_int_.find(str);
134  return it != transient_str_to_int_.end() ? it->second
136 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
std::vector< int32_t > StringDictionaryProxy::getLike ( const std::string &  pattern,
const bool  icase,
const bool  is_simple,
const char  escape 
) const

Definition at line 413 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

416  {
417  CHECK_GE(generation_, 0);
418  auto result = string_dict_->getLike(pattern, icase, is_simple, escape, generation_);
419  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
420  if (is_like(*transient_string_vec_[index], pattern, icase, is_simple, escape)) {
421  result.push_back(transientIndexToId(index));
422  }
423  }
424  return result;
425 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
bool is_like(const std::string &str, const std::string &pattern, const bool icase, const bool is_simple, const char escape)
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

int32_t StringDictionaryProxy::getOrAdd ( const std::string &  str)
noexcept

Definition at line 486 of file StringDictionaryProxy.cpp.

Referenced by DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId().

486  {
487  return string_dict_->getOrAdd(str);
488 }
std::shared_ptr< StringDictionary > string_dict_

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getOrAddTransient ( const std::string &  str)

Definition at line 101 of file StringDictionaryProxy.cpp.

References getIdOfStringFromClient(), getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, and rw_mutex_.

Referenced by apply_string_ops_and_encode(), Executor::serializeLiterals(), string_compress(), TransientStringLiteralsVisitor::visitConstant(), and TransientStringLiteralsVisitor::visitStringOper().

101  {
102  auto const string_id = getIdOfStringFromClient(str);
103  if (string_id != StringDictionary::INVALID_STR_ID) {
104  return string_id;
105  }
106  std::lock_guard<std::shared_mutex> write_lock(rw_mutex_);
107  return getOrAddTransientUnlocked(str);
108 }
heavyai::unique_lock< heavyai::shared_mutex > write_lock
int32_t getIdOfStringFromClient(String const &) const
static constexpr int32_t INVALID_STR_ID
int32_t getOrAddTransientUnlocked(String const &)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getOrAddTransientBulk ( const std::vector< std::string > &  strings)

Definition at line 60 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, rw_mutex_, and string_dict_.

Referenced by supported_ml_frameworks__cpu_().

61  {
63  const size_t num_strings = strings.size();
64  std::vector<int32_t> string_ids(num_strings);
65  if (num_strings == 0) {
66  return string_ids;
67  }
68  // Since new strings added to a StringDictionaryProxy are not materialized in the
69  // proxy's underlying StringDictionary, we can use the fast parallel
70  // StringDictionary::getBulk method to fetch ids from the underlying dictionary (which
71  // will return StringDictionary::INVALID_STR_ID for strings that don't exist)
72 
73  // Don't need to be under lock here as the string ids for strings in the underlying
74  // materialized dictionary are immutable
75  const size_t num_strings_not_found =
76  string_dict_->getBulk(strings, string_ids.data(), generation_);
77  if (num_strings_not_found > 0) {
78  std::lock_guard<std::shared_mutex> write_lock(rw_mutex_);
79  for (size_t string_idx = 0; string_idx < num_strings; ++string_idx) {
80  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
81  string_ids[string_idx] = getOrAddTransientUnlocked(strings[string_idx]);
82  }
83  }
84  }
85  return string_ids;
86 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
heavyai::unique_lock< heavyai::shared_mutex > write_lock
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
int32_t getOrAddTransientUnlocked(String const &)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::getOrAddTransientUnlocked ( String const &  str)
private

Definition at line 89 of file StringDictionaryProxy.cpp.

References transient_str_to_int_, transient_string_vec_, and transientIndexToId().

Referenced by buildUnionTranslationMapToOtherProxy(), getOrAddTransient(), getOrAddTransientBulk(), StringLocalCallback::operator()(), and StringNetworkCallback::operator()().

89  {
90  unsigned const new_index = transient_str_to_int_.size();
91  auto transient_id = transientIndexToId(new_index);
92  auto const emplaced = transient_str_to_int_.emplace(str, transient_id);
93  if (emplaced.second) { // (str, transient_id) was added to transient_str_to_int_.
94  transient_string_vec_.push_back(&emplaced.first->first);
95  } else { // str already exists in transient_str_to_int_. Return existing transient_id.
96  transient_id = emplaced.first->second;
97  }
98  return transient_id;
99 }
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getRegexpLike ( const std::string &  pattern,
const char  escape 
) const

Definition at line 474 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

475  {
476  CHECK_GE(generation_, 0);
477  auto result = string_dict_->getRegexpLike(pattern, escape, generation_);
478  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
479  if (is_regexp_like(*transient_string_vec_[index], pattern, escape)) {
480  result.push_back(transientIndexToId(index));
481  }
482  }
483  return result;
484 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
bool is_regexp_like(const std::string &str, const std::string &pattern, const char escape)
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

std::string StringDictionaryProxy::getString ( int32_t  string_id) const

Definition at line 138 of file StringDictionaryProxy.cpp.

References getStringUnlocked(), and rw_mutex_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::build_string_array_target_value(), StringValueConverter::convertToColumnarFormatFromDict(), DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), intersect_translate_string_id_to_other_dict(), and union_translate_string_id_to_other_dict().

138  {
139  if (inline_int_null_value<int32_t>() == string_id) {
140  return "";
141  }
142  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
143  return getStringUnlocked(string_id);
144 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
std::string getStringUnlocked(const int32_t string_id) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< const char *, size_t > StringDictionaryProxy::getStringBytes ( int32_t  string_id) const
noexcept

Definition at line 490 of file StringDictionaryProxy.cpp.

References CHECK_LT.

Referenced by anonymous_namespace{ExternalExecutor.cpp}::decode_string(), and string_decompress().

491  {
492  if (string_id >= 0) {
493  return string_dict_.get()->getStringBytes(string_id);
494  }
495  unsigned const string_index = transientIdToIndex(string_id);
496  CHECK_LT(string_index, transient_string_vec_.size());
497  std::string const* const str_ptr = transient_string_vec_[string_index];
498  return {str_ptr->c_str(), str_ptr->size()};
499 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
#define CHECK_LT(x, y)
Definition: Logger.h:232
static unsigned transientIdToIndex(int32_t const id)

+ Here is the caller graph for this function:

std::vector< std::string > StringDictionaryProxy::getStrings ( const std::vector< int32_t > &  string_ids) const

Definition at line 155 of file StringDictionaryProxy.cpp.

References string_dict_, transient_string_vec_, and transientIdToIndex().

156  {
157  std::vector<std::string> strings;
158  if (!string_ids.empty()) {
159  strings.reserve(string_ids.size());
160  for (const auto string_id : string_ids) {
161  if (string_id >= 0) {
162  strings.emplace_back(string_dict_->getString(string_id));
163  } else if (inline_int_null_value<int32_t>() == string_id) {
164  strings.emplace_back("");
165  } else {
166  unsigned const string_index = transientIdToIndex(string_id);
167  strings.emplace_back(*transient_string_vec_[string_index]);
168  }
169  }
170  }
171  return strings;
172 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static unsigned transientIdToIndex(int32_t const id)

+ Here is the call graph for this function:

std::string StringDictionaryProxy::getStringUnlocked ( const int32_t  string_id) const
private

Definition at line 146 of file StringDictionaryProxy.cpp.

References CHECK_LT, storageEntryCount(), string_dict_, transient_string_vec_, and transientIdToIndex().

Referenced by buildUnionTranslationMapToOtherProxy(), and getString().

146  {
147  if (string_id >= 0 && storageEntryCount() > 0) {
148  return string_dict_->getString(string_id);
149  }
150  unsigned const string_index = transientIdToIndex(string_id);
151  CHECK_LT(string_index, transient_string_vec_.size());
152  return *transient_string_vec_[string_index];
153 }
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
#define CHECK_LT(x, y)
Definition: Logger.h:232
static unsigned transientIdToIndex(int32_t const id)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getTransientBulk ( const std::vector< std::string > &  strings) const

Executes read-only lookup of a vector of strings and returns a vector of their integer ids.

This function, unlike getOrAddTransientBulk, will not add strings to the dictionary. Use this function if strings that don't currently exist in the StringDictionaryProxy should not be added to the proxy as transient entries. This method also has performance advantages over getOrAddTransientBulk for read-only use cases, in that it can: 1) take a read lock instead of a write lock for the transient lookups, and 2) use a tbb::parallel_for implementation of the transient string lookups, as we are guaranteed that the underlying map of strings to int ids cannot change.

Parameters
strings - Vector of strings to perform string id lookups on
Returns
A vector of string_ids of the same length as strings, containing the id of each string that was found in the underlying StringDictionary instance or in the proxy's transient map, and StringDictionary::INVALID_STR_ID for strings not found.

Definition at line 52 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, and getTransientBulkImpl().

53  {
55  std::vector<int32_t> string_ids(strings.size());
56  getTransientBulkImpl(strings, string_ids.data(), true);
57  return string_ids;
58 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
size_t getTransientBulkImpl(const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const

+ Here is the call graph for this function:

size_t StringDictionaryProxy::getTransientBulkImpl ( const std::vector< std::string > &  strings,
int32_t *  string_ids,
const bool  take_read_lock 
) const
private

Definition at line 627 of file StringDictionaryProxy.cpp.

References generation_, string_dict_, and transientLookupBulk().

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and getTransientBulk().

630  {
631  const size_t num_strings = strings.size();
632  if (num_strings == 0) {
633  return 0UL;
634  }
635  // StringDictionary::getBulk returns the number of strings not found
636  if (string_dict_->getBulk(strings, string_ids, generation_) == 0UL) {
637  return 0UL;
638  }
639 
640  // If here, dictionary could not find at least 1 target string,
641  // now look these up in the transient dictionary
642  // transientLookupBulk returns the number of strings not found
643  return transientLookupBulk(strings, string_ids, take_read_lock);
644 }
size_t transientLookupBulk(const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const
std::shared_ptr< StringDictionary > string_dict_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const std::vector<std::string const*>& StringDictionaryProxy::getTransientVector ( ) const
inline

Definition at line 228 of file StringDictionaryProxy.h.

References transient_string_vec_.

Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().

228  {
229  return transient_string_vec_;
230  }
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

IdMap StringDictionaryProxy::initIdMap ( ) const
inline

Definition at line 134 of file StringDictionaryProxy.h.

References generation_, and transient_string_vec_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and transientUnion().

134 { return IdMap(transient_string_vec_.size(), generation_); }
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::lookupTransientStringUnlocked ( const String &  lookup_string) const
private

Definition at line 175 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and transient_str_to_int_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().

176  {
177  const auto it = transient_str_to_int_.find(lookup_string);
179  : it->second;
180 }
static constexpr int32_t INVALID_STR_ID

+ Here is the caller graph for this function:

bool StringDictionaryProxy::operator!= ( StringDictionaryProxy const &  rhs) const

Definition at line 743 of file StringDictionaryProxy.cpp.

References operator==().

743  {
744  return !operator==(rhs);
745 }
bool operator==(StringDictionaryProxy const &) const

+ Here is the call graph for this function:

StringDictionaryProxy const& StringDictionaryProxy::operator= ( StringDictionaryProxy const &  )
delete
bool StringDictionaryProxy::operator== ( StringDictionaryProxy const &  rhs) const

Definition at line 738 of file StringDictionaryProxy.cpp.

References string_dict_id_, and transient_str_to_int_.

Referenced by operator!=().

738  {
739  return string_dict_id_ == rhs.string_dict_id_ &&
740  transient_str_to_int_ == rhs.transient_str_to_int_;
741 }

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::persistedC ( ) const
private
size_t StringDictionaryProxy::storageEntryCount ( ) const

Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary.

Returns
size_t Number of entries in the string dictionary (at this proxy's generation if set)

Definition at line 501 of file StringDictionaryProxy.cpp.

References CHECK_LE, generation_, and string_dict_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), entryCountUnlocked(), and getStringUnlocked().

501  {
502  const size_t num_storage_entries{generation_ == -1 ? string_dict_->storageEntryCount()
503  : generation_};
504  CHECK_LE(num_storage_entries, static_cast<size_t>(std::numeric_limits<int32_t>::max()));
505  return num_storage_entries;
506 }
std::shared_ptr< StringDictionary > string_dict_
#define CHECK_LE(x, y)
Definition: Logger.h:233

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::transientEntryCount ( ) const

Returns the number of transient string entries for this proxy.

Returns
size_t Number of transient string entries for this proxy

Definition at line 517 of file StringDictionaryProxy.cpp.

References rw_mutex_, and transientEntryCountUnlocked().

517  {
518  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
520 }
size_t transientEntryCountUnlocked() const
heavyai::shared_lock< heavyai::shared_mutex > read_lock

+ Here is the call graph for this function:

size_t StringDictionaryProxy::transientEntryCountUnlocked ( ) const
private

Definition at line 508 of file StringDictionaryProxy.cpp.

References CHECK_LE, and transient_str_to_int_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildUnionTranslationMapToOtherProxy(), entryCountUnlocked(), and transientEntryCount().

508  {
509  // CHECK_LE(num_storage_entries,
510  // static_cast<size_t>(std::numeric_limits<int32_t>::max()));
511  const size_t num_transient_entries{transient_str_to_int_.size()};
512  CHECK_LE(num_transient_entries,
513  static_cast<size_t>(std::numeric_limits<int32_t>::max()) - 1);
514  return num_transient_entries;
515 }
#define CHECK_LE(x, y)
Definition: Logger.h:233

+ Here is the caller graph for this function:

static unsigned StringDictionaryProxy::transientIdToIndex ( int32_t const  id)
inlinestatic

Definition at line 234 of file StringDictionaryProxy.h.

Referenced by getStrings(), getStringUnlocked(), and StringDictionary::populate_string_ids().

234  {
235  constexpr int max_transient_string_id = -2;
236  return static_cast<unsigned>(max_transient_string_id - id);
237  }

+ Here is the caller graph for this function:

static int32_t StringDictionaryProxy::transientIndexToId ( unsigned const  index)
inlinestatic

Definition at line 239 of file StringDictionaryProxy.h.

Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), getCompare(), getLike(), getOrAddTransientUnlocked(), getRegexpLike(), and ArrowResultSetConverter::initializeColumnBuilder().

239  {
240  constexpr int max_transient_string_id = -2;
241  return static_cast<int32_t>(max_transient_string_id - index);
242  }

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulk ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids,
const bool  take_read_lock 
) const
private

Definition at line 647 of file StringDictionaryProxy.cpp.

References rw_mutex_, transient_str_to_int_, transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().

Referenced by getTransientBulkImpl().

650  {
651  const size_t num_strings = lookup_strings.size();
652  auto read_lock = take_read_lock ? std::shared_lock<std::shared_mutex>(rw_mutex_)
653  : std::shared_lock<std::shared_mutex>();
654 
655  if (num_strings == static_cast<size_t>(0) || transient_str_to_int_.empty()) {
656  return 0UL;
657  }
658  constexpr size_t tbb_parallel_threshold{20000};
659  if (num_strings < tbb_parallel_threshold) {
660  return transientLookupBulkUnlocked(lookup_strings, string_ids);
661  } else {
662  return transientLookupBulkParallelUnlocked(lookup_strings, string_ids);
663  }
664 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
size_t transientLookupBulkUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const
size_t transientLookupBulkParallelUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulkParallelUnlocked ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 687 of file StringDictionaryProxy.cpp.

References CHECK_GE, StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, and threading_serial::parallel_for().

Referenced by transientLookupBulk().

689  {
690  const size_t num_lookup_strings = lookup_strings.size();
691  const size_t target_inputs_per_thread = 20000L;
692  ThreadInfo thread_info(
693  std::thread::hardware_concurrency(), num_lookup_strings, target_inputs_per_thread);
694  CHECK_GE(thread_info.num_threads, 1L);
695  CHECK_GE(thread_info.num_elems_per_thread, 1L);
696 
697  std::vector<size_t> num_strings_not_found_per_thread(thread_info.num_threads, 0UL);
698 
699  tbb::task_arena limited_arena(thread_info.num_threads);
700  limited_arena.execute([&] {
702  tbb::blocked_range<size_t>(
703  0, num_lookup_strings, thread_info.num_elems_per_thread /* tbb grain_size */),
704  [&](const tbb::blocked_range<size_t>& r) {
705  const size_t start_idx = r.begin();
706  const size_t end_idx = r.end();
707  size_t num_local_strings_not_found = 0;
708  for (size_t string_idx = start_idx; string_idx < end_idx; ++string_idx) {
709  if (string_ids[string_idx] != StringDictionary::INVALID_STR_ID) {
710  continue;
711  }
712  string_ids[string_idx] =
713  lookupTransientStringUnlocked(lookup_strings[string_idx]);
714  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
715  num_local_strings_not_found++;
716  }
717  }
718  const size_t tbb_thread_idx = tbb::this_task_arena::current_thread_index();
719  num_strings_not_found_per_thread[tbb_thread_idx] = num_local_strings_not_found;
720  },
721  tbb::simple_partitioner());
722  });
723  size_t num_strings_not_found = 0;
724  for (int64_t thread_idx = 0; thread_idx < thread_info.num_threads; ++thread_idx) {
725  num_strings_not_found += num_strings_not_found_per_thread[thread_idx];
726  }
727  return num_strings_not_found;
728 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
static constexpr int32_t INVALID_STR_ID
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulkUnlocked ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 667 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and lookupTransientStringUnlocked().

Referenced by transientLookupBulk().

669  {
670  const size_t num_strings = lookup_strings.size();
671  size_t num_strings_not_found = 0;
672  for (size_t string_idx = 0; string_idx < num_strings; ++string_idx) {
673  if (string_ids[string_idx] != StringDictionary::INVALID_STR_ID) {
674  continue;
675  }
676  // If we're here it means we need to look up this string as we don't
677  // have a valid id for it
678  string_ids[string_idx] = lookupTransientStringUnlocked(lookup_strings[string_idx]);
679  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
680  num_strings_not_found++;
681  }
682  }
683  return num_strings_not_found;
684 }
static constexpr int32_t INVALID_STR_ID
int32_t lookupTransientStringUnlocked(const String &lookup_string) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::transientUnion ( StringDictionaryProxy const &  sdp_rhs)

Definition at line 596 of file StringDictionaryProxy.cpp.

References eachStringSerially(), initIdMap(), and string_dict_.

597  {
598  IdMap id_map = sdp_rhs.initIdMap();
599  // serial_callback cannot be parallelized due to calling getOrAddTransientUnlocked().
600  std::unique_ptr<StringDictionary::StringCallback> serial_callback;
601  if (string_dict_->isClient()) {
602  serial_callback = std::make_unique<StringNetworkCallback>(this, id_map);
603  } else {
604  serial_callback = std::make_unique<StringLocalCallback>(this, id_map);
605  }
606  // Import all non-duplicate strings (transient and non-transient) and add to id_map.
607  sdp_rhs.eachStringSerially(*serial_callback);
608  return id_map;
609 }
std::shared_ptr< StringDictionary > string_dict_

+ Here is the call graph for this function:

void StringDictionaryProxy::updateGeneration ( const int64_t  generation)
noexcept

Definition at line 616 of file StringDictionaryProxy.cpp.

References CHECK_EQ.

616  {
617  if (generation == -1) {
618  return;
619  }
620  if (generation_ != -1) {
621  CHECK_EQ(generation_, generation);
622  return;
623  }
624  generation_ = generation;
625 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230

Friends And Related Function Documentation

friend class StringLocalCallback
friend

Definition at line 290 of file StringDictionaryProxy.h.

friend class StringNetworkCallback
friend

Definition at line 291 of file StringDictionaryProxy.h.

Member Data Documentation

const int32_t StringDictionaryProxy::string_dict_id_
private

Definition at line 277 of file StringDictionaryProxy.h.

Referenced by getDictId(), and operator==().

std::vector<std::string const*> StringDictionaryProxy::transient_string_vec_
private

The documentation for this class was generated from the following files: