OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringDictionaryProxy Class Reference

#include <StringDictionaryProxy.h>

Classes

struct  HeterogeneousStringEqual
 
struct  HeterogeneousStringHash
 
class  TranslationMap
 

Public Types

using IdMap = TranslationMap< int32_t >
 
using TransientMap = robin_hood::unordered_node_map< std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual >
 

Public Member Functions

 StringDictionaryProxy (StringDictionaryProxy const &)=delete
 
StringDictionaryProxy const & operator= (StringDictionaryProxy const &)=delete
 
 StringDictionaryProxy (std::shared_ptr< StringDictionary > sd, const int32_t string_dict_id, const int64_t generation)
 
int32_t getDictId () const noexcept
 
bool operator== (StringDictionaryProxy const &) const
 
bool operator!= (StringDictionaryProxy const &) const
 
int32_t getOrAdd (const std::string &str) noexcept
 
StringDictionary * getDictionary () const noexcept
 
int64_t getGeneration () const noexcept
 
std::vector< int32_t > getTransientBulk (const std::vector< std::string > &strings) const
 Executes read-only lookup of a vector of strings and returns a vector of their integer ids. More...
 
int32_t getOrAddTransient (const std::string &)
 
int32_t getOrAddTransient (const std::string_view)
 
std::vector< int32_t > getOrAddTransientBulk (const std::vector< std::string > &strings)
 
int32_t getIdOfString (const std::string &str) const
 
int32_t getIdOfStringNoGeneration (const std::string &str) const
 
std::string getString (int32_t string_id) const
 
std::vector< std::string > getStrings (const std::vector< int32_t > &string_ids) const
 
std::pair< const char *, size_t > getStringBytes (int32_t string_id) const noexcept
 
IdMap initIdMap () const
 
TranslationMap< Datum > buildNumericTranslationMap (const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 Builds a vectorized string_id translation map from this proxy to dest_proxy. More...
 
IdMap buildIntersectionTranslationMapToOtherProxy (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 
IdMap buildUnionTranslationMapToOtherProxy (StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_types) const
 
size_t storageEntryCount () const
 Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary. More...
 
size_t transientEntryCount () const
 Returns the number of transient string entries for this proxy. More...
 
size_t entryCount () const
 Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount() More...
 
void updateGeneration (const int64_t generation) noexcept
 
std::vector< int32_t > getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape) const
 
std::vector< int32_t > getCompare (const std::string &pattern, const std::string &comp_operator) const
 
std::vector< int32_t > getRegexpLike (const std::string &pattern, const char escape) const
 
const std::vector< std::string const * > & getTransientVector () const
 
void eachStringSerially (StringDictionary::StringCallback &) const
 
IdMap transientUnion (StringDictionaryProxy const &)
 

Static Public Member Functions

static unsigned transientIdToIndex (int32_t const id)
 
static int32_t transientIndexToId (unsigned const index)
 

Private Member Functions

std::string getStringUnlocked (const int32_t string_id) const
 
size_t transientEntryCountUnlocked () const
 
size_t entryCountUnlocked () const
 
size_t persistedC () const
 
template<typename String >
int32_t getOrAddTransientImpl (String)
 
template<typename String >
int32_t lookupTransientStringUnlocked (const String &lookup_string) const
 
size_t getTransientBulkImpl (const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const
 
template<typename String >
size_t transientLookupBulk (const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const
 
template<typename String >
size_t transientLookupBulkUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 
template<typename String >
size_t transientLookupBulkParallelUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 
template<typename String >
int32_t getIdOfStringFromClient (String const &) const
 
template<typename String >
int32_t getOrAddTransientUnlocked (String const &)
 

Private Attributes

std::shared_ptr< StringDictionary > string_dict_
 
const int32_t string_dict_id_
 
TransientMap transient_str_to_int_
 
std::vector< std::string const * > transient_string_vec_
 
int64_t generation_
 
std::shared_mutex rw_mutex_
 

Friends

class StringLocalCallback
 
class StringNetworkCallback
 

Detailed Description

Definition at line 39 of file StringDictionaryProxy.h.

Member Typedef Documentation

using StringDictionaryProxy::IdMap = TranslationMap<int32_t>

Definition at line 141 of file StringDictionaryProxy.h.

using StringDictionaryProxy::TransientMap = robin_hood::unordered_node_map<std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual>

Definition at line 242 of file StringDictionaryProxy.h.

Constructor & Destructor Documentation

StringDictionaryProxy::StringDictionaryProxy ( StringDictionaryProxy const &  )
delete
StringDictionaryProxy::StringDictionaryProxy ( std::shared_ptr< StringDictionary > sd,
const int32_t  string_dict_id,
const int64_t  generation 
)

Definition at line 39 of file StringDictionaryProxy.cpp.

42  : string_dict_(sd), string_dict_id_(string_dict_id), generation_(generation) {}
std::shared_ptr< StringDictionary > string_dict_

Member Function Documentation

StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxy ( const StringDictionaryProxy * dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos 
) const

Definition at line 384 of file StringDictionaryProxy.cpp.

References buildIntersectionTranslationMapToOtherProxyUnlocked(), getDictId(), order_translation_locks(), and rw_mutex_.

Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap().

386  {
387  const auto source_dict_id = getDictId();
388  const auto dest_dict_id = dest_proxy->getDictId();
389 
390  std::shared_lock<std::shared_mutex> source_proxy_read_lock(rw_mutex_, std::defer_lock);
391  std::unique_lock<std::shared_mutex> dest_proxy_write_lock(dest_proxy->rw_mutex_,
392  std::defer_lock);
393  order_translation_locks(
394  source_dict_id, dest_dict_id, source_proxy_read_lock, dest_proxy_write_lock);
395  return buildIntersectionTranslationMapToOtherProxyUnlocked(dest_proxy, string_op_infos);
396 }
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
void order_translation_locks(const int32_t source_db_id, const int32_t source_dict_id, const int32_t dest_db_id, const int32_t dest_dict_id, std::shared_lock< std::shared_mutex > &source_read_lock, std::shared_lock< std::shared_mutex > &dest_read_lock)
int32_t getDictId() const noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxyUnlocked ( const StringDictionaryProxy * dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos 
) const
private

Definition at line 265 of file StringDictionaryProxy.cpp.

References CHECK_GT, CHECK_LE, StringDictionaryProxy::TranslationMap< T >::data(), DEBUG_TIMER, StringDictionaryProxy::TranslationMap< T >::empty(), entryCountUnlocked(), generation_, getTransientBulkImpl(), StringDictionaryProxy::TranslationMap< T >::getVectorMap(), initIdMap(), StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), StringDictionaryProxy::TranslationMap< T >::numTransients(), StringDictionaryProxy::TranslationMap< T >::setNumUntranslatedStrings(), StringDictionaryProxy::TranslationMap< T >::setRangeEnd(), StringDictionaryProxy::TranslationMap< T >::setRangeStart(), StringDictionaryProxy::TranslationMap< T >::storageData(), storageEntryCount(), string_dict_, shared::transform(), transient_string_vec_, transientEntryCountUnlocked(), and VLOG.

Referenced by buildIntersectionTranslationMapToOtherProxy(), and buildUnionTranslationMapToOtherProxy().

267  {
268  auto timer = DEBUG_TIMER(__func__);
269  IdMap id_map = initIdMap();
270 
271  if (id_map.empty()) {
272  return id_map;
273  }
274 
275  const StringOps_Namespace::StringOps string_ops(string_op_infos);
276 
277  // First map transient strings, store at front of vector map
278  const size_t num_transient_entries = id_map.numTransients();
279  size_t num_transient_strings_not_translated = 0UL;
280  if (num_transient_entries) {
281  std::vector<std::string> transient_lookup_strings(num_transient_entries);
282  if (string_ops.size()) {
283  std::transform(transient_string_vec_.cbegin(),
284  transient_string_vec_.cend(),
285  transient_lookup_strings.rbegin(),
286  [&](std::string const* ptr) { return string_ops(*ptr); });
287  } else {
288  std::transform(transient_string_vec_.cbegin(),
289  transient_string_vec_.cend(),
290  transient_lookup_strings.rbegin(),
291  [](std::string const* ptr) { return *ptr; });
292  }
293 
294  // This lookup may have a different snapshot of
295  // dest_proxy transients and dictionary than what happens under
296  // the below dest_proxy_read_lock. We may need an unlocked version of
297  // getTransientBulk to ensure consistency (I don't believe
298  // current behavior would cause crashes/races, verify this though)
299 
300  // Todo(mattp): Consider variant of getTransientBulkImp that can take
301  // a vector of pointer-to-strings so we don't have to materialize
302  // transient_string_vec_ into transient_lookup_strings.
303 
304  num_transient_strings_not_translated =
305  dest_proxy->getTransientBulkImpl(transient_lookup_strings, id_map.data(), false);
306  }
307 
308  // Now map strings in dictionary
309  // We place non-transient strings after the transient strings
310  // if they exist, otherwise at index 0
311  int32_t* translation_map_stored_entries_ptr = id_map.storageData();
312 
313  auto dest_transient_lookup_callback = [dest_proxy, translation_map_stored_entries_ptr](
314  const std::string_view& source_string,
315  const int32_t source_string_id) {
316  translation_map_stored_entries_ptr[source_string_id] =
317  dest_proxy->lookupTransientStringUnlocked(source_string);
318  return translation_map_stored_entries_ptr[source_string_id] ==
319  StringDictionary::INVALID_STR_ID;
320  };
321 
322  const size_t num_dest_transients = dest_proxy->transientEntryCountUnlocked();
323  const size_t num_persisted_strings_not_translated =
324  generation_ > 0 ? string_dict_->buildDictionaryTranslationMap(
325  dest_proxy->string_dict_.get(),
326  translation_map_stored_entries_ptr,
327  generation_,
328  dest_proxy->generation_,
329  num_dest_transients > 0UL,
330  dest_transient_lookup_callback,
331  string_op_infos)
332  : 0UL;
333 
334  const size_t num_dest_entries = dest_proxy->entryCountUnlocked();
335  const size_t num_total_entries =
336  id_map.getVectorMap().size() - 1UL /* account for skipped entry -1 */;
337  CHECK_GT(num_total_entries, 0UL);
338  const size_t num_strings_not_translated =
339  num_transient_strings_not_translated + num_persisted_strings_not_translated;
340  CHECK_LE(num_strings_not_translated, num_total_entries);
341  id_map.setNumUntranslatedStrings(num_strings_not_translated);
342 
343  // Below is a conservative setting of range based on the size of the destination proxy,
344  // but probably not worth a scan over the data (or inline computation as we translate)
345  // to compute the actual ranges
346 
347  id_map.setRangeStart(
348  num_dest_transients > 0 ? -1 - static_cast<int32_t>(num_dest_transients) : 0);
349  id_map.setRangeEnd(dest_proxy->storageEntryCount());
350 
351  const size_t num_entries_translated = num_total_entries - num_strings_not_translated;
352  const float match_pct =
353  100.0 * static_cast<float>(num_entries_translated) / num_total_entries;
354  VLOG(1) << std::fixed << std::setprecision(2) << match_pct << "% ("
355  << num_entries_translated << " entries) from dictionary ("
356  << string_dict_->getDbId() << ", " << string_dict_->getDictId() << ") with "
357  << num_total_entries << " total entries ( " << num_transient_entries
358  << " literals)"
359  << " translated to dictionary (" << dest_proxy->string_dict_->getDbId() << ", "
360  << dest_proxy->string_dict_->getDictId() << ") with " << num_dest_entries
361  << " total entries (" << dest_proxy->transientEntryCountUnlocked()
362  << " literals).";
363 
364  return id_map;
365 }
size_t transientEntryCountUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
#define CHECK_GT(x, y)
Definition: Logger.h:301
TranslationMap< int32_t > IdMap
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
size_t getTransientBulkImpl(const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:320
#define CHECK_LE(x, y)
Definition: Logger.h:300
#define DEBUG_TIMER(name)
Definition: Logger.h:407
#define VLOG(n)
Definition: Logger.h:383

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::TranslationMap< Datum > StringDictionaryProxy::buildNumericTranslationMap ( const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos) const

Builds a vectorized string_id translation map from this proxy to dest_proxy.

Parameters
dest_proxy - StringDictionaryProxy that we are to map this proxy's string ids to
Returns
An IdMap which encapsulates a std::vector<int32_t> of string ids for both transient and non-transient strings, mapping to their translated string_ids. offset_ is defined to be the number of transient entries + 1. The ordering of values in the vector_map_ is:
  • the transient ids (there are offset_-1 of these)
  • INVALID_STR_ID (=-1)
  • the non-transient string ids

For example, if there are 3 transient entries in this proxy and 20 in the underlying string dictionary, then vector_map_ will be of size() == 24 and offset_=3+1. The formula to translate ids is new_id = vector_map_[offset_ + old_id]. It is always the case that vector_map_[offset_-1]==-1 so that INVALID_STR_ID maps to INVALID_STR_ID.

Definition at line 217 of file StringDictionaryProxy.cpp.

References CHECK, DEBUG_TIMER, generation_, getStringUnlocked(), threading_serial::parallel_for(), string_dict_, and transient_string_vec_.

Referenced by RowSetMemoryOwner::addStringProxyNumericTranslationMap().

218  {
219  auto timer = DEBUG_TIMER(__func__);
220  CHECK(string_op_infos.size());
221  TranslationMap<Datum> translation_map(transient_string_vec_.size(), generation_);
222  if (translation_map.empty()) {
223  return translation_map;
224  }
225 
226  const StringOps_Namespace::StringOps string_ops(string_op_infos);
227 
228  const size_t num_transient_entries = translation_map.numTransients();
229  if (num_transient_entries) {
230  const int32_t map_domain_start = translation_map.domainStart();
231  if (num_transient_entries > 10000UL) {
232  tbb::parallel_for(
233  tbb::blocked_range<int32_t>(map_domain_start, -1),
234  [&](const tbb::blocked_range<int32_t>& r) {
235  const int32_t start_idx = r.begin();
236  const int32_t end_idx = r.end();
237  for (int32_t source_string_id = start_idx; source_string_id < end_idx;
238  ++source_string_id) {
239  const auto source_string = getStringUnlocked(source_string_id);
240  translation_map[source_string_id] = string_ops.numericEval(source_string);
241  }
242  });
243  } else {
244  for (int32_t source_string_id = map_domain_start; source_string_id < -1;
245  ++source_string_id) {
246  const auto source_string = getStringUnlocked(source_string_id);
247  translation_map[source_string_id] = string_ops.numericEval(source_string);
248  }
249  }
250  }
251 
252  Datum* translation_map_stored_entries_ptr = translation_map.storageData();
253  if (generation_ > 0) {
254  string_dict_->buildDictionaryNumericTranslationMap(
255  translation_map_stored_entries_ptr, generation_, string_op_infos);
256  }
257  translation_map.setNumUntranslatedStrings(0UL);
258 
259  // Todo(todd): Set range start/end with scan
260 
261  return translation_map;
262 }
std::string getStringUnlocked(const int32_t string_id) const
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
#define CHECK(condition)
Definition: Logger.h:289
#define DEBUG_TIMER(name)
Definition: Logger.h:407
Definition: Datum.h:67

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::buildUnionTranslationMapToOtherProxy ( StringDictionaryProxy * dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_types 
) const

Definition at line 398 of file StringDictionaryProxy.cpp.

References buildIntersectionTranslationMapToOtherProxyUnlocked(), DEBUG_TIMER, getDictId(), getOrAddTransientUnlocked(), getStringUnlocked(), StringDictionary::INVALID_STR_ID, order_translation_locks(), rw_mutex_, string_dict_, to_string(), and transientEntryCountUnlocked().

Referenced by RowSetMemoryOwner::addStringProxyUnionTranslationMap().

400  {
401  auto timer = DEBUG_TIMER(__func__);
402 
403  const auto source_dict_id = getDictId();
404  const auto dest_dict_id = dest_proxy->getDictId();
405  std::shared_lock<std::shared_mutex> source_proxy_read_lock(rw_mutex_, std::defer_lock);
406  std::unique_lock<std::shared_mutex> dest_proxy_write_lock(dest_proxy->rw_mutex_,
407  std::defer_lock);
408  order_translation_locks(
409  source_dict_id, dest_dict_id, source_proxy_read_lock, dest_proxy_write_lock);
410 
411  auto id_map =
412  buildIntersectionTranslationMapToOtherProxyUnlocked(dest_proxy, string_op_infos);
413  if (id_map.empty()) {
414  return id_map;
415  }
416  const auto num_untranslated_strings = id_map.numUntranslatedStrings();
417  if (num_untranslated_strings > 0) {
418  const size_t total_post_translation_dest_transients =
419  num_untranslated_strings + dest_proxy->transientEntryCountUnlocked();
420  constexpr size_t max_allowed_transients =
421  static_cast<size_t>(std::numeric_limits<int32_t>::max() -
422  2); /* -2 accounts for INVALID_STR_ID and NULL value */
423  if (total_post_translation_dest_transients > max_allowed_transients) {
424  throw std::runtime_error("Union translation to dictionary" +
425  std::to_string(getDictId()) + " would result in " +
426  std::to_string(total_post_translation_dest_transients) +
427  " transient entries, which is more than limit of " +
428  std::to_string(max_allowed_transients) + " transients.");
429  }
430  const int32_t map_domain_start = id_map.domainStart();
431  const int32_t map_domain_end = id_map.domainEnd();
432 
433  const StringOps_Namespace::StringOps string_ops(string_op_infos);
434  const bool has_string_ops = string_ops.size();
435 
436  // First iterate over transient strings and add to dest map
437  // Todo (todd): Add call to fetch string_views (local) or strings (distributed)
438  // for all non-translated ids to avoid string-by-string fetch
439 
440  for (int32_t source_string_id = map_domain_start; source_string_id < -1;
441  ++source_string_id) {
442  if (id_map[source_string_id] == StringDictionary::INVALID_STR_ID) {
443  const auto source_string = getStringUnlocked(source_string_id);
444  const auto dest_string_id = dest_proxy->getOrAddTransientUnlocked(
445  has_string_ops ? string_ops(source_string) : source_string);
446  id_map[source_string_id] = dest_string_id;
447  }
448  }
449  // Now iterate over stored strings
450  for (int32_t source_string_id = 0; source_string_id < map_domain_end;
451  ++source_string_id) {
452  if (id_map[source_string_id] == StringDictionary::INVALID_STR_ID) {
453  const auto source_string = string_dict_->getString(source_string_id);
454  const auto dest_string_id = dest_proxy->getOrAddTransientUnlocked(
455  has_string_ops ? string_ops(source_string) : source_string);
456  id_map[source_string_id] = dest_string_id;
457  }
458  }
459  }
460  // We may have added transients to the destination proxy, use this to update
461  // our id map range (used downstream for ExpressionRange)
462 
463  const size_t num_dest_transients = dest_proxy->transientEntryCountUnlocked();
464  id_map.setRangeStart(
465  num_dest_transients > 0 ? -1 - static_cast<int32_t>(num_dest_transients) : 0);
466  return id_map;
467 }
size_t transientEntryCountUnlocked() const
std::string getStringUnlocked(const int32_t string_id) const
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
std::string to_string(char const *&&v)
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
void order_translation_locks(const int32_t source_db_id, const int32_t source_dict_id, const int32_t dest_db_id, const int32_t dest_dict_id, std::shared_lock< std::shared_mutex > &source_read_lock, std::shared_lock< std::shared_mutex > &dest_read_lock)
int32_t getOrAddTransientUnlocked(String const &)
int32_t getDictId() const noexcept
#define DEBUG_TIMER(name)
Definition: Logger.h:407

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void StringDictionaryProxy::eachStringSerially ( StringDictionary::StringCallback & serial_callback) const

Definition at line 614 of file StringDictionaryProxy.cpp.

References generation_, string_dict_, and transient_string_vec_.

Referenced by transientUnion().

615  {
616  constexpr int32_t max_transient_id = -2;
617  // Iterate over transient strings.
618  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
619  std::string const& str = *transient_string_vec_[index];
620  int32_t const string_id = max_transient_id - index;
621  serial_callback(str, string_id);
622  }
623  // Iterate over non-transient strings.
624  string_dict_->eachStringSerially(generation_, serial_callback);
625 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::entryCount ( ) const

Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount()

Returns
size_t Number of total string entries for this proxy

Definition at line 608 of file StringDictionaryProxy.cpp.

References entryCountUnlocked(), and rw_mutex_.

608  {
609  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
610  return entryCountUnlocked();
611 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock

+ Here is the call graph for this function:

size_t StringDictionaryProxy::entryCountUnlocked ( ) const
private

Definition at line 604 of file StringDictionaryProxy.cpp.

References storageEntryCount(), and transientEntryCountUnlocked().

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and entryCount().

604  {
605  return storageEntryCount() + transientEntryCountUnlocked();
606 }
size_t transientEntryCountUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getCompare ( const std::string &  pattern,
const std::string &  comp_operator 
) const

Definition at line 533 of file StringDictionaryProxy.cpp.

References CHECK_GE, anonymous_namespace{StringDictionaryProxy.cpp}::do_compare(), generation_, run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

Referenced by anonymous_namespace{StringOpsIR.cpp}::get_compared_ids().

535  {
536  CHECK_GE(generation_, 0);
537  auto result = string_dict_->getCompare(pattern, comp_operator, generation_);
538  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
539  if (do_compare(*transient_string_vec_[index], pattern, comp_operator)) {
540  result.push_back(transientIndexToId(index));
541  }
542  }
543  return result;
544 }
#define CHECK_GE(x, y)
Definition: Logger.h:302
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)
bool do_compare(const std::string &str, const std::string &pattern, const std::string &comp_operator)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getDictId ( ) const
inline noexcept

Definition at line 47 of file StringDictionaryProxy.h.

References string_dict_id_.

Referenced by buildIntersectionTranslationMapToOtherProxy(), buildUnionTranslationMapToOtherProxy(), TransientStringLiteralsVisitor::visitStringOper(), and TransientStringLiteralsVisitor::visitUOper().

47 { return string_dict_id_; };

+ Here is the caller graph for this function:

StringDictionary * StringDictionaryProxy::getDictionary ( ) const
noexcept

Definition at line 807 of file StringDictionaryProxy.cpp.

References string_dict_.

Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap(), RowSetMemoryOwner::addStringProxyNumericTranslationMap(), and RowSetMemoryOwner::addStringProxyUnionTranslationMap().

807  {
808  return string_dict_.get();
809 }
std::shared_ptr< StringDictionary > string_dict_

+ Here is the caller graph for this function:

int64_t StringDictionaryProxy::getGeneration ( ) const
noexcept

Definition at line 811 of file StringDictionaryProxy.cpp.

References generation_.

811  {
812  return generation_;
813 }
int32_t StringDictionaryProxy::getIdOfString ( const std::string &  str) const

Definition at line 119 of file StringDictionaryProxy.cpp.

References getIdOfStringFromClient(), StringDictionary::INVALID_STR_ID, rw_mutex_, and transient_str_to_int_.

Referenced by anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), and Executor::serializeLiterals().

119  {
120  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
121  auto const str_id = getIdOfStringFromClient(str);
122  if (str_id != StringDictionary::INVALID_STR_ID || transient_str_to_int_.empty()) {
123  return str_id;
124  }
125  auto it = transient_str_to_int_.find(str);
126  return it != transient_str_to_int_.end() ? it->second
127  : StringDictionary::INVALID_STR_ID;
128 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
int32_t getIdOfStringFromClient(String const &) const
static constexpr int32_t INVALID_STR_ID

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::getIdOfStringFromClient ( String const &  str) const
private

Definition at line 131 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, string_dict_, and truncate_to_generation().

Referenced by getIdOfString(), and getOrAddTransientImpl().

131  {
132  CHECK_GE(generation_, 0);
133  return truncate_to_generation(string_dict_->getIdOfString(str), generation_);
134 }
#define CHECK_GE(x, y)
Definition: Logger.h:302
std::shared_ptr< StringDictionary > string_dict_
int32_t truncate_to_generation(const int32_t id, const size_t generation)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getIdOfStringNoGeneration ( const std::string &  str) const

Definition at line 136 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, and transient_str_to_int_.

136  {
137  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
138  auto str_id = string_dict_->getIdOfString(str);
139  if (str_id != StringDictionary::INVALID_STR_ID || transient_str_to_int_.empty()) {
140  return str_id;
141  }
142  auto it = transient_str_to_int_.find(str);
143  return it != transient_str_to_int_.end() ? it->second
144  : StringDictionary::INVALID_STR_ID;
145 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
std::vector< int32_t > StringDictionaryProxy::getLike ( const std::string &  pattern,
const bool  icase,
const bool  is_simple,
const char  escape 
) const

Definition at line 495 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

498  {
499  CHECK_GE(generation_, 0);
500  auto result = string_dict_->getLike(pattern, icase, is_simple, escape, generation_);
501  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
502  if (is_like(*transient_string_vec_[index], pattern, icase, is_simple, escape)) {
503  result.push_back(transientIndexToId(index));
504  }
505  }
506  return result;
507 }
#define CHECK_GE(x, y)
Definition: Logger.h:302
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
bool is_like(const std::string &str, const std::string &pattern, const bool icase, const bool is_simple, const char escape)
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

int32_t StringDictionaryProxy::getOrAdd ( const std::string &  str)
noexcept

Definition at line 568 of file StringDictionaryProxy.cpp.

Referenced by DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId().

568  {
569  return string_dict_->getOrAdd(str);
570 }
std::shared_ptr< StringDictionary > string_dict_

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getOrAddTransient ( const std::string &  str)

Definition at line 111 of file StringDictionaryProxy.cpp.

Referenced by apply_multi_input_string_ops_and_encode(), apply_string_ops_and_encode(), populate_output_stats_cols(), Executor::serializeLiterals(), string_compress(), TransientStringLiteralsVisitor::visitConstant(), TransientStringLiteralsVisitor::visitStringOper(), and write_string_to_proxy().

111  {
112  return getOrAddTransientImpl<std::string const&>(str);
113 }

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getOrAddTransient ( const std::string_view  sv)

Definition at line 115 of file StringDictionaryProxy.cpp.

115  {
116  return getOrAddTransientImpl<std::string_view const>(sv);
117 }
std::vector< int32_t > StringDictionaryProxy::getOrAddTransientBulk ( const std::vector< std::string > &  strings)

Definition at line 60 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, rw_mutex_, and string_dict_.

Referenced by supported_ml_frameworks__cpu_().

61  {
62  CHECK_GE(generation_, 0);
63  const size_t num_strings = strings.size();
64  std::vector<int32_t> string_ids(num_strings);
65  if (num_strings == 0) {
66  return string_ids;
67  }
68  // Since new strings added to a StringDictionaryProxy are not materialized in the
69  // proxy's underlying StringDictionary, we can use the fast parallel
70  // StringDictionary::getBulk method to fetch ids from the underlying dictionary (which
71  // will return StringDictionary::INVALID_STR_ID for strings that don't exist)
72 
73  // Don't need to be under lock here as the string ids for strings in the underlying
74  // materialized dictionary are immutable
75  const size_t num_strings_not_found =
76  string_dict_->getBulk(strings, string_ids.data(), generation_);
77  if (num_strings_not_found > 0) {
78  std::lock_guard<std::shared_mutex> write_lock(rw_mutex_);
79  for (size_t string_idx = 0; string_idx < num_strings; ++string_idx) {
80  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
81  string_ids[string_idx] = getOrAddTransientUnlocked(strings[string_idx]);
82  }
83  }
84  }
85  return string_ids;
86 }
#define CHECK_GE(x, y)
Definition: Logger.h:302
heavyai::unique_lock< heavyai::shared_mutex > write_lock
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
int32_t getOrAddTransientUnlocked(String const &)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::getOrAddTransientImpl ( String  str)
private

Definition at line 102 of file StringDictionaryProxy.cpp.

References getIdOfStringFromClient(), getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, and rw_mutex_.

102  {
103  auto const string_id = getIdOfStringFromClient(str);
104  if (string_id != StringDictionary::INVALID_STR_ID) {
105  return string_id;
106  }
107  std::lock_guard<std::shared_mutex> write_lock(rw_mutex_);
108  return getOrAddTransientUnlocked(str);
109 }
heavyai::unique_lock< heavyai::shared_mutex > write_lock
int32_t getIdOfStringFromClient(String const &) const
static constexpr int32_t INVALID_STR_ID
int32_t getOrAddTransientUnlocked(String const &)

+ Here is the call graph for this function:

template<typename String >
int32_t StringDictionaryProxy::getOrAddTransientUnlocked ( String const &  str)
private

Definition at line 89 of file StringDictionaryProxy.cpp.

References transient_str_to_int_, transient_string_vec_, and transientIndexToId().

Referenced by buildUnionTranslationMapToOtherProxy(), getOrAddTransientBulk(), getOrAddTransientImpl(), StringLocalCallback::operator()(), and StringNetworkCallback::operator()().

89  {
90  unsigned const new_index = transient_str_to_int_.size();
91  auto transient_id = transientIndexToId(new_index);
92  auto const emplaced = transient_str_to_int_.emplace(str, transient_id);
93  if (emplaced.second) { // (str, transient_id) was added to transient_str_to_int_.
94  transient_string_vec_.push_back(&emplaced.first->first);
95  } else { // str already exists in transient_str_to_int_. Return existing transient_id.
96  transient_id = emplaced.first->second;
97  }
98  return transient_id;
99 }
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getRegexpLike ( const std::string &  pattern,
const char  escape 
) const

Definition at line 556 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

557  {
558  CHECK_GE(generation_, 0);
559  auto result = string_dict_->getRegexpLike(pattern, escape, generation_);
560  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
561  if (is_regexp_like(*transient_string_vec_[index], pattern, escape)) {
562  result.push_back(transientIndexToId(index));
563  }
564  }
565  return result;
566 }
#define CHECK_GE(x, y)
Definition: Logger.h:302
bool is_regexp_like(const std::string &str, const std::string &pattern, const char escape)
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

std::string StringDictionaryProxy::getString ( int32_t  string_id) const

Definition at line 172 of file StringDictionaryProxy.cpp.

References getStringUnlocked(), and rw_mutex_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::build_string_array_target_value(), StringValueConverter::convertToColumnarFormatFromDict(), DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), ResultSet::getString(), intersect_translate_string_id_to_other_dict(), and union_translate_string_id_to_other_dict().

172  {
173  if (inline_int_null_value<int32_t>() == string_id) {
174  return "";
175  }
176  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
177  return getStringUnlocked(string_id);
178 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
std::string getStringUnlocked(const int32_t string_id) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< const char *, size_t > StringDictionaryProxy::getStringBytes ( int32_t  string_id) const
noexcept

Definition at line 572 of file StringDictionaryProxy.cpp.

References CHECK_LT.

Referenced by anonymous_namespace{ExternalExecutor.cpp}::decode_string(), string_decompress(), StringDictionaryProxy_getStringBytes(), and StringDictionaryProxy_getStringLength().

573  {
574  if (string_id >= 0) {
575  return string_dict_.get()->getStringBytes(string_id);
576  }
577  unsigned const string_index = transientIdToIndex(string_id);
578  CHECK_LT(string_index, transient_string_vec_.size());
579  std::string const* const str_ptr = transient_string_vec_[string_index];
580  return {str_ptr->c_str(), str_ptr->size()};
581 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
#define CHECK_LT(x, y)
Definition: Logger.h:299
static unsigned transientIdToIndex(int32_t const id)

+ Here is the caller graph for this function:

std::vector< std::string > StringDictionaryProxy::getStrings ( const std::vector< int32_t > &  string_ids) const

Definition at line 189 of file StringDictionaryProxy.cpp.

References string_dict_, transient_string_vec_, and transientIdToIndex().

190  {
191  std::vector<std::string> strings;
192  if (!string_ids.empty()) {
193  strings.reserve(string_ids.size());
194  for (const auto string_id : string_ids) {
195  if (string_id >= 0) {
196  strings.emplace_back(string_dict_->getString(string_id));
197  } else if (inline_int_null_value<int32_t>() == string_id) {
198  strings.emplace_back("");
199  } else {
200  unsigned const string_index = transientIdToIndex(string_id);
201  strings.emplace_back(*transient_string_vec_[string_index]);
202  }
203  }
204  }
205  return strings;
206 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static unsigned transientIdToIndex(int32_t const id)

+ Here is the call graph for this function:

std::string StringDictionaryProxy::getStringUnlocked ( const int32_t  string_id) const
private

Definition at line 180 of file StringDictionaryProxy.cpp.

References CHECK_LT, storageEntryCount(), string_dict_, transient_string_vec_, and transientIdToIndex().

Referenced by buildNumericTranslationMap(), buildUnionTranslationMapToOtherProxy(), and getString().

180  {
181  if (string_id >= 0 && storageEntryCount() > 0) {
182  return string_dict_->getString(string_id);
183  }
184  unsigned const string_index = transientIdToIndex(string_id);
185  CHECK_LT(string_index, transient_string_vec_.size());
186  return *transient_string_vec_[string_index];
187 }
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
#define CHECK_LT(x, y)
Definition: Logger.h:299
static unsigned transientIdToIndex(int32_t const id)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getTransientBulk ( const std::vector< std::string > &  strings) const

Executes read-only lookup of a vector of strings and returns a vector of their integer ids.

This function, unlike getOrAddTransientBulk, will not add strings to the dictionary. Use this function if strings that don't currently exist in the StringDictionaryProxy should not be added to the proxy as transient entries. This method also has performance advantages over getOrAddTransientBulk for read-only use cases, in that it can: 1) take a read lock instead of a write lock for the transient lookups, and 2) use a tbb::parallel_for implementation of the transient string lookups, as we are guaranteed that the underlying map of strings to int ids cannot change.

Parameters
strings - Vector of strings to perform string id lookups on
Returns
A vector of string_ids of the same length as strings, containing the ids of any strings that were found in the underlying StringDictionary instance or in the proxy's transient map, and StringDictionary::INVALID_STR_ID for strings that were not found.

Definition at line 52 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, and getTransientBulkImpl().

53  {
55  std::vector<int32_t> string_ids(strings.size());
56  getTransientBulkImpl(strings, string_ids.data(), true);
57  return string_ids;
58 }
#define CHECK_GE(x, y)
Definition: Logger.h:302
size_t getTransientBulkImpl(const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const

+ Here is the call graph for this function:

size_t StringDictionaryProxy::getTransientBulkImpl ( const std::vector< std::string > &  strings,
int32_t *  string_ids,
const bool  take_read_lock 
) const
private

Definition at line 704 of file StringDictionaryProxy.cpp.

References generation_, string_dict_, and transientLookupBulk().

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and getTransientBulk().

707  {
708  const size_t num_strings = strings.size();
709  if (num_strings == 0) {
710  return 0UL;
711  }
712  // StringDictionary::getBulk returns the number of strings not found
713  if (string_dict_->getBulk(strings, string_ids, generation_) == 0UL) {
714  return 0UL;
715  }
716 
717  // If here, dictionary could not find at least 1 target string,
718  // now look these up in the transient dictionary
719  // transientLookupBulk returns the number of strings not found
720  return transientLookupBulk(strings, string_ids, take_read_lock);
721 }
size_t transientLookupBulk(const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const
std::shared_ptr< StringDictionary > string_dict_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const std::vector<std::string const*>& StringDictionaryProxy::getTransientVector ( ) const
inline

Definition at line 244 of file StringDictionaryProxy.h.

References transient_string_vec_.

Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().

244  {
245  return transient_string_vec_;
246  }
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

IdMap StringDictionaryProxy::initIdMap ( ) const
inline

Definition at line 143 of file StringDictionaryProxy.h.

References generation_, StringDictionary::INVALID_STR_ID, and transient_string_vec_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and transientUnion().

143  {
144  return IdMap(
146  }
TranslationMap< int32_t > IdMap
static constexpr int32_t INVALID_STR_ID
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::lookupTransientStringUnlocked ( const String &  lookup_string) const
private

Definition at line 209 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and transient_str_to_int_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().

210  {
211  const auto it = transient_str_to_int_.find(lookup_string);
212  return it == transient_str_to_int_.end() ? StringDictionary::INVALID_STR_ID
213  : it->second;
214 }
static constexpr int32_t INVALID_STR_ID

+ Here is the caller graph for this function:

bool StringDictionaryProxy::operator!= ( StringDictionaryProxy const &  rhs) const

Definition at line 820 of file StringDictionaryProxy.cpp.

References operator==().

820  {
821  return !operator==(rhs);
822 }
bool operator==(StringDictionaryProxy const &) const

+ Here is the call graph for this function:

StringDictionaryProxy const& StringDictionaryProxy::operator= ( StringDictionaryProxy const &  )
delete
bool StringDictionaryProxy::operator== ( StringDictionaryProxy const &  rhs) const

Definition at line 815 of file StringDictionaryProxy.cpp.

References string_dict_id_, and transient_str_to_int_.

Referenced by operator!=().

815  {
816  return string_dict_id_ == rhs.string_dict_id_ &&
817  transient_str_to_int_ == rhs.transient_str_to_int_;
818 }

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::persistedC ( ) const
private
size_t StringDictionaryProxy::storageEntryCount ( ) const

Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary.

Returns
size_t Number of entries in the string dictionary (at this proxy's generation if set)

Definition at line 583 of file StringDictionaryProxy.cpp.

References CHECK_LE, generation_, and string_dict_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), entryCountUnlocked(), and getStringUnlocked().

583  {
584  const size_t num_storage_entries{generation_ == -1 ? string_dict_->storageEntryCount()
585  : generation_};
586  CHECK_LE(num_storage_entries, static_cast<size_t>(std::numeric_limits<int32_t>::max()));
587  return num_storage_entries;
588 }
std::shared_ptr< StringDictionary > string_dict_
#define CHECK_LE(x, y)
Definition: Logger.h:300

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::transientEntryCount ( ) const

Returns the number of transient string entries for this proxy.

Returns
size_t Number of transient string entries for this proxy

Definition at line 599 of file StringDictionaryProxy.cpp.

References rw_mutex_, and transientEntryCountUnlocked().

599  {
600  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
601  return transientEntryCountUnlocked();
602 }
size_t transientEntryCountUnlocked() const
heavyai::shared_lock< heavyai::shared_mutex > read_lock

+ Here is the call graph for this function:

size_t StringDictionaryProxy::transientEntryCountUnlocked ( ) const
private

Definition at line 590 of file StringDictionaryProxy.cpp.

References CHECK_LE, and transient_str_to_int_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildUnionTranslationMapToOtherProxy(), entryCountUnlocked(), and transientEntryCount().

590  {
591  // CHECK_LE(num_storage_entries,
592  // static_cast<size_t>(std::numeric_limits<int32_t>::max()));
593  const size_t num_transient_entries{transient_str_to_int_.size()};
594  CHECK_LE(num_transient_entries,
595  static_cast<size_t>(std::numeric_limits<int32_t>::max()) - 1);
596  return num_transient_entries;
597 }
#define CHECK_LE(x, y)
Definition: Logger.h:300

+ Here is the caller graph for this function:

static unsigned StringDictionaryProxy::transientIdToIndex ( int32_t const  id)
inlinestatic

Definition at line 250 of file StringDictionaryProxy.h.

Referenced by getStrings(), getStringUnlocked(), and StringDictionary::populate_string_ids().

250  {
251  constexpr int max_transient_string_id = -2;
252  return static_cast<unsigned>(max_transient_string_id - id);
253  }

+ Here is the caller graph for this function:

static int32_t StringDictionaryProxy::transientIndexToId ( unsigned const  index)
inlinestatic

Definition at line 255 of file StringDictionaryProxy.h.

Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), getCompare(), getLike(), getOrAddTransientUnlocked(), getRegexpLike(), and ArrowResultSetConverter::initializeColumnBuilder().

255  {
256  constexpr int max_transient_string_id = -2;
257  return static_cast<int32_t>(max_transient_string_id - index);
258  }

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulk ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids,
const bool  take_read_lock 
) const
private

Definition at line 724 of file StringDictionaryProxy.cpp.

References rw_mutex_, transient_str_to_int_, transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().

Referenced by getTransientBulkImpl().

727  {
728  const size_t num_strings = lookup_strings.size();
729  auto read_lock = take_read_lock ? std::shared_lock<std::shared_mutex>(rw_mutex_)
730  : std::shared_lock<std::shared_mutex>();
731 
732  if (num_strings == static_cast<size_t>(0) || transient_str_to_int_.empty()) {
733  return 0UL;
734  }
735  constexpr size_t tbb_parallel_threshold{20000};
736  if (num_strings < tbb_parallel_threshold) {
737  return transientLookupBulkUnlocked(lookup_strings, string_ids);
738  } else {
739  return transientLookupBulkParallelUnlocked(lookup_strings, string_ids);
740  }
741 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
size_t transientLookupBulkUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const
size_t transientLookupBulkParallelUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulkParallelUnlocked ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 764 of file StringDictionaryProxy.cpp.

References CHECK_GE, StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, and threading_serial::parallel_for().

Referenced by transientLookupBulk().

766  {
767  const size_t num_lookup_strings = lookup_strings.size();
768  const size_t target_inputs_per_thread = 20000L;
769  ThreadInfo thread_info(
770  std::thread::hardware_concurrency(), num_lookup_strings, target_inputs_per_thread);
771  CHECK_GE(thread_info.num_threads, 1L);
772  CHECK_GE(thread_info.num_elems_per_thread, 1L);
773 
774  std::vector<size_t> num_strings_not_found_per_thread(thread_info.num_threads, 0UL);
775 
776  tbb::task_arena limited_arena(thread_info.num_threads);
777  limited_arena.execute([&] {
779  tbb::blocked_range<size_t>(
780  0, num_lookup_strings, thread_info.num_elems_per_thread /* tbb grain_size */),
781  [&](const tbb::blocked_range<size_t>& r) {
782  const size_t start_idx = r.begin();
783  const size_t end_idx = r.end();
784  size_t num_local_strings_not_found = 0;
785  for (size_t string_idx = start_idx; string_idx < end_idx; ++string_idx) {
786  if (string_ids[string_idx] != StringDictionary::INVALID_STR_ID) {
787  continue;
788  }
789  string_ids[string_idx] =
790  lookupTransientStringUnlocked(lookup_strings[string_idx]);
791  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
792  num_local_strings_not_found++;
793  }
794  }
795  const size_t tbb_thread_idx = tbb::this_task_arena::current_thread_index();
796  num_strings_not_found_per_thread[tbb_thread_idx] = num_local_strings_not_found;
797  },
798  tbb::simple_partitioner());
799  });
800  size_t num_strings_not_found = 0;
801  for (int64_t thread_idx = 0; thread_idx < thread_info.num_threads; ++thread_idx) {
802  num_strings_not_found += num_strings_not_found_per_thread[thread_idx];
803  }
804  return num_strings_not_found;
805 }
#define CHECK_GE(x, y)
Definition: Logger.h:302
static constexpr int32_t INVALID_STR_ID
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulkUnlocked ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 744 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and lookupTransientStringUnlocked().

Referenced by transientLookupBulk().

746  {
747  const size_t num_strings = lookup_strings.size();
748  size_t num_strings_not_found = 0;
749  for (size_t string_idx = 0; string_idx < num_strings; ++string_idx) {
750  if (string_ids[string_idx] != StringDictionary::INVALID_STR_ID) {
751  continue;
752  }
753  // If we're here it means we need to look up this string as we don't
754  // have a valid id for it
755  string_ids[string_idx] = lookupTransientStringUnlocked(lookup_strings[string_idx]);
756  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
757  num_strings_not_found++;
758  }
759  }
760  return num_strings_not_found;
761 }
static constexpr int32_t INVALID_STR_ID
int32_t lookupTransientStringUnlocked(const String &lookup_string) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::transientUnion ( StringDictionaryProxy const &  sdp_rhs)

Definition at line 678 of file StringDictionaryProxy.cpp.

References eachStringSerially(), initIdMap(), and string_dict_.

679  {
680  IdMap id_map = sdp_rhs.initIdMap();
681  // serial_callback cannot be parallelized due to calling getOrAddTransientUnlocked().
682  std::unique_ptr<StringDictionary::StringCallback> serial_callback;
683  if (string_dict_->isClient()) {
684  serial_callback = std::make_unique<StringNetworkCallback>(this, id_map);
685  } else {
686  serial_callback = std::make_unique<StringLocalCallback>(this, id_map);
687  }
688  // Import all non-duplicate strings (transient and non-transient) and add to id_map.
689  sdp_rhs.eachStringSerially(*serial_callback);
690  return id_map;
691 }
TranslationMap< int32_t > IdMap
std::shared_ptr< StringDictionary > string_dict_

+ Here is the call graph for this function:

void StringDictionaryProxy::updateGeneration ( const int64_t  generation)
noexcept

Definition at line 693 of file StringDictionaryProxy.cpp.

References CHECK_EQ.

693  {
694  if (generation == -1) {
695  return;
696  }
697  if (generation_ != -1) {
698  CHECK_EQ(generation_, generation);
699  return;
700  }
701  generation_ = generation;
702 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297

Friends And Related Function Documentation

friend class StringLocalCallback
friend

Definition at line 308 of file StringDictionaryProxy.h.

friend class StringNetworkCallback
friend

Definition at line 309 of file StringDictionaryProxy.h.

Member Data Documentation

const int32_t StringDictionaryProxy::string_dict_id_
private

Definition at line 295 of file StringDictionaryProxy.h.

Referenced by getDictId(), and operator==().


The documentation for this class was generated from the following files: