OmniSciDB  ca0c39ec8f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringDictionaryProxy Class Reference

#include <StringDictionaryProxy.h>

Classes

struct  HeterogeneousStringEqual
 
struct  HeterogeneousStringHash
 
class  TranslationMap
 

Public Types

using IdMap = TranslationMap< int32_t >
 
using TransientMap = robin_hood::unordered_node_map< std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual >
 

Public Member Functions

 StringDictionaryProxy (StringDictionaryProxy const &)=delete
 
StringDictionaryProxy const & operator= (StringDictionaryProxy const &)=delete
 
 StringDictionaryProxy (std::shared_ptr< StringDictionary > sd, const int32_t string_dict_id, const int64_t generation)
 
int32_t getDictId () const noexcept
 
bool operator== (StringDictionaryProxy const &) const
 
bool operator!= (StringDictionaryProxy const &) const
 
int32_t getOrAdd (const std::string &str) noexcept
 
StringDictionary * getDictionary () const noexcept
 
int64_t getGeneration () const noexcept
 
std::vector< int32_t > getTransientBulk (const std::vector< std::string > &strings) const
 Executes read-only lookup of a vector of strings and returns a vector of their integer ids. More...
 
int32_t getOrAddTransient (const std::string &str)
 
std::vector< int32_t > getOrAddTransientBulk (const std::vector< std::string > &strings)
 
int32_t getIdOfString (const std::string &str) const
 
int32_t getIdOfStringNoGeneration (const std::string &str) const
 
std::string getString (int32_t string_id) const
 
std::vector< std::string > getStrings (const std::vector< int32_t > &string_ids) const
 
std::pair< const char *, size_t > getStringBytes (int32_t string_id) const noexcept
 
IdMap initIdMap () const
 
TranslationMap< Datum > buildNumericTranslationMap (const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 Builds a vectorized string_id translation map from this proxy to dest_proxy. More...
 
IdMap buildIntersectionTranslationMapToOtherProxy (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 
IdMap buildUnionTranslationMapToOtherProxy (StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_types) const
 
size_t storageEntryCount () const
 Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary. More...
 
size_t transientEntryCount () const
 Returns the number of transient string entries for this proxy. More...
 
size_t entryCount () const
 Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount() More...
 
void updateGeneration (const int64_t generation) noexcept
 
std::vector< int32_t > getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape) const
 
std::vector< int32_t > getCompare (const std::string &pattern, const std::string &comp_operator) const
 
std::vector< int32_t > getRegexpLike (const std::string &pattern, const char escape) const
 
const std::vector< std::string const * > & getTransientVector () const
 
void eachStringSerially (StringDictionary::StringCallback &) const
 
IdMap transientUnion (StringDictionaryProxy const &)
 

Static Public Member Functions

static unsigned transientIdToIndex (int32_t const id)
 
static int32_t transientIndexToId (unsigned const index)
 

Private Member Functions

std::string getStringUnlocked (const int32_t string_id) const
 
size_t transientEntryCountUnlocked () const
 
size_t entryCountUnlocked () const
 
size_t persistedC () const
 
template<typename String >
int32_t lookupTransientStringUnlocked (const String &lookup_string) const
 
size_t getTransientBulkImpl (const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const
 
template<typename String >
size_t transientLookupBulk (const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const
 
template<typename String >
size_t transientLookupBulkUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 
template<typename String >
size_t transientLookupBulkParallelUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 
template<typename String >
int32_t getIdOfStringFromClient (String const &) const
 
template<typename String >
int32_t getOrAddTransientUnlocked (String const &)
 

Private Attributes

std::shared_ptr< StringDictionary > string_dict_
 
const int32_t string_dict_id_
 
TransientMap transient_str_to_int_
 
std::vector< std::string const * > transient_string_vec_
 
int64_t generation_
 
std::shared_mutex rw_mutex_
 

Friends

class StringLocalCallback
 
class StringNetworkCallback
 

Detailed Description

Definition at line 39 of file StringDictionaryProxy.h.

Member Typedef Documentation

Definition at line 140 of file StringDictionaryProxy.h.

using StringDictionaryProxy::TransientMap = robin_hood::unordered_node_map<std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual>

Definition at line 241 of file StringDictionaryProxy.h.

Constructor & Destructor Documentation

StringDictionaryProxy::StringDictionaryProxy ( StringDictionaryProxy const &  )
delete
StringDictionaryProxy::StringDictionaryProxy ( std::shared_ptr< StringDictionary > sd,
const int32_t  string_dict_id,
const int64_t  generation 
)

Definition at line 39 of file StringDictionaryProxy.cpp.

42  : string_dict_(sd), string_dict_id_(string_dict_id), generation_(generation) {}
std::shared_ptr< StringDictionary > string_dict_

Member Function Documentation

StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxy ( const StringDictionaryProxy * dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos 
) const

Definition at line 375 of file StringDictionaryProxy.cpp.

References buildIntersectionTranslationMapToOtherProxyUnlocked(), getDictId(), order_translation_locks(), and rw_mutex_.

Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap().

377  {
378  const auto source_dict_id = getDictId();
379  const auto dest_dict_id = dest_proxy->getDictId();
380 
381  std::shared_lock<std::shared_mutex> source_proxy_read_lock(rw_mutex_, std::defer_lock);
382  std::unique_lock<std::shared_mutex> dest_proxy_write_lock(dest_proxy->rw_mutex_,
383  std::defer_lock);
384  order_translation_locks(
385  source_dict_id, dest_dict_id, source_proxy_read_lock, dest_proxy_write_lock);
386  return buildIntersectionTranslationMapToOtherProxyUnlocked(dest_proxy, string_op_infos);
387 }
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
void order_translation_locks(const int32_t source_db_id, const int32_t source_dict_id, const int32_t dest_db_id, const int32_t dest_dict_id, std::shared_lock< std::shared_mutex > &source_read_lock, std::shared_lock< std::shared_mutex > &dest_read_lock)
int32_t getDictId() const noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxyUnlocked ( const StringDictionaryProxy * dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos 
) const
private

Definition at line 256 of file StringDictionaryProxy.cpp.

References CHECK_GT, CHECK_LE, StringDictionaryProxy::TranslationMap< T >::data(), DEBUG_TIMER, StringDictionaryProxy::TranslationMap< T >::empty(), entryCountUnlocked(), generation_, getTransientBulkImpl(), StringDictionaryProxy::TranslationMap< T >::getVectorMap(), initIdMap(), StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), StringDictionaryProxy::TranslationMap< T >::numTransients(), StringDictionaryProxy::TranslationMap< T >::setNumUntranslatedStrings(), StringDictionaryProxy::TranslationMap< T >::setRangeEnd(), StringDictionaryProxy::TranslationMap< T >::setRangeStart(), StringDictionaryProxy::TranslationMap< T >::storageData(), storageEntryCount(), string_dict_, shared::transform(), transient_string_vec_, transientEntryCountUnlocked(), and VLOG.

Referenced by buildIntersectionTranslationMapToOtherProxy(), and buildUnionTranslationMapToOtherProxy().

258  {
259  auto timer = DEBUG_TIMER(__func__);
260  IdMap id_map = initIdMap();
261 
262  if (id_map.empty()) {
263  return id_map;
264  }
265 
266  const StringOps_Namespace::StringOps string_ops(string_op_infos);
267 
268  // First map transient strings, store at front of vector map
269  const size_t num_transient_entries = id_map.numTransients();
270  size_t num_transient_strings_not_translated = 0UL;
271  if (num_transient_entries) {
272  std::vector<std::string> transient_lookup_strings(num_transient_entries);
273  if (string_ops.size()) {
274  std::transform(transient_string_vec_.cbegin(),
275  transient_string_vec_.cend(),
276  transient_lookup_strings.rbegin(),
277  [&](std::string const* ptr) { return string_ops(*ptr); });
278  } else {
279  std::transform(transient_string_vec_.cbegin(),
280  transient_string_vec_.cend(),
281  transient_lookup_strings.rbegin(),
282  [](std::string const* ptr) { return *ptr; });
283  }
284 
285  // This lookup may have a different snapshot of
286  // dest_proxy transients and dictionary than what happens under
287  // the below dest_proxy_read_lock. We may need an unlocked version of
288  // getTransientBulk to ensure consistency (I don't believe
289  // current behavior would cause crashes/races, verify this though)
290 
291  // Todo(mattp): Consider variant of getTransientBulkImp that can take
292  // a vector of pointer-to-strings so we don't have to materialize
293  // transient_string_vec_ into transient_lookup_strings.
294 
295  num_transient_strings_not_translated =
296  dest_proxy->getTransientBulkImpl(transient_lookup_strings, id_map.data(), false);
297  }
298 
299  // Now map strings in dictionary
300  // We place non-transient strings after the transient strings
301  // if they exist, otherwise at index 0
302  int32_t* translation_map_stored_entries_ptr = id_map.storageData();
303 
304  auto dest_transient_lookup_callback = [dest_proxy, translation_map_stored_entries_ptr](
305  const std::string_view& source_string,
306  const int32_t source_string_id) {
307  translation_map_stored_entries_ptr[source_string_id] =
308  dest_proxy->lookupTransientStringUnlocked(source_string);
309  return translation_map_stored_entries_ptr[source_string_id] ==
310  StringDictionary::INVALID_STR_ID;
311  };
312 
313  const size_t num_dest_transients = dest_proxy->transientEntryCountUnlocked();
314  const size_t num_persisted_strings_not_translated =
315  generation_ > 0 ? string_dict_->buildDictionaryTranslationMap(
316  dest_proxy->string_dict_.get(),
317  translation_map_stored_entries_ptr,
318  generation_,
319  dest_proxy->generation_,
320  num_dest_transients > 0UL,
321  dest_transient_lookup_callback,
322  string_op_infos)
323  : 0UL;
324 
325  const size_t num_dest_entries = dest_proxy->entryCountUnlocked();
326  const size_t num_total_entries =
327  id_map.getVectorMap().size() - 1UL /* account for skipped entry -1 */;
328  CHECK_GT(num_total_entries, 0UL);
329  const size_t num_strings_not_translated =
330  num_transient_strings_not_translated + num_persisted_strings_not_translated;
331  CHECK_LE(num_strings_not_translated, num_total_entries);
332  id_map.setNumUntranslatedStrings(num_strings_not_translated);
333 
334  // Below is a conservative setting of range based on the size of the destination proxy,
335  // but probably not worth a scan over the data (or inline computation as we translate)
336  // to compute the actual ranges
337 
338  id_map.setRangeStart(
339  num_dest_transients > 0 ? -1 - static_cast<int32_t>(num_dest_transients) : 0);
340  id_map.setRangeEnd(dest_proxy->storageEntryCount());
341 
342  const size_t num_entries_translated = num_total_entries - num_strings_not_translated;
343  const float match_pct =
344  100.0 * static_cast<float>(num_entries_translated) / num_total_entries;
345  VLOG(1) << std::fixed << std::setprecision(2) << match_pct << "% ("
346  << num_entries_translated << " entries) from dictionary ("
347  << string_dict_->getDbId() << ", " << string_dict_->getDictId() << ") with "
348  << num_total_entries << " total entries ( " << num_transient_entries
349  << " literals)"
350  << " translated to dictionary (" << dest_proxy->string_dict_->getDbId() << ", "
351  << dest_proxy->string_dict_->getDictId() << ") with " << num_dest_entries
352  << " total entries (" << dest_proxy->transientEntryCountUnlocked()
353  << " literals).";
354 
355  return id_map;
356 }
size_t transientEntryCountUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
#define CHECK_GT(x, y)
Definition: Logger.h:234
TranslationMap< int32_t > IdMap
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
size_t getTransientBulkImpl(const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:296
#define CHECK_LE(x, y)
Definition: Logger.h:233
#define DEBUG_TIMER(name)
Definition: Logger.h:371
#define VLOG(n)
Definition: Logger.h:316

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::TranslationMap< Datum > StringDictionaryProxy::buildNumericTranslationMap ( const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos) const

Builds a vectorized string_id translation map from this proxy to dest_proxy.

Parameters
dest_proxy — StringDictionaryProxy that we are to map this proxy's string ids to
Returns
An IdMap which encapsulates a std::vector<int32_t> of string ids for both transient and non-transient strings, mapping to their translated string_ids. offset_ is defined to be the number of transient entries + 1. The ordering of values in the vector_map_ is:
  • the transient ids (there are offset_-1 of these)
  • INVALID_STR_ID (=-1)
  • the non-transient string ids

For example, if there are 3 transient entries in this proxy and 20 in the underlying string dictionary, then vector_map_ will be of size() == 24 and offset_=3+1. The formula to translate ids is new_id = vector_map_[offset_ + old_id]. It is always the case that vector_map_[offset_-1]==-1 so that INVALID_STR_ID maps to INVALID_STR_ID.

Definition at line 208 of file StringDictionaryProxy.cpp.

References CHECK, DEBUG_TIMER, generation_, getStringUnlocked(), threading_serial::parallel_for(), string_dict_, and transient_string_vec_.

Referenced by RowSetMemoryOwner::addStringProxyNumericTranslationMap().

209  {
210  auto timer = DEBUG_TIMER(__func__);
211  CHECK(string_op_infos.size());
212  TranslationMap<Datum> translation_map(transient_string_vec_.size(), generation_);
213  if (translation_map.empty()) {
214  return translation_map;
215  }
216 
217  const StringOps_Namespace::StringOps string_ops(string_op_infos);
218 
219  const size_t num_transient_entries = translation_map.numTransients();
220  if (num_transient_entries) {
221  const int32_t map_domain_start = translation_map.domainStart();
222  if (num_transient_entries > 10000UL) {
223  tbb::parallel_for(
224  tbb::blocked_range<int32_t>(map_domain_start, -1),
225  [&](const tbb::blocked_range<int32_t>& r) {
226  const int32_t start_idx = r.begin();
227  const int32_t end_idx = r.end();
228  for (int32_t source_string_id = start_idx; source_string_id < end_idx;
229  ++source_string_id) {
230  const auto source_string = getStringUnlocked(source_string_id);
231  translation_map[source_string_id] = string_ops.numericEval(source_string);
232  }
233  });
234  } else {
235  for (int32_t source_string_id = map_domain_start; source_string_id < -1;
236  ++source_string_id) {
237  const auto source_string = getStringUnlocked(source_string_id);
238  translation_map[source_string_id] = string_ops.numericEval(source_string);
239  }
240  }
241  }
242 
243  Datum* translation_map_stored_entries_ptr = translation_map.storageData();
244  if (generation_ > 0) {
245  string_dict_->buildDictionaryNumericTranslationMap(
246  translation_map_stored_entries_ptr, generation_, string_op_infos);
247  }
248  translation_map.setNumUntranslatedStrings(0UL);
249 
250  // Todo(todd): Set range start/end with scan
251 
252  return translation_map;
253 }
std::string getStringUnlocked(const int32_t string_id) const
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
#define CHECK(condition)
Definition: Logger.h:222
#define DEBUG_TIMER(name)
Definition: Logger.h:371
Definition: Datum.h:44

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::buildUnionTranslationMapToOtherProxy ( StringDictionaryProxy * dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_types 
) const

Definition at line 389 of file StringDictionaryProxy.cpp.

References buildIntersectionTranslationMapToOtherProxyUnlocked(), DEBUG_TIMER, getDictId(), getOrAddTransientUnlocked(), getStringUnlocked(), StringDictionary::INVALID_STR_ID, order_translation_locks(), rw_mutex_, string_dict_, to_string(), and transientEntryCountUnlocked().

Referenced by RowSetMemoryOwner::addStringProxyUnionTranslationMap().

391  {
392  auto timer = DEBUG_TIMER(__func__);
393 
394  const auto source_dict_id = getDictId();
395  const auto dest_dict_id = dest_proxy->getDictId();
396  std::shared_lock<std::shared_mutex> source_proxy_read_lock(rw_mutex_, std::defer_lock);
397  std::unique_lock<std::shared_mutex> dest_proxy_write_lock(dest_proxy->rw_mutex_,
398  std::defer_lock);
399  order_translation_locks(
400  source_dict_id, dest_dict_id, source_proxy_read_lock, dest_proxy_write_lock);
401 
402  auto id_map =
403  buildIntersectionTranslationMapToOtherProxyUnlocked(dest_proxy, string_op_infos);
404  if (id_map.empty()) {
405  return id_map;
406  }
407  const auto num_untranslated_strings = id_map.numUntranslatedStrings();
408  if (num_untranslated_strings > 0) {
409  const size_t total_post_translation_dest_transients =
410  num_untranslated_strings + dest_proxy->transientEntryCountUnlocked();
411  constexpr size_t max_allowed_transients =
412  static_cast<size_t>(std::numeric_limits<int32_t>::max() -
413  2); /* -2 accounts for INVALID_STR_ID and NULL value */
414  if (total_post_translation_dest_transients > max_allowed_transients) {
415  throw std::runtime_error("Union translation to dictionary" +
416  std::to_string(getDictId()) + " would result in " +
417  std::to_string(total_post_translation_dest_transients) +
418  " transient entries, which is more than limit of " +
419  std::to_string(max_allowed_transients) + " transients.");
420  }
421  const int32_t map_domain_start = id_map.domainStart();
422  const int32_t map_domain_end = id_map.domainEnd();
423 
424  const StringOps_Namespace::StringOps string_ops(string_op_infos);
425  const bool has_string_ops = string_ops.size();
426 
427  // First iterate over transient strings and add to dest map
428  // Todo (todd): Add call to fetch string_views (local) or strings (distributed)
429  // for all non-translated ids to avoid string-by-string fetch
430 
431  for (int32_t source_string_id = map_domain_start; source_string_id < -1;
432  ++source_string_id) {
433  if (id_map[source_string_id] == StringDictionary::INVALID_STR_ID) {
434  const auto source_string = getStringUnlocked(source_string_id);
435  const auto dest_string_id = dest_proxy->getOrAddTransientUnlocked(
436  has_string_ops ? string_ops(source_string) : source_string);
437  id_map[source_string_id] = dest_string_id;
438  }
439  }
440  // Now iterate over stored strings
441  for (int32_t source_string_id = 0; source_string_id < map_domain_end;
442  ++source_string_id) {
443  if (id_map[source_string_id] == StringDictionary::INVALID_STR_ID) {
444  const auto source_string = string_dict_->getString(source_string_id);
445  const auto dest_string_id = dest_proxy->getOrAddTransientUnlocked(
446  has_string_ops ? string_ops(source_string) : source_string);
447  id_map[source_string_id] = dest_string_id;
448  }
449  }
450  }
451  // We may have added transients to the destination proxy, use this to update
452  // our id map range (used downstream for ExpressionRange)
453 
454  const size_t num_dest_transients = dest_proxy->transientEntryCountUnlocked();
455  id_map.setRangeStart(
456  num_dest_transients > 0 ? -1 - static_cast<int32_t>(num_dest_transients) : 0);
457  return id_map;
458 }
size_t transientEntryCountUnlocked() const
std::string getStringUnlocked(const int32_t string_id) const
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
std::string to_string(char const *&&v)
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
void order_translation_locks(const int32_t source_db_id, const int32_t source_dict_id, const int32_t dest_db_id, const int32_t dest_dict_id, std::shared_lock< std::shared_mutex > &source_read_lock, std::shared_lock< std::shared_mutex > &dest_read_lock)
int32_t getOrAddTransientUnlocked(String const &)
int32_t getDictId() const noexcept
#define DEBUG_TIMER(name)
Definition: Logger.h:371

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void StringDictionaryProxy::eachStringSerially ( StringDictionary::StringCallback & serial_callback) const

Definition at line 605 of file StringDictionaryProxy.cpp.

References generation_, string_dict_, and transient_string_vec_.

Referenced by transientUnion().

606  {
607  constexpr int32_t max_transient_id = -2;
608  // Iterate over transient strings.
609  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
610  std::string const& str = *transient_string_vec_[index];
611  int32_t const string_id = max_transient_id - index;
612  serial_callback(str, string_id);
613  }
614  // Iterate over non-transient strings.
615  string_dict_->eachStringSerially(generation_, serial_callback);
616 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::entryCount ( ) const

Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount()

Returns
size_t Number of total string entries for this proxy

Definition at line 599 of file StringDictionaryProxy.cpp.

References entryCountUnlocked(), and rw_mutex_.

599  {
600  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
601  return entryCountUnlocked();
602 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock

+ Here is the call graph for this function:

size_t StringDictionaryProxy::entryCountUnlocked ( ) const
private

Definition at line 595 of file StringDictionaryProxy.cpp.

References storageEntryCount(), and transientEntryCountUnlocked().

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and entryCount().

595  {
596  return transientEntryCountUnlocked() + storageEntryCount();
597 }
size_t transientEntryCountUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getCompare ( const std::string &  pattern,
const std::string &  comp_operator 
) const

Definition at line 524 of file StringDictionaryProxy.cpp.

References CHECK_GE, anonymous_namespace{StringDictionaryProxy.cpp}::do_compare(), generation_, run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

Referenced by anonymous_namespace{StringOpsIR.cpp}::get_compared_ids().

526  {
527  CHECK_GE(generation_, 0);
528  auto result = string_dict_->getCompare(pattern, comp_operator, generation_);
529  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
530  if (do_compare(*transient_string_vec_[index], pattern, comp_operator)) {
531  result.push_back(transientIndexToId(index));
532  }
533  }
534  return result;
535 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)
bool do_compare(const std::string &str, const std::string &pattern, const std::string &comp_operator)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getDictId ( ) const
inline noexcept

Definition at line 47 of file StringDictionaryProxy.h.

References string_dict_id_.

Referenced by buildIntersectionTranslationMapToOtherProxy(), buildUnionTranslationMapToOtherProxy(), TransientStringLiteralsVisitor::visitStringOper(), and TransientStringLiteralsVisitor::visitUOper().

47 { return string_dict_id_; };

+ Here is the caller graph for this function:

StringDictionary * StringDictionaryProxy::getDictionary ( ) const
noexcept

Definition at line 798 of file StringDictionaryProxy.cpp.

References string_dict_.

Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap(), RowSetMemoryOwner::addStringProxyNumericTranslationMap(), and RowSetMemoryOwner::addStringProxyUnionTranslationMap().

798  {
799  return string_dict_.get();
800 }
std::shared_ptr< StringDictionary > string_dict_

+ Here is the caller graph for this function:

int64_t StringDictionaryProxy::getGeneration ( ) const
noexcept

Definition at line 802 of file StringDictionaryProxy.cpp.

References generation_.

802  {
803  return generation_;
804 }
int32_t StringDictionaryProxy::getIdOfString ( const std::string &  str) const

Definition at line 110 of file StringDictionaryProxy.cpp.

References getIdOfStringFromClient(), StringDictionary::INVALID_STR_ID, rw_mutex_, and transient_str_to_int_.

Referenced by anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), and Executor::serializeLiterals().

110  {
111  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
112  auto const str_id = getIdOfStringFromClient(str);
113  if (str_id != StringDictionary::INVALID_STR_ID || transient_str_to_int_.empty()) {
114  return str_id;
115  }
116  auto it = transient_str_to_int_.find(str);
117  return it != transient_str_to_int_.end() ? it->second
118  : StringDictionary::INVALID_STR_ID;
119 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
int32_t getIdOfStringFromClient(String const &) const
static constexpr int32_t INVALID_STR_ID

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::getIdOfStringFromClient ( String const &  str) const
private

Definition at line 122 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, string_dict_, and truncate_to_generation().

Referenced by getIdOfString(), and getOrAddTransient().

122  {
123  CHECK_GE(generation_, 0);
124  return truncate_to_generation(string_dict_->getIdOfString(str), generation_);
125 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
std::shared_ptr< StringDictionary > string_dict_
int32_t truncate_to_generation(const int32_t id, const size_t generation)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getIdOfStringNoGeneration ( const std::string &  str) const

Definition at line 127 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, and transient_str_to_int_.

127  {
128  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
129  auto str_id = string_dict_->getIdOfString(str);
130  if (str_id != StringDictionary::INVALID_STR_ID || transient_str_to_int_.empty()) {
131  return str_id;
132  }
133  auto it = transient_str_to_int_.find(str);
134  return it != transient_str_to_int_.end() ? it->second
135  : StringDictionary::INVALID_STR_ID;
136 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
std::vector< int32_t > StringDictionaryProxy::getLike ( const std::string &  pattern,
const bool  icase,
const bool  is_simple,
const char  escape 
) const

Definition at line 486 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

489  {
490  CHECK_GE(generation_, 0);
491  auto result = string_dict_->getLike(pattern, icase, is_simple, escape, generation_);
492  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
493  if (is_like(*transient_string_vec_[index], pattern, icase, is_simple, escape)) {
494  result.push_back(transientIndexToId(index));
495  }
496  }
497  return result;
498 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
bool is_like(const std::string &str, const std::string &pattern, const bool icase, const bool is_simple, const char escape)
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

int32_t StringDictionaryProxy::getOrAdd ( const std::string &  str)
noexcept

Definition at line 559 of file StringDictionaryProxy.cpp.

Referenced by DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId().

559  {
560  return string_dict_->getOrAdd(str);
561 }
std::shared_ptr< StringDictionary > string_dict_

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getOrAddTransient ( const std::string &  str)

Definition at line 101 of file StringDictionaryProxy.cpp.

References getIdOfStringFromClient(), getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, and rw_mutex_.

Referenced by apply_string_ops_and_encode(), populate_output_stats_cols(), Executor::serializeLiterals(), string_compress(), TransientStringLiteralsVisitor::visitConstant(), and TransientStringLiteralsVisitor::visitStringOper().

101  {
102  auto const string_id = getIdOfStringFromClient(str);
103  if (string_id != StringDictionary::INVALID_STR_ID) {
104  return string_id;
105  }
106  std::lock_guard<std::shared_mutex> write_lock(rw_mutex_);
107  return getOrAddTransientUnlocked(str);
108 }
heavyai::unique_lock< heavyai::shared_mutex > write_lock
int32_t getIdOfStringFromClient(String const &) const
static constexpr int32_t INVALID_STR_ID
int32_t getOrAddTransientUnlocked(String const &)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getOrAddTransientBulk ( const std::vector< std::string > &  strings)

Definition at line 60 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, rw_mutex_, and string_dict_.

Referenced by supported_ml_frameworks__cpu_().

61  {
63  const size_t num_strings = strings.size();
64  std::vector<int32_t> string_ids(num_strings);
65  if (num_strings == 0) {
66  return string_ids;
67  }
68  // Since new strings added to a StringDictionaryProxy are not materialized in the
69  // proxy's underlying StringDictionary, we can use the fast parallel
70  // StringDictionary::getBulk method to fetch ids from the underlying dictionary (which
71  // will return StringDictionary::INVALID_STR_ID for strings that don't exist)
72 
73  // Don't need to be under lock here as the string ids for strings in the underlying
74  // materialized dictionary are immutable
75  const size_t num_strings_not_found =
76  string_dict_->getBulk(strings, string_ids.data(), generation_);
77  if (num_strings_not_found > 0) {
78  std::lock_guard<std::shared_mutex> write_lock(rw_mutex_);
79  for (size_t string_idx = 0; string_idx < num_strings; ++string_idx) {
80  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
81  string_ids[string_idx] = getOrAddTransientUnlocked(strings[string_idx]);
82  }
83  }
84  }
85  return string_ids;
86 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
heavyai::unique_lock< heavyai::shared_mutex > write_lock
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
int32_t getOrAddTransientUnlocked(String const &)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::getOrAddTransientUnlocked ( String const &  str)
private

Definition at line 89 of file StringDictionaryProxy.cpp.

References transient_str_to_int_, transient_string_vec_, and transientIndexToId().

Referenced by buildUnionTranslationMapToOtherProxy(), getOrAddTransient(), getOrAddTransientBulk(), StringLocalCallback::operator()(), and StringNetworkCallback::operator()().

89  {
90  unsigned const new_index = transient_str_to_int_.size();
91  auto transient_id = transientIndexToId(new_index);
92  auto const emplaced = transient_str_to_int_.emplace(str, transient_id);
93  if (emplaced.second) { // (str, transient_id) was added to transient_str_to_int_.
94  transient_string_vec_.push_back(&emplaced.first->first);
95  } else { // str already exists in transient_str_to_int_. Return existing transient_id.
96  transient_id = emplaced.first->second;
97  }
98  return transient_id;
99 }
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getRegexpLike ( const std::string &  pattern,
const char  escape 
) const

Definition at line 547 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

548  {
549  CHECK_GE(generation_, 0);
550  auto result = string_dict_->getRegexpLike(pattern, escape, generation_);
551  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
552  if (is_regexp_like(*transient_string_vec_[index], pattern, escape)) {
553  result.push_back(transientIndexToId(index));
554  }
555  }
556  return result;
557 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
bool is_regexp_like(const std::string &str, const std::string &pattern, const char escape)
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

std::string StringDictionaryProxy::getString ( int32_t  string_id) const

Definition at line 163 of file StringDictionaryProxy.cpp.

References getStringUnlocked(), and rw_mutex_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::build_string_array_target_value(), StringValueConverter::convertToColumnarFormatFromDict(), DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), intersect_translate_string_id_to_other_dict(), and union_translate_string_id_to_other_dict().

163  {
164  if (inline_int_null_value<int32_t>() == string_id) {
165  return "";
166  }
167  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
168  return getStringUnlocked(string_id);
169 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
std::string getStringUnlocked(const int32_t string_id) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< const char *, size_t > StringDictionaryProxy::getStringBytes ( int32_t  string_id) const
noexcept

Definition at line 563 of file StringDictionaryProxy.cpp.

References CHECK_LT.

Referenced by anonymous_namespace{ExternalExecutor.cpp}::decode_string(), string_decompress(), StringDictionaryProxy_getStringBytes(), and StringDictionaryProxy_getStringLength().

564  {
565  if (string_id >= 0) {
566  return string_dict_.get()->getStringBytes(string_id);
567  }
568  unsigned const string_index = transientIdToIndex(string_id);
569  CHECK_LT(string_index, transient_string_vec_.size());
570  std::string const* const str_ptr = transient_string_vec_[string_index];
571  return {str_ptr->c_str(), str_ptr->size()};
572 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
#define CHECK_LT(x, y)
Definition: Logger.h:232
static unsigned transientIdToIndex(int32_t const id)

+ Here is the caller graph for this function:

std::vector< std::string > StringDictionaryProxy::getStrings ( const std::vector< int32_t > &  string_ids) const

Definition at line 180 of file StringDictionaryProxy.cpp.

References string_dict_, transient_string_vec_, and transientIdToIndex().

181  {
182  std::vector<std::string> strings;
183  if (!string_ids.empty()) {
184  strings.reserve(string_ids.size());
185  for (const auto string_id : string_ids) {
186  if (string_id >= 0) {
187  strings.emplace_back(string_dict_->getString(string_id));
188  } else if (inline_int_null_value<int32_t>() == string_id) {
189  strings.emplace_back("");
190  } else {
191  unsigned const string_index = transientIdToIndex(string_id);
192  strings.emplace_back(*transient_string_vec_[string_index]);
193  }
194  }
195  }
196  return strings;
197 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static unsigned transientIdToIndex(int32_t const id)

+ Here is the call graph for this function:

std::string StringDictionaryProxy::getStringUnlocked ( const int32_t  string_id) const
private

Definition at line 171 of file StringDictionaryProxy.cpp.

References CHECK_LT, storageEntryCount(), string_dict_, transient_string_vec_, and transientIdToIndex().

Referenced by buildNumericTranslationMap(), buildUnionTranslationMapToOtherProxy(), and getString().

171  {
172  if (string_id >= 0 && storageEntryCount() > 0) {
173  return string_dict_->getString(string_id);
174  }
175  unsigned const string_index = transientIdToIndex(string_id);
176  CHECK_LT(string_index, transient_string_vec_.size());
177  return *transient_string_vec_[string_index];
178 }
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
#define CHECK_LT(x, y)
Definition: Logger.h:232
static unsigned transientIdToIndex(int32_t const id)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getTransientBulk ( const std::vector< std::string > &  strings) const

Executes read-only lookup of a vector of strings and returns a vector of their integer ids.

This function, unlike getOrAddTransientBulk, will not add strings to the dictionary. Use this function if strings that don't currently exist in the StringDictionaryProxy should not be added to the proxy as transient entries. This method also has performance advantages over getOrAddTransientBulk for read-only use cases, in that it can: 1) take a read lock instead of a write lock for the transient lookups, and 2) use a tbb::parallel_for implementation of the transient string lookups, as we are guaranteed that the underlying map of strings to int ids cannot change.

Parameters
strings - Vector of strings to perform string id lookups on
Returns
A vector of string_ids of the same length as strings, containing the ids of any strings that were found in the underlying StringDictionary instance or in the proxy's transient map, and StringDictionary::INVALID_STR_ID for strings that were not found.

Definition at line 52 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, and getTransientBulkImpl().

53  {
55  std::vector<int32_t> string_ids(strings.size());
56  getTransientBulkImpl(strings, string_ids.data(), true);
57  return string_ids;
58 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
size_t getTransientBulkImpl(const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const

+ Here is the call graph for this function:

size_t StringDictionaryProxy::getTransientBulkImpl ( const std::vector< std::string > &  strings,
int32_t *  string_ids,
const bool  take_read_lock 
) const
private

Definition at line 695 of file StringDictionaryProxy.cpp.

References generation_, string_dict_, and transientLookupBulk().

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and getTransientBulk().

698  {
699  const size_t num_strings = strings.size();
700  if (num_strings == 0) {
701  return 0UL;
702  }
703  // StringDictionary::getBulk returns the number of strings not found
704  if (string_dict_->getBulk(strings, string_ids, generation_) == 0UL) {
705  return 0UL;
706  }
707 
708  // If here, dictionary could not find at least 1 target string,
709  // now look these up in the transient dictionary
710  // transientLookupBulk returns the number of strings not found
711  return transientLookupBulk(strings, string_ids, take_read_lock);
712 }
size_t transientLookupBulk(const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const
std::shared_ptr< StringDictionary > string_dict_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const std::vector<std::string const*>& StringDictionaryProxy::getTransientVector ( ) const
inline

Definition at line 243 of file StringDictionaryProxy.h.

References transient_string_vec_.

Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().

243  {
244  return transient_string_vec_;
245  }
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

IdMap StringDictionaryProxy::initIdMap ( ) const
inline

Definition at line 142 of file StringDictionaryProxy.h.

References generation_, StringDictionary::INVALID_STR_ID, and transient_string_vec_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and transientUnion().

142  {
143  return IdMap(
145  }
TranslationMap< int32_t > IdMap
static constexpr int32_t INVALID_STR_ID
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::lookupTransientStringUnlocked ( const String &  lookup_string) const
private

Definition at line 200 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and transient_str_to_int_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().

201  {
202  const auto it = transient_str_to_int_.find(lookup_string);
204  : it->second;
205 }
static constexpr int32_t INVALID_STR_ID

+ Here is the caller graph for this function:

bool StringDictionaryProxy::operator!= ( StringDictionaryProxy const &  rhs) const

Definition at line 811 of file StringDictionaryProxy.cpp.

References operator==().

811  {
812  return !operator==(rhs);
813 }
bool operator==(StringDictionaryProxy const &) const

+ Here is the call graph for this function:

StringDictionaryProxy const& StringDictionaryProxy::operator= ( StringDictionaryProxy const &  )
delete
bool StringDictionaryProxy::operator== ( StringDictionaryProxy const &  rhs) const

Definition at line 806 of file StringDictionaryProxy.cpp.

References string_dict_id_, and transient_str_to_int_.

Referenced by operator!=().

806  {
807  return string_dict_id_ == rhs.string_dict_id_ &&
808  transient_str_to_int_ == rhs.transient_str_to_int_;
809 }

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::persistedC ( ) const
private
size_t StringDictionaryProxy::storageEntryCount ( ) const

Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary.

Returns
size_t Number of entries in the string dictionary (at this proxy's generation if set)

Definition at line 574 of file StringDictionaryProxy.cpp.

References CHECK_LE, generation_, and string_dict_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), entryCountUnlocked(), and getStringUnlocked().

574  {
575  const size_t num_storage_entries{generation_ == -1 ? string_dict_->storageEntryCount()
576  : generation_};
577  CHECK_LE(num_storage_entries, static_cast<size_t>(std::numeric_limits<int32_t>::max()));
578  return num_storage_entries;
579 }
std::shared_ptr< StringDictionary > string_dict_
#define CHECK_LE(x, y)
Definition: Logger.h:233

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::transientEntryCount ( ) const

Returns the number of transient string entries for this proxy.

Returns
size_t Number of transient string entries for this proxy

Definition at line 590 of file StringDictionaryProxy.cpp.

References rw_mutex_, and transientEntryCountUnlocked().

590  {
591  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
593 }
size_t transientEntryCountUnlocked() const
heavyai::shared_lock< heavyai::shared_mutex > read_lock

+ Here is the call graph for this function:

size_t StringDictionaryProxy::transientEntryCountUnlocked ( ) const
private

Definition at line 581 of file StringDictionaryProxy.cpp.

References CHECK_LE, and transient_str_to_int_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildUnionTranslationMapToOtherProxy(), entryCountUnlocked(), and transientEntryCount().

581  {
582  // CHECK_LE(num_storage_entries,
583  // static_cast<size_t>(std::numeric_limits<int32_t>::max()));
584  const size_t num_transient_entries{transient_str_to_int_.size()};
585  CHECK_LE(num_transient_entries,
586  static_cast<size_t>(std::numeric_limits<int32_t>::max()) - 1);
587  return num_transient_entries;
588 }
#define CHECK_LE(x, y)
Definition: Logger.h:233

+ Here is the caller graph for this function:

static unsigned StringDictionaryProxy::transientIdToIndex ( int32_t const  id)
inlinestatic

Definition at line 249 of file StringDictionaryProxy.h.

Referenced by getStrings(), getStringUnlocked(), and StringDictionary::populate_string_ids().

249  {
250  constexpr int max_transient_string_id = -2;
251  return static_cast<unsigned>(max_transient_string_id - id);
252  }

+ Here is the caller graph for this function:

static int32_t StringDictionaryProxy::transientIndexToId ( unsigned const  index)
inlinestatic

Definition at line 254 of file StringDictionaryProxy.h.

Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), getCompare(), getLike(), getOrAddTransientUnlocked(), getRegexpLike(), and ArrowResultSetConverter::initializeColumnBuilder().

254  {
255  constexpr int max_transient_string_id = -2;
256  return static_cast<int32_t>(max_transient_string_id - index);
257  }

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulk ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids,
const bool  take_read_lock 
) const
private

Definition at line 715 of file StringDictionaryProxy.cpp.

References rw_mutex_, transient_str_to_int_, transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().

Referenced by getTransientBulkImpl().

718  {
719  const size_t num_strings = lookup_strings.size();
720  auto read_lock = take_read_lock ? std::shared_lock<std::shared_mutex>(rw_mutex_)
721  : std::shared_lock<std::shared_mutex>();
722 
723  if (num_strings == static_cast<size_t>(0) || transient_str_to_int_.empty()) {
724  return 0UL;
725  }
726  constexpr size_t tbb_parallel_threshold{20000};
727  if (num_strings < tbb_parallel_threshold) {
728  return transientLookupBulkUnlocked(lookup_strings, string_ids);
729  } else {
730  return transientLookupBulkParallelUnlocked(lookup_strings, string_ids);
731  }
732 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
size_t transientLookupBulkUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const
size_t transientLookupBulkParallelUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulkParallelUnlocked ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 755 of file StringDictionaryProxy.cpp.

References CHECK_GE, StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, and threading_serial::parallel_for().

Referenced by transientLookupBulk().

757  {
758  const size_t num_lookup_strings = lookup_strings.size();
759  const size_t target_inputs_per_thread = 20000L;
760  ThreadInfo thread_info(
761  std::thread::hardware_concurrency(), num_lookup_strings, target_inputs_per_thread);
762  CHECK_GE(thread_info.num_threads, 1L);
763  CHECK_GE(thread_info.num_elems_per_thread, 1L);
764 
765  std::vector<size_t> num_strings_not_found_per_thread(thread_info.num_threads, 0UL);
766 
767  tbb::task_arena limited_arena(thread_info.num_threads);
768  limited_arena.execute([&] {
770  tbb::blocked_range<size_t>(
771  0, num_lookup_strings, thread_info.num_elems_per_thread /* tbb grain_size */),
772  [&](const tbb::blocked_range<size_t>& r) {
773  const size_t start_idx = r.begin();
774  const size_t end_idx = r.end();
775  size_t num_local_strings_not_found = 0;
776  for (size_t string_idx = start_idx; string_idx < end_idx; ++string_idx) {
777  if (string_ids[string_idx] != StringDictionary::INVALID_STR_ID) {
778  continue;
779  }
780  string_ids[string_idx] =
781  lookupTransientStringUnlocked(lookup_strings[string_idx]);
782  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
783  num_local_strings_not_found++;
784  }
785  }
786  const size_t tbb_thread_idx = tbb::this_task_arena::current_thread_index();
787  num_strings_not_found_per_thread[tbb_thread_idx] = num_local_strings_not_found;
788  },
789  tbb::simple_partitioner());
790  });
791  size_t num_strings_not_found = 0;
792  for (int64_t thread_idx = 0; thread_idx < thread_info.num_threads; ++thread_idx) {
793  num_strings_not_found += num_strings_not_found_per_thread[thread_idx];
794  }
795  return num_strings_not_found;
796 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
static constexpr int32_t INVALID_STR_ID
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulkUnlocked ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 735 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and lookupTransientStringUnlocked().

Referenced by transientLookupBulk().

737  {
738  const size_t num_strings = lookup_strings.size();
739  size_t num_strings_not_found = 0;
740  for (size_t string_idx = 0; string_idx < num_strings; ++string_idx) {
741  if (string_ids[string_idx] != StringDictionary::INVALID_STR_ID) {
742  continue;
743  }
744  // If we're here it means we need to look up this string as we don't
745  // have a valid id for it
746  string_ids[string_idx] = lookupTransientStringUnlocked(lookup_strings[string_idx]);
747  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
748  num_strings_not_found++;
749  }
750  }
751  return num_strings_not_found;
752 }
static constexpr int32_t INVALID_STR_ID
int32_t lookupTransientStringUnlocked(const String &lookup_string) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::transientUnion ( StringDictionaryProxy const &  sdp_rhs)

Definition at line 669 of file StringDictionaryProxy.cpp.

References eachStringSerially(), initIdMap(), and string_dict_.

670  {
671  IdMap id_map = sdp_rhs.initIdMap();
672  // serial_callback cannot be parallelized due to calling getOrAddTransientUnlocked().
673  std::unique_ptr<StringDictionary::StringCallback> serial_callback;
674  if (string_dict_->isClient()) {
675  serial_callback = std::make_unique<StringNetworkCallback>(this, id_map);
676  } else {
677  serial_callback = std::make_unique<StringLocalCallback>(this, id_map);
678  }
679  // Import all non-duplicate strings (transient and non-transient) and add to id_map.
680  sdp_rhs.eachStringSerially(*serial_callback);
681  return id_map;
682 }
TranslationMap< int32_t > IdMap
std::shared_ptr< StringDictionary > string_dict_

+ Here is the call graph for this function:

void StringDictionaryProxy::updateGeneration ( const int64_t  generation)
noexcept

Definition at line 684 of file StringDictionaryProxy.cpp.

References CHECK_EQ.

684  {
685  if (generation == -1) {
686  return;
687  }
688  if (generation_ != -1) {
689  CHECK_EQ(generation_, generation);
690  return;
691  }
692  generation_ = generation;
693 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230

Friends And Related Function Documentation

friend class StringLocalCallback
friend

Definition at line 305 of file StringDictionaryProxy.h.

friend class StringNetworkCallback
friend

Definition at line 306 of file StringDictionaryProxy.h.

Member Data Documentation

const int32_t StringDictionaryProxy::string_dict_id_
private

Definition at line 292 of file StringDictionaryProxy.h.

Referenced by getDictId(), and operator==().


The documentation for this class was generated from the following files: