OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringDictionaryProxy Class Reference

#include <StringDictionaryProxy.h>

Public Member Functions

 StringDictionaryProxy (std::shared_ptr< StringDictionary > sd, const int32_t string_dict_id, const int64_t generation)
 
int32_t getDictId () const noexcept
 
int32_t getOrAdd (const std::string &str) noexcept
 
StringDictionary * getDictionary () const noexcept
 
int64_t getGeneration () const noexcept
 
std::vector< int32_t > getTransientBulk (const std::vector< std::string > &strings) const
 Executes read-only lookup of a vector of strings and returns a vector of their integer ids. More...
 
int32_t getOrAddTransient (const std::string &str)
 
std::vector< int32_t > getOrAddTransientBulk (const std::vector< std::string > &strings)
 
int32_t getIdOfString (const std::string &str) const
 
int32_t getIdOfStringNoGeneration (const std::string &str) const
 
std::string getString (int32_t string_id) const
 
std::vector< std::string > getStrings (const std::vector< int32_t > &string_ids) const
 
std::pair< const char *, size_t > getStringBytes (int32_t string_id) const noexcept
 
StringDictionaryProxyRange getRange () const
 
StringDictionaryProxyRange getRangeUnlocked () const
 
std::shared_ptr
< StringDictionaryProxyTranslationMap > 
buildTranslationMapToOtherProxy (const StringDictionaryProxy *dest_proxy) const
 Builds a vectorized string_id translation map from this proxy to dest_proxy. More...
 
size_t storageEntryCount () const
 Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary. More...
 
size_t transientEntryCount () const
 Returns the number of transient string entries for this proxy. More...
 
size_t entryCount () const
 Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount() More...
 
void updateGeneration (const int64_t generation) noexcept
 
std::vector< int32_t > getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape) const
 
std::vector< int32_t > getCompare (const std::string &pattern, const std::string &comp_operator) const
 
std::vector< int32_t > getRegexpLike (const std::string &pattern, const char escape) const
 
const std::map< int32_t,
std::string > 
getTransientMapping () const
 
template<>
int32_t lookupTransientStringUnlocked (const std::string &lookup_string) const
 
template<>
int32_t lookupTransientStringUnlocked (const std::string_view &lookup_string) const
 

Private Member Functions

size_t transientEntryCountUnlocked () const
 
size_t entryCountUnlocked () const
 
int32_t transientLookupAndAddUnlocked (const std::string &str)
 
template<typename String >
int32_t lookupTransientStringUnlocked (const String &lookup_string) const
 
template<typename String >
void transientLookupBulk (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 
template<typename String >
void transientLookupBulkUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 
template<typename String >
void transientLookupBulkParallelUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 

Private Attributes

std::shared_ptr< StringDictionary > string_dict_
 
const int32_t string_dict_id_
 
std::map< int32_t, std::string > transient_int_to_str_
 
std::map< std::string, int32_t > transient_str_to_int_
 
int64_t generation_
 
mapd_shared_mutex rw_mutex_
 

Friends

bool operator== (const StringDictionaryProxy &sdp1, const StringDictionaryProxy &sdp2)
 
bool operator!= (const StringDictionaryProxy &sdp1, const StringDictionaryProxy &sdp2)
 

Detailed Description

Definition at line 63 of file StringDictionaryProxy.h.

Constructor & Destructor Documentation

StringDictionaryProxy::StringDictionaryProxy ( std::shared_ptr< StringDictionary > sd,
const int32_t  string_dict_id,
const int64_t  generation 
)

Definition at line 37 of file StringDictionaryProxy.cpp.

40  : string_dict_(sd), string_dict_id_(string_dict_id), generation_(generation) {}
std::shared_ptr< StringDictionary > string_dict_

Member Function Documentation

std::shared_ptr< StringDictionaryProxyTranslationMap > StringDictionaryProxy::buildTranslationMapToOtherProxy ( const StringDictionaryProxy * dest_proxy) const

Builds a vectorized string_id translation map from this proxy to dest_proxy.

Parameters
dest_proxy: StringDictionaryProxy that we are to map this proxy's string ids to
Returns
A shared_ptr to a StringDictionaryProxyTranslationMap, which contains both the source domain range (i.e. the min/max string ids of this proxy), and a vector representing a linear dense vector map of source proxy ids to destination proxy ids, where index 0 corresponds to the lowest (negative) transient id in this proxy, and with each increasing index corresponding to the next string_id. I.e., if there are 3 transient entries in this proxy, and 20 in the underlying string dictionary, there will be 25 total entries, mapping transient id -5 (as -1 and 0 are reserved, transients start at -2 (transient_id_ceil) and descend downward). Entries corresponding to -1 and 0 may contain garbage; it is expected that these entries are never accessed. The payload of the vector map is the string ids in the dest_proxy corresponding to the indexed string ids from this proxy.

Definition at line 205 of file StringDictionaryProxy.cpp.

References CHECK, CHECK_GE, CHECK_GT, CHECK_LE, gpu_enabled::copy(), DEBUG_TIMER, entryCountUnlocked(), generation_, getRangeUnlocked(), getTransientBulk(), StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), rw_mutex_, string_dict_, transient_id_ceil, transient_int_to_str_, transientEntryCountUnlocked(), and VLOG.

206  {
207  auto timer = DEBUG_TIMER(__func__);
208  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
209  auto str_proxy_translation_map =
210  std::make_shared<StringDictionaryProxyTranslationMap>(getRangeUnlocked());
211  CHECK(str_proxy_translation_map);
212 
213  if (str_proxy_translation_map->isEmpty()) {
214  return str_proxy_translation_map;
215  }
216 
217  // First map transient strings, store at front of vector map
218  const size_t num_transient_entries = str_proxy_translation_map->numTransientEntries();
219  if (num_transient_entries) {
220  std::vector<std::string> transient_lookup_strings(num_transient_entries);
221 
222  for (const auto& transient_entry : transient_int_to_str_) {
223  const auto transient_id = transient_entry.first;
224  CHECK_LE(transient_id, transient_id_ceil);
225  CHECK_GT(transient_id,
226  transient_id_ceil - static_cast<int32_t>(num_transient_entries));
227  const size_t map_idx = transient_entry.first + num_transient_entries + 1;
228  transient_lookup_strings[map_idx] = transient_entry.second;
229  }
230  // This lookup may have a different snapshot of
231  // dest_proxy transients and dictionary than what happens under
232  // the below dest_proxy_read_lock. We may need an unlocked version of
233  // getTransientBulk to ensure consistency (I don't believe
234  // current behavior would cause crashes/races, verify this though)
235 
236  // Todo(todd): Consider implementing a getTransientBulk call that takes
237  // an allocated pointer to avoid extra copy of return vector into
238  // the already allocated translation map
239 
240  const auto transient_str_to_id_vec_map =
241  dest_proxy->getTransientBulk(transient_lookup_strings);
242  CHECK_GE(str_proxy_translation_map->size(), transient_str_to_id_vec_map.size());
243  std::copy(transient_str_to_id_vec_map.begin(),
244  transient_str_to_id_vec_map.end(),
245  str_proxy_translation_map->translation_map_.begin());
246  }
247 
248  // Now map strings in dictionary
249  // We start non transient strings after the transient strings
250  // if they exist, otherwise at 0
251  auto translation_map_stored_entries_ptr =
252  str_proxy_translation_map->storageEntriesPtr();
253 
254  auto dest_transient_lookup_callback = [dest_proxy, translation_map_stored_entries_ptr](
255  const std::string_view& source_string,
256  const int32_t source_string_id) {
257  translation_map_stored_entries_ptr[source_string_id] =
258  dest_proxy->lookupTransientStringUnlocked(source_string);
259  return translation_map_stored_entries_ptr[source_string_id] ==
261  };
262 
263  mapd_lock_guard<mapd_shared_mutex> dest_proxy_read_lock(dest_proxy->rw_mutex_);
264  const size_t num_dest_transients = dest_proxy->transientEntryCountUnlocked();
265 
266  const size_t num_strings_not_translated =
267  string_dict_->buildDictionaryTranslationMap(dest_proxy->string_dict_.get(),
268  translation_map_stored_entries_ptr,
269  generation_,
270  dest_proxy->generation_,
271  num_dest_transients > 0UL,
272  dest_transient_lookup_callback);
273  const size_t num_dest_entries = dest_proxy->entryCountUnlocked();
274  const size_t num_total_entries = str_proxy_translation_map->size();
275  CHECK_GT(num_total_entries, 0UL);
276  CHECK_LE(num_strings_not_translated, str_proxy_translation_map->size());
277  const size_t num_entries_translated = num_total_entries - num_strings_not_translated;
278  const float match_pct =
279  100.0 * static_cast<float>(num_entries_translated) / num_total_entries;
280  VLOG(1) << std::fixed << std::setprecision(2) << match_pct << "% ("
281  << num_entries_translated << " entries) from dictionary ("
282  << string_dict_->getDbId() << ", " << string_dict_->getDictId() << ") with "
283  << num_total_entries << " total entries ( " << num_transient_entries
284  << " literals)"
285  << " translated to dictionary (" << dest_proxy->string_dict_->getDbId() << ", "
286  << dest_proxy->string_dict_->getDictId() << ") with " << num_dest_entries
287  << " total entries (" << dest_proxy->transientEntryCountUnlocked()
288  << " literals).";
289 
290  return str_proxy_translation_map;
291 }
size_t transientEntryCountUnlocked() const
std::map< int32_t, std::string > transient_int_to_str_
StringDictionaryProxyRange getRangeUnlocked() const
#define CHECK_GE(x, y)
Definition: Logger.h:224
#define CHECK_GT(x, y)
Definition: Logger.h:223
std::vector< int32_t > getTransientBulk(const std::vector< std::string > &strings) const
Executes read-only lookup of a vector of strings and returns a vector of their integer ids...
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
#define CHECK_LE(x, y)
Definition: Logger.h:222
mapd_shared_lock< mapd_shared_mutex > read_lock
#define CHECK(condition)
Definition: Logger.h:211
#define DEBUG_TIMER(name)
Definition: Logger.h:358
constexpr int32_t transient_id_ceil
#define VLOG(n)
Definition: Logger.h:305

+ Here is the call graph for this function:

size_t StringDictionaryProxy::entryCount ( ) const

Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount()

Returns
size_t Number of total string entries for this proxy

Definition at line 435 of file StringDictionaryProxy.cpp.

References entryCountUnlocked(), and rw_mutex_.

435  {
436  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
437  return entryCountUnlocked();
438 }
mapd_shared_lock< mapd_shared_mutex > read_lock

+ Here is the call graph for this function:

size_t StringDictionaryProxy::entryCountUnlocked ( ) const
private

Definition at line 431 of file StringDictionaryProxy.cpp.

References storageEntryCount(), and transientEntryCountUnlocked().

Referenced by buildTranslationMapToOtherProxy(), and entryCount().

431  {
433 }
size_t transientEntryCountUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getCompare ( const std::string &  pattern,
const std::string &  comp_operator 
) const

Definition at line 358 of file StringDictionaryProxy.cpp.

References CHECK_GE, anonymous_namespace{StringDictionaryProxy.cpp}::do_compare(), generation_, getString(), run_benchmark_import::result, string_dict_, and transient_int_to_str_.

Referenced by anonymous_namespace{StringOpsIR.cpp}::get_compared_ids().

360  {
361  CHECK_GE(generation_, 0);
362  auto result = string_dict_->getCompare(pattern, comp_operator, generation_);
363  for (const auto& kv : transient_int_to_str_) {
364  const auto str = getString(kv.first);
365  if (do_compare(str, pattern, comp_operator)) {
366  result.push_back(kv.first);
367  }
368  }
369  return result;
370 }
std::map< int32_t, std::string > transient_int_to_str_
#define CHECK_GE(x, y)
Definition: Logger.h:224
std::string getString(int32_t string_id) const
std::shared_ptr< StringDictionary > string_dict_
bool do_compare(const std::string &str, const std::string &pattern, const std::string &comp_operator)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getDictId ( ) const
inline noexcept

Definition at line 74 of file StringDictionaryProxy.h.

References string_dict_id_.

74 { return string_dict_id_; };
StringDictionary * StringDictionaryProxy::getDictionary ( ) const
noexcept

Definition at line 562 of file StringDictionaryProxy.cpp.

References string_dict_.

562  {
563  return string_dict_.get();
564 }
std::shared_ptr< StringDictionary > string_dict_
int64_t StringDictionaryProxy::getGeneration ( ) const
noexcept

Definition at line 566 of file StringDictionaryProxy.cpp.

References generation_.

566  {
567  return generation_;
568 }
int32_t StringDictionaryProxy::getIdOfString ( const std::string &  str) const

Definition at line 110 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, transient_str_to_int_, and truncate_to_generation().

Referenced by anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), and string_compress().

110  {
111  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
112  CHECK_GE(generation_, 0);
113  auto str_id = truncate_to_generation(string_dict_->getIdOfString(str), generation_);
114  if (str_id != StringDictionary::INVALID_STR_ID || transient_str_to_int_.empty()) {
115  return str_id;
116  }
117  auto it = transient_str_to_int_.find(str);
118  return it != transient_str_to_int_.end() ? it->second
120 }
#define CHECK_GE(x, y)
Definition: Logger.h:224
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
std::map< std::string, int32_t > transient_str_to_int_
mapd_shared_lock< mapd_shared_mutex > read_lock
int32_t truncate_to_generation(const int32_t id, const size_t generation)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getIdOfStringNoGeneration ( const std::string &  str) const

Definition at line 122 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, and transient_str_to_int_.

122  {
123  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
124  auto str_id = string_dict_->getIdOfString(str);
125  if (str_id != StringDictionary::INVALID_STR_ID || transient_str_to_int_.empty()) {
126  return str_id;
127  }
128  auto it = transient_str_to_int_.find(str);
129  return it != transient_str_to_int_.end() ? it->second
131 }
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
std::map< std::string, int32_t > transient_str_to_int_
mapd_shared_lock< mapd_shared_mutex > read_lock
std::vector< int32_t > StringDictionaryProxy::getLike ( const std::string &  pattern,
const bool  icase,
const bool  is_simple,
const char  escape 
) const

Definition at line 319 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, getString(), anonymous_namespace{StringDictionary.cpp}::is_like(), run_benchmark_import::result, string_dict_, and transient_int_to_str_.

322  {
323  CHECK_GE(generation_, 0);
324  auto result = string_dict_->getLike(pattern, icase, is_simple, escape, generation_);
325  for (const auto& kv : transient_int_to_str_) {
326  const auto str = getString(kv.first);
327  if (is_like(str, pattern, icase, is_simple, escape)) {
328  result.push_back(kv.first);
329  }
330  }
331  return result;
332 }
std::map< int32_t, std::string > transient_int_to_str_
#define CHECK_GE(x, y)
Definition: Logger.h:224
std::string getString(int32_t string_id) const
std::shared_ptr< StringDictionary > string_dict_
bool is_like(const std::string &str, const std::string &pattern, const bool icase, const bool is_simple, const char escape)

+ Here is the call graph for this function:

int32_t StringDictionaryProxy::getOrAdd ( const std::string &  str)
noexcept

Definition at line 395 of file StringDictionaryProxy.cpp.

Referenced by DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId().

395  {
396  return string_dict_->getOrAdd(str);
397 }
std::shared_ptr< StringDictionary > string_dict_

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getOrAddTransient ( const std::string &  str)

Definition at line 70 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, transientLookupAndAddUnlocked(), and truncate_to_generation().

Referenced by lower_encoded(), and TransientStringLiteralsVisitor::visitConstant().

70  {
72  auto transient_id =
73  truncate_to_generation(string_dict_->getIdOfString(str), generation_);
74  if (transient_id != StringDictionary::INVALID_STR_ID) {
75  return transient_id;
76  }
77  mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
79 }
#define CHECK_GE(x, y)
Definition: Logger.h:224
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
mapd_unique_lock< mapd_shared_mutex > write_lock
int32_t transientLookupAndAddUnlocked(const std::string &str)
int32_t truncate_to_generation(const int32_t id, const size_t generation)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getOrAddTransientBulk ( const std::vector< std::string > &  strings)

Definition at line 81 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, transientLookupAndAddUnlocked(), and truncate_to_generation().

82  {
84  const size_t num_strings = strings.size();
85  std::vector<int32_t> string_ids(num_strings);
86  if (num_strings == 0) {
87  return string_ids;
88  }
89  // Since new strings added to a StringDictionaryProxy are not materialized in the
90  // proxy's underlying StringDictionary, we can use the fast parallel
91  // StringDictionary::getBulk method to fetch ids from the underlying dictionary (which
92  // will return StringDictionary::INVALID_STR_ID for strings that don't exist)
93 
94  // Don't need to be under lock here as the string ids for strings in the underlying
95  // materialized dictionary are immutable
96  const size_t num_strings_not_found = string_dict_->getBulk(strings, string_ids.data());
97  if (num_strings_not_found > 0) {
98  mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
99  for (size_t string_idx = 0; string_idx < num_strings; ++string_idx) {
100  const auto transient_id =
101  truncate_to_generation(string_ids[string_idx], generation_);
102  if (transient_id == StringDictionary::INVALID_STR_ID) {
103  string_ids[string_idx] = transientLookupAndAddUnlocked(strings[string_idx]);
104  }
105  }
106  }
107  return string_ids;
108 }
#define CHECK_GE(x, y)
Definition: Logger.h:224
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
mapd_unique_lock< mapd_shared_mutex > write_lock
int32_t transientLookupAndAddUnlocked(const std::string &str)
int32_t truncate_to_generation(const int32_t id, const size_t generation)

+ Here is the call graph for this function:

std::pair< int32_t, int32_t > StringDictionaryProxy::getRange ( ) const

Definition at line 170 of file StringDictionaryProxy.cpp.

References getRangeUnlocked(), and rw_mutex_.

170  {
171  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
172  return getRangeUnlocked();
173 }
StringDictionaryProxyRange getRangeUnlocked() const
mapd_shared_lock< mapd_shared_mutex > read_lock

+ Here is the call graph for this function:

std::pair< int32_t, int32_t > StringDictionaryProxy::getRangeUnlocked ( ) const

Definition at line 175 of file StringDictionaryProxy.cpp.

References storageEntryCount(), transient_id_ceil, and transientEntryCountUnlocked().

Referenced by buildTranslationMapToOtherProxy(), and getRange().

175  {
176  const int32_t storage_entry_count = storageEntryCount();
177  const int32_t transient_entry_count = transientEntryCountUnlocked();
178  const int32_t min_elem =
179  transient_entry_count > 0 ? transient_id_ceil + 1 - transient_entry_count : 0;
180  const int32_t max_elem = storage_entry_count;
181  return std::make_pair(min_elem, max_elem);
182 }
size_t transientEntryCountUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
constexpr int32_t transient_id_ceil

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getRegexpLike ( const std::string &  pattern,
const char  escape 
) const

Definition at line 382 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, getString(), anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), run_benchmark_import::result, string_dict_, and transient_int_to_str_.

383  {
384  CHECK_GE(generation_, 0);
385  auto result = string_dict_->getRegexpLike(pattern, escape, generation_);
386  for (const auto& kv : transient_int_to_str_) {
387  const auto str = getString(kv.first);
388  if (is_regexp_like(str, pattern, escape)) {
389  result.push_back(kv.first);
390  }
391  }
392  return result;
393 }
std::map< int32_t, std::string > transient_int_to_str_
#define CHECK_GE(x, y)
Definition: Logger.h:224
std::string getString(int32_t string_id) const
bool is_regexp_like(const std::string &str, const std::string &pattern, const char escape)
std::shared_ptr< StringDictionary > string_dict_

+ Here is the call graph for this function:

std::string StringDictionaryProxy::getString ( int32_t  string_id) const

Definition at line 133 of file StringDictionaryProxy.cpp.

References CHECK, CHECK_NE, StringDictionary::INVALID_STR_ID, rw_mutex_, storageEntryCount(), string_dict_, and transient_int_to_str_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::build_string_array_target_value(), StringValueConverter::convertToColumnarFormatFromDict(), DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), getCompare(), getLike(), getRegexpLike(), lower_encoded(), GenericKeyHandler::operator()(), and anonymous_namespace{HashJoinRuntime.cpp}::translate_str_id_to_outer_dict().

133  {
134  if (inline_int_null_value<int32_t>() == string_id) {
135  return "";
136  }
137  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
138  if (string_id >= 0 && storageEntryCount() > 0) {
139  return string_dict_->getString(string_id);
140  }
142  auto it = transient_int_to_str_.find(string_id);
143  CHECK(it != transient_int_to_str_.end());
144  return it->second;
145 }
std::map< int32_t, std::string > transient_int_to_str_
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
#define CHECK_NE(x, y)
Definition: Logger.h:220
mapd_shared_lock< mapd_shared_mutex > read_lock
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< const char *, size_t > StringDictionaryProxy::getStringBytes ( int32_t  string_id) const
noexcept

Definition at line 399 of file StringDictionaryProxy.cpp.

References CHECK, CHECK_NE, and StringDictionary::INVALID_STR_ID.

Referenced by anonymous_namespace{ExternalExecutor.cpp}::decode_string(), and string_decompress().

400  {
401  if (string_id >= 0) {
402  return string_dict_.get()->getStringBytes(string_id);
403  }
405  auto it = transient_int_to_str_.find(string_id);
406  CHECK(it != transient_int_to_str_.end());
407  return std::make_pair(it->second.c_str(), it->second.size());
408 }
std::map< int32_t, std::string > transient_int_to_str_
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
#define CHECK_NE(x, y)
Definition: Logger.h:220
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the caller graph for this function:

std::vector< std::string > StringDictionaryProxy::getStrings ( const std::vector< int32_t > &  string_ids) const

Definition at line 147 of file StringDictionaryProxy.cpp.

References rw_mutex_, string_dict_, and transient_int_to_str_.

148  {
149  const size_t num_string_ids = string_ids.size();
150  std::vector<std::string> strings;
151  if (num_string_ids == size_t(0)) {
152  return strings;
153  }
154  strings.reserve(num_string_ids);
155  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
156  for (const auto& string_id : string_ids) {
157  if (inline_int_null_value<int32_t>() == string_id) {
158  strings.emplace_back("");
159  continue;
160  }
161  if (string_id >= 0) {
162  strings.emplace_back(string_dict_->getString(string_id));
163  continue;
164  }
165  auto it = transient_int_to_str_.find(string_id);
166  strings.emplace_back(it->second);
167  }
168  return strings;
169 }
std::map< int32_t, std::string > transient_int_to_str_
std::shared_ptr< StringDictionary > string_dict_
mapd_shared_lock< mapd_shared_mutex > read_lock
std::vector< int32_t > StringDictionaryProxy::getTransientBulk ( const std::vector< std::string > &  strings) const

Executes read-only lookup of a vector of strings and returns a vector of their integer ids.

This function, unlike getOrAddTransientBulk, will not add strings to the dictionary. Use this function if strings that don't currently exist in the StringDictionaryProxy should not be added to the proxy as transient entries. This method also has performance advantages over getOrAddTransientBulk for read-only use cases, in that it can: 1) Take a read lock instead of a write lock for the transient lookups 2) Use a tbb::parallel_for implementation of the transient string lookups as we are guaranteed that the underlying map of strings to int ids cannot change

Parameters
strings - Vector of strings to perform string id lookups on
Returns
A vector of string_ids of the same length as strings, containing the id of any strings which were found in the underlying StringDictionary instance or in the proxy's transient map, otherwise StringDictionary::INVALID_STR_ID for strings not found.

Definition at line 50 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, string_dict_, and transientLookupBulk().

Referenced by buildTranslationMapToOtherProxy().

51  {
53  const size_t num_strings = strings.size();
54  std::vector<int32_t> string_ids(num_strings);
55  if (num_strings == 0) {
56  return string_ids;
57  }
58  // Use fast parallel String::Dictionary getBulk method
59  // Todo: Evaluate getBulk method that takes callback to do transient lookup
60  // to avoid a second rescan of the data
61  const size_t num_strings_not_found = string_dict_->getBulk(strings, string_ids.data());
62  if (num_strings_not_found > 0) {
63  // Dictionary could not find at least 1 target string, now look these up
64  // in the transient dictionary
65  transientLookupBulk(strings, string_ids.data());
66  }
67  return string_ids;
68 }
#define CHECK_GE(x, y)
Definition: Logger.h:224
std::shared_ptr< StringDictionary > string_dict_
void transientLookupBulk(const std::vector< String > &lookup_strings, int32_t *string_ids) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const std::map<int32_t, std::string> StringDictionaryProxy::getTransientMapping ( ) const
inline

Definition at line 175 of file StringDictionaryProxy.h.

References transient_int_to_str_.

Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().

175  {
176  return transient_int_to_str_;
177  }
std::map< int32_t, std::string > transient_int_to_str_

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::lookupTransientStringUnlocked ( const String &  lookup_string) const
private

Referenced by buildTranslationMapToOtherProxy(), transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().

+ Here is the caller graph for this function:

template<>
int32_t StringDictionaryProxy::lookupTransientStringUnlocked ( const std::string &  lookup_string) const

Definition at line 185 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and transient_str_to_int_.

186  {
187  const auto it = transient_str_to_int_.find(lookup_string);
188  if (it != transient_str_to_int_.end()) {
189  return it->second;
190  }
192 }
static constexpr int32_t INVALID_STR_ID
std::map< std::string, int32_t > transient_str_to_int_
template<>
int32_t StringDictionaryProxy::lookupTransientStringUnlocked ( const std::string_view &  lookup_string) const

Definition at line 195 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and transient_str_to_int_.

196  {
197  const auto it = transient_str_to_int_.find(std::string(lookup_string));
198  if (it != transient_str_to_int_.end()) {
199  return it->second;
200  }
202 }
static constexpr int32_t INVALID_STR_ID
std::map< std::string, int32_t > transient_str_to_int_
size_t StringDictionaryProxy::storageEntryCount ( ) const

Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary.

Returns
size_t Number of entries in the string dictionary (at this proxy's generation if set)

Definition at line 410 of file StringDictionaryProxy.cpp.

References CHECK_LE, generation_, and string_dict_.

Referenced by entryCountUnlocked(), getRangeUnlocked(), and getString().

410  {
411  const size_t num_storage_entries{generation_ == -1 ? string_dict_->storageEntryCount()
412  : generation_};
413  CHECK_LE(num_storage_entries, static_cast<size_t>(std::numeric_limits<int32_t>::max()));
414  return num_storage_entries;
415 }
std::shared_ptr< StringDictionary > string_dict_
#define CHECK_LE(x, y)
Definition: Logger.h:222

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::transientEntryCount ( ) const

Returns the number of transient string entries for this proxy.

Returns
size_t Number of transient string entries for this proxy

Definition at line 426 of file StringDictionaryProxy.cpp.

References rw_mutex_, and transientEntryCountUnlocked().

426  {
427  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
429 }
size_t transientEntryCountUnlocked() const
mapd_shared_lock< mapd_shared_mutex > read_lock

+ Here is the call graph for this function:

size_t StringDictionaryProxy::transientEntryCountUnlocked ( ) const
private

Definition at line 417 of file StringDictionaryProxy.cpp.

References CHECK_LE, and transient_str_to_int_.

Referenced by buildTranslationMapToOtherProxy(), entryCountUnlocked(), getRangeUnlocked(), and transientEntryCount().

417  {
418  // CHECK_LE(num_storage_entries,
419  // static_cast<size_t>(std::numeric_limits<int32_t>::max()));
420  const size_t num_transient_entries{transient_str_to_int_.size()};
421  CHECK_LE(num_transient_entries,
422  static_cast<size_t>(std::numeric_limits<int32_t>::max()) - 1);
423  return num_transient_entries;
424 }
std::map< std::string, int32_t > transient_str_to_int_
#define CHECK_LE(x, y)
Definition: Logger.h:222

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::transientLookupAndAddUnlocked ( const std::string &  str)
private

Definition at line 451 of file StringDictionaryProxy.cpp.

References CHECK, transient_int_to_str_, and transient_str_to_int_.

Referenced by getOrAddTransient(), and getOrAddTransientBulk().

451  {
452  const auto it = transient_str_to_int_.find(str);
453  if (it != transient_str_to_int_.end()) {
454  return it->second;
455  }
456  int32_t transient_id =
457  -(transient_str_to_int_.size() + 2); // make sure it's not INVALID_STR_ID
458  {
459  auto it_ok = transient_str_to_int_.insert(std::make_pair(str, transient_id));
460  CHECK(it_ok.second);
461  }
462  {
463  auto it_ok = transient_int_to_str_.insert(std::make_pair(transient_id, str));
464  CHECK(it_ok.second);
465  }
466  return transient_id;
467 }
std::map< int32_t, std::string > transient_int_to_str_
std::map< std::string, int32_t > transient_str_to_int_
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the caller graph for this function:

template<typename String >
template void StringDictionaryProxy::transientLookupBulk ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 470 of file StringDictionaryProxy.cpp.

References rw_mutex_, transient_str_to_int_, transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().

Referenced by getTransientBulk().

471  {
472  // std::vector<int32_t>& string_ids) {
473  const size_t num_strings = lookup_strings.size();
474  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
475 
476  if (num_strings == static_cast<size_t>(0) || transient_str_to_int_.empty()) {
477  return;
478  }
479  constexpr size_t tbb_parallel_threshold{20000};
480  if (num_strings < tbb_parallel_threshold) {
481  transientLookupBulkUnlocked(lookup_strings, string_ids);
482  } else {
483  transientLookupBulkParallelUnlocked(lookup_strings, string_ids);
484  }
485 }
void transientLookupBulkUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const
void transientLookupBulkParallelUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const
std::map< std::string, int32_t > transient_str_to_int_
mapd_shared_lock< mapd_shared_mutex > read_lock

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
template void StringDictionaryProxy::transientLookupBulkParallelUnlocked ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 519 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), max_inputs_per_thread, and threading_serial::parallel_for().

Referenced by transientLookupBulk().

521  {
522  const size_t num_strings = lookup_strings.size();
523  const size_t max_thread_count = std::thread::hardware_concurrency();
524  const size_t max_inputs_per_thread = 20000;
525  const size_t min_grain_size = max_inputs_per_thread / 2;
526  const size_t num_threads =
527  std::min(max_thread_count,
528  ((num_strings + max_inputs_per_thread - 1) / max_inputs_per_thread));
529 
530  tbb::task_arena limited_arena(num_threads);
531  tbb::task_group tg;
532  limited_arena.execute([&] {
533  tg.run([&] {
534  tbb::parallel_for(
535  tbb::blocked_range<size_t>(0, num_strings, min_grain_size),
536  [&](const tbb::blocked_range<size_t>& r) {
537  const size_t start_idx = r.begin();
538  const size_t end_idx = r.end();
539  for (size_t string_idx = start_idx; string_idx < end_idx; ++string_idx) {
540  if (string_ids[string_idx] != StringDictionary::INVALID_STR_ID) {
541  continue;
542  }
543  string_ids[string_idx] =
544  lookupTransientStringUnlocked(lookup_strings[string_idx]);
545  }
546  },
547  tbb::simple_partitioner());
548  });
549  });
550 
551  limited_arena.execute([&] { tg.wait(); });
552 }
const size_t max_inputs_per_thread
static constexpr int32_t INVALID_STR_ID
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
template void StringDictionaryProxy::transientLookupBulkUnlocked ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 496 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and lookupTransientStringUnlocked().

Referenced by transientLookupBulk().

498  {
499  const size_t num_strings = lookup_strings.size();
500  for (size_t string_idx = 0; string_idx < num_strings; ++string_idx) {
501  if (string_ids[string_idx] != StringDictionary::INVALID_STR_ID) {
502  continue;
503  }
504  // If we're here it means we need to look up this string as we don't
505  // have a valid id for it
506  string_ids[string_idx] = lookupTransientStringUnlocked(lookup_strings[string_idx]);
507  }
508 }
static constexpr int32_t INVALID_STR_ID
int32_t lookupTransientStringUnlocked(const String &lookup_string) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void StringDictionaryProxy::updateGeneration ( const int64_t  generation)
noexcept

Definition at line 440 of file StringDictionaryProxy.cpp.

References CHECK_EQ.

440  {
441  if (generation == -1) {
442  return;
443  }
444  if (generation_ != -1) {
445  CHECK_EQ(generation_, generation);
446  return;
447  }
448  generation_ = generation;
449 }
#define CHECK_EQ(x, y)
Definition: Logger.h:219

Friends And Related Function Documentation

bool operator!= ( const StringDictionaryProxy sdp1,
const StringDictionaryProxy sdp2 
)
friend

Definition at line 582 of file StringDictionaryProxy.cpp.

582  {
583  return !(sdp1 == sdp2);
584 }
bool operator== ( const StringDictionaryProxy sdp1,
const StringDictionaryProxy sdp2 
)
friend

Definition at line 570 of file StringDictionaryProxy.cpp.

570  {
571  if (sdp1.string_dict_id_ != sdp2.string_dict_id_) {
572  return false;
573  }
574  if (sdp1.transient_int_to_str_.size() != sdp2.transient_int_to_str_.size()) {
575  return false;
576  }
577  return std::equal(sdp1.transient_int_to_str_.begin(),
578  sdp1.transient_int_to_str_.end(),
579  sdp2.transient_int_to_str_.begin());
580 }
std::map< int32_t, std::string > transient_int_to_str_

Member Data Documentation

const int32_t StringDictionaryProxy::string_dict_id_
private

Definition at line 195 of file StringDictionaryProxy.h.

Referenced by getDictId(), and operator==().

std::map<int32_t, std::string> StringDictionaryProxy::transient_int_to_str_
private
std::map<std::string, int32_t> StringDictionaryProxy::transient_str_to_int_
private

The documentation for this class was generated from the following files: