OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringDictionaryProxy.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef STRINGDICTIONARY_STRINGDICTIONARYPROXY_H
18 #define STRINGDICTIONARY_STRINGDICTIONARYPROXY_H
19 
20 #include "../Shared/mapd_shared_mutex.h"
21 #include "StringDictionary.h"
22 
23 #include <map>
24 #include <string>
25 #include <string_view>
26 #include <tuple>
27 #include <vector>
28 
29 using StringDictionaryProxyRange = std::pair<int32_t, int32_t>;
31  int32_t source_domain_min_{0};
32  std::vector<int32_t> translation_map_;
33 
34  bool isEmpty() const { return translation_map_.empty(); }
35  size_t size() const { return translation_map_.size(); }
36  size_t numTransientEntries() const {
37  return static_cast<size_t>(source_domain_min_ < 0 ? -source_domain_min_ - 1 : 0);
38  }
39  size_t numStorageEntries() const { return static_cast<size_t>(domainEnd()); }
40  int32_t* dataPtr() { return !isEmpty() ? translation_map_.data() : nullptr; }
41  const int32_t* dataPtr() const {
42  return !isEmpty() ? translation_map_.data() : nullptr;
43  }
44  int32_t* storageEntriesPtr() {
45  return !isEmpty() ? translation_map_.data() +
47  : nullptr;
48  }
49  int32_t domainStart() const { return source_domain_min_; }
50  int32_t domainEnd() const { return source_domain_min_ + static_cast<int64_t>(size()); }
52  return std::make_pair(source_domain_min_, domainEnd());
53  }
54 
56  : source_domain_min_(source_domain.first)
57  , translation_map_(std::max(source_domain.second - source_domain.first, 0)) {}
58  // Builds an
60 };
61 
62 // used to access a StringDictionary when transient strings are involved
64  friend bool operator==(const StringDictionaryProxy& sdp1,
65  const StringDictionaryProxy& sdp2);
66  friend bool operator!=(const StringDictionaryProxy& sdp1,
67  const StringDictionaryProxy& sdp2);
68 
69  public:
70  StringDictionaryProxy(std::shared_ptr<StringDictionary> sd,
71  const int32_t string_dict_id,
72  const int64_t generation);
73 
74  int32_t getDictId() const noexcept { return string_dict_id_; };
75  int32_t getOrAdd(const std::string& str) noexcept;
77  int64_t getGeneration() const noexcept;
78 
99  std::vector<int32_t> getTransientBulk(const std::vector<std::string>& strings) const;
100  int32_t getOrAddTransient(const std::string& str);
101  std::vector<int32_t> getOrAddTransientBulk(const std::vector<std::string>& strings);
102  int32_t getIdOfString(const std::string& str) const;
104  const std::string& str) const; // disregard generation, only used by QueryRenderer
105  std::string getString(int32_t string_id) const;
106  std::vector<std::string> getStrings(const std::vector<int32_t>& string_ids) const;
107  std::pair<const char*, size_t> getStringBytes(int32_t string_id) const noexcept;
110 
132  const StringDictionaryProxy* dest_proxy) const;
133 
143  size_t storageEntryCount() const;
144 
151  size_t transientEntryCount() const;
152 
161  size_t entryCount() const;
162 
163  void updateGeneration(const int64_t generation) noexcept;
164 
165  std::vector<int32_t> getLike(const std::string& pattern,
166  const bool icase,
167  const bool is_simple,
168  const char escape) const;
169 
170  std::vector<int32_t> getCompare(const std::string& pattern,
171  const std::string& comp_operator) const;
172 
173  std::vector<int32_t> getRegexpLike(const std::string& pattern, const char escape) const;
174 
175  const std::map<int32_t, std::string> getTransientMapping() const {
176  return transient_int_to_str_;
177  }
178 
179  private:
180  size_t transientEntryCountUnlocked() const;
181  size_t entryCountUnlocked() const;
182  int32_t transientLookupAndAddUnlocked(const std::string& str);
183  template <typename String>
184  int32_t lookupTransientStringUnlocked(const String& lookup_string) const;
185  template <typename String>
186  void transientLookupBulk(const std::vector<String>& lookup_strings,
187  int32_t* string_ids) const;
188  template <typename String>
189  void transientLookupBulkUnlocked(const std::vector<String>& lookup_strings,
190  int32_t* string_ids) const;
191  template <typename String>
192  void transientLookupBulkParallelUnlocked(const std::vector<String>& lookup_strings,
193  int32_t* string_ids) const;
194  std::shared_ptr<StringDictionary> string_dict_;
195  const int32_t string_dict_id_;
196  std::map<int32_t, std::string> transient_int_to_str_;
197  std::map<std::string, int32_t> transient_str_to_int_;
198  int64_t generation_;
200 };
201 #endif // STRINGDICTIONARY_STRINGDICTIONARYPROXY_H
std::pair< const char *, size_t > getStringBytes(int32_t string_id) const noexcept
std::vector< int32_t > getLike(const std::string &pattern, const bool icase, const bool is_simple, const char escape) const
size_t transientEntryCountUnlocked() const
std::map< int32_t, std::string > transient_int_to_str_
size_t entryCount() const
Returns the number of total string entries for this proxy, both stored in the underlying dictionary a...
int32_t getIdOfStringNoGeneration(const std::string &str) const
#define const
StringDictionaryProxyRange getRangeUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
StringDictionary * getDictionary() const noexcept
std::string getString(int32_t string_id) const
StringDictionaryProxyRange domain() const
void transientLookupBulkUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const
std::vector< int32_t > getTransientBulk(const std::vector< std::string > &strings) const
Executes read-only lookup of a vector of strings and returns a vector of their integer ids...
StringDictionaryProxy(std::shared_ptr< StringDictionary > sd, const int32_t string_dict_id, const int64_t generation)
std::vector< int32_t > getCompare(const std::string &pattern, const std::string &comp_operator) const
std::shared_ptr< StringDictionary > string_dict_
void transientLookupBulkParallelUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const
std::shared_timed_mutex mapd_shared_mutex
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
std::map< std::string, int32_t > transient_str_to_int_
std::vector< std::string > getStrings(const std::vector< int32_t > &string_ids) const
std::pair< int32_t, int32_t > StringDictionaryProxyRange
const std::map< int32_t, std::string > getTransientMapping() const
void updateGeneration(const int64_t generation) noexcept
size_t transientEntryCount() const
Returns the number of transient string entries for this proxy.
void transientLookupBulk(const std::vector< String > &lookup_strings, int32_t *string_ids) const
std::vector< int32_t > getRegexpLike(const std::string &pattern, const char escape) const
int32_t getOrAdd(const std::string &str) noexcept
int32_t getDictId() const noexcept
std::vector< int32_t > getOrAddTransientBulk(const std::vector< std::string > &strings)
friend bool operator!=(const StringDictionaryProxy &sdp1, const StringDictionaryProxy &sdp2)
int32_t getOrAddTransient(const std::string &str)
std::shared_ptr< StringDictionaryProxyTranslationMap > buildTranslationMapToOtherProxy(const StringDictionaryProxy *dest_proxy) const
Builds a vectorized string_id translation map from this proxy to dest_proxy.
friend bool operator==(const StringDictionaryProxy &sdp1, const StringDictionaryProxy &sdp2)
StringDictionaryProxyRange getRange() const
int32_t transientLookupAndAddUnlocked(const std::string &str)
int32_t getIdOfString(const std::string &str) const
int64_t getGeneration() const noexcept
StringDictionaryProxyTranslationMap(const StringDictionaryProxyRange source_domain)