OmniSciDB  04ee39c94c
StringDictionaryProxy.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "StringDictionaryProxy.h"
#include "../Shared/sqltypes.h"
#include "../Utils/Regexp.h"
#include "../Utils/StringLike.h"
#include "Shared/Logger.h"
#include "Shared/thread_count.h"
#include "StringDictionary.h"

#include <sys/fcntl.h>

#include <thread>
#include <utility>
28 
29 StringDictionaryProxy::StringDictionaryProxy(std::shared_ptr<StringDictionary> sd,
30  const ssize_t generation)
31  : string_dict_(sd), generation_(generation) {}
32 
33 int32_t truncate_to_generation(const int32_t id, const size_t generation) {
35  return id;
36  }
37  CHECK_GE(id, 0);
38  return static_cast<size_t>(id) >= generation ? StringDictionary::INVALID_STR_ID : id;
39 }
40 
41 int32_t StringDictionaryProxy::getOrAddTransient(const std::string& str) {
42  mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
44  auto transient_id =
45  truncate_to_generation(string_dict_->getIdOfString(str), generation_);
46  if (transient_id != StringDictionary::INVALID_STR_ID) {
47  return transient_id;
48  }
49  const auto it = transient_str_to_int_.find(str);
50  if (it != transient_str_to_int_.end()) {
51  return it->second;
52  }
53  transient_id =
54  -(transient_str_to_int_.size() + 2); // make sure it's not INVALID_STR_ID
55  {
56  auto it_ok = transient_str_to_int_.insert(std::make_pair(str, transient_id));
57  CHECK(it_ok.second);
58  }
59  {
60  auto it_ok = transient_int_to_str_.insert(std::make_pair(transient_id, str));
61  CHECK(it_ok.second);
62  }
63  return transient_id;
64 }
65 
66 int32_t StringDictionaryProxy::getIdOfString(const std::string& str) const {
67  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
69  auto str_id = truncate_to_generation(string_dict_->getIdOfString(str), generation_);
71  return str_id;
72  }
73  auto it = transient_str_to_int_.find(str);
74  return it != transient_str_to_int_.end() ? it->second
76 }
77 
78 int32_t StringDictionaryProxy::getIdOfStringNoGeneration(const std::string& str) const {
79  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
80  auto str_id = string_dict_->getIdOfString(str);
82  return str_id;
83  }
84  auto it = transient_str_to_int_.find(str);
85  return it != transient_str_to_int_.end() ? it->second
87 }
88 
89 std::string StringDictionaryProxy::getString(int32_t string_id) const {
90  if (inline_int_null_value<int32_t>() == string_id) {
91  return "";
92  }
93  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
94  if (string_id >= 0) {
95  return string_dict_->getString(string_id);
96  }
98  auto it = transient_int_to_str_.find(string_id);
99  CHECK(it != transient_int_to_str_.end());
100  return it->second;
101 }
102 
103 namespace {
104 
105 bool is_like(const std::string& str,
106  const std::string& pattern,
107  const bool icase,
108  const bool is_simple,
109  const char escape) {
110  return icase
111  ? (is_simple ? string_ilike_simple(
112  str.c_str(), str.size(), pattern.c_str(), pattern.size())
113  : string_ilike(str.c_str(),
114  str.size(),
115  pattern.c_str(),
116  pattern.size(),
117  escape))
118  : (is_simple ? string_like_simple(
119  str.c_str(), str.size(), pattern.c_str(), pattern.size())
120  : string_like(str.c_str(),
121  str.size(),
122  pattern.c_str(),
123  pattern.size(),
124  escape));
125 }
126 
127 } // namespace
128 
129 std::vector<int32_t> StringDictionaryProxy::getLike(const std::string& pattern,
130  const bool icase,
131  const bool is_simple,
132  const char escape) const {
133  CHECK_GE(generation_, 0);
134  auto result = string_dict_->getLike(pattern, icase, is_simple, escape, generation_);
135  for (const auto& kv : transient_int_to_str_) {
136  const auto str = getString(kv.first);
137  if (is_like(str, pattern, icase, is_simple, escape)) {
138  result.push_back(kv.first);
139  }
140  }
141  return result;
142 }
143 
144 namespace {
145 
// Evaluates `str <comp_operator> pattern` using std::string::compare ordering.
// Accepted operators: "<", "<=", "=", ">", ">=", "<>".
// Throws std::runtime_error for any other operator.
bool do_compare(const std::string& str,
                const std::string& pattern,
                const std::string& comp_operator) {
  const int ordering = str.compare(pattern);
  const bool is_less = ordering < 0;
  const bool is_equal = ordering == 0;

  if (comp_operator == "<") {
    return is_less;
  }
  if (comp_operator == "<=") {
    return is_less || is_equal;
  }
  if (comp_operator == "=") {
    return is_equal;
  }
  if (comp_operator == ">") {
    return !is_less && !is_equal;
  }
  if (comp_operator == ">=") {
    return !is_less;
  }
  if (comp_operator == "<>") {
    return !is_equal;
  }
  throw std::runtime_error("unsupported string compare operator");
}
165 
166 } // namespace
167 
169  const std::string& pattern,
170  const std::string& comp_operator) const {
171  CHECK_GE(generation_, 0);
172  auto result = string_dict_->getCompare(pattern, comp_operator, generation_);
173  for (const auto& kv : transient_int_to_str_) {
174  const auto str = getString(kv.first);
175  if (do_compare(str, pattern, comp_operator)) {
176  result.push_back(kv.first);
177  }
178  }
179  return result;
180 }
181 
182 namespace {
183 
184 bool is_regexp_like(const std::string& str,
185  const std::string& pattern,
186  const char escape) {
187  return regexp_like(str.c_str(), str.size(), pattern.c_str(), pattern.size(), escape);
188 }
189 
190 } // namespace
191 
192 std::vector<int32_t> StringDictionaryProxy::getRegexpLike(const std::string& pattern,
193  const char escape) const {
194  CHECK_GE(generation_, 0);
195  auto result = string_dict_->getRegexpLike(pattern, escape, generation_);
196  for (const auto& kv : transient_int_to_str_) {
197  const auto str = getString(kv.first);
198  if (is_regexp_like(str, pattern, escape)) {
199  result.push_back(kv.first);
200  }
201  }
202  return result;
203 }
204 
205 int32_t StringDictionaryProxy::getOrAdd(const std::string& str) noexcept {
206  return string_dict_->getOrAdd(str);
207 }
208 
209 std::pair<char*, size_t> StringDictionaryProxy::getStringBytes(int32_t string_id) const
210  noexcept {
211  return string_dict_.get()->getStringBytes(string_id);
212 }
213 
215  return string_dict_.get()->storageEntryCount();
216 }
217 
218 void StringDictionaryProxy::updateGeneration(const ssize_t generation) noexcept {
219  if (generation == -1) {
220  return;
221  }
222  if (generation_ != -1) {
223  CHECK_EQ(generation_, generation);
224  return;
225  }
226  generation_ = generation;
227 }
228 
230  return string_dict_.get();
231 }
232 
233 ssize_t StringDictionaryProxy::getGeneration() const noexcept {
234  return generation_;
235 }
int32_t getIdOfString(const std::string &str) const
std::pair< char *, size_t > getStringBytes(int32_t string_id) const noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:195
std::map< int32_t, std::string > transient_int_to_str_
std::vector< int32_t > getLike(const std::string &pattern, const bool icase, const bool is_simple, const char escape) const
StringDictionary * getDictionary() noexcept
void updateGeneration(const ssize_t generation) noexcept
#define CHECK_GE(x, y)
Definition: Logger.h:200
ssize_t getGeneration() const noexcept
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
DEVICE bool regexp_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: Regexp.cpp:40
DEVICE bool string_ilike(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:257
std::string getString(int32_t string_id) const
#define CHECK_NE(x, y)
Definition: Logger.h:196
bool is_like(const std::string &str, const std::string &pattern, const bool icase, const bool is_simple, const char escape)
DEVICE bool string_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:246
std::map< std::string, int32_t > transient_str_to_int_
StringDictionaryProxy(std::shared_ptr< StringDictionary > sd, const ssize_t generation)
bool is_regexp_like(const std::string &str, const std::string &pattern, const char escape)
DEVICE bool string_ilike_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:59
bool do_compare(const std::string &str, const std::string &pattern, const std::string &comp_operator)
int32_t getOrAdd(const std::string &str) noexcept
int32_t truncate_to_generation(const int32_t id, const size_t generation)
#define CHECK(condition)
Definition: Logger.h:187
int32_t getOrAddTransient(const std::string &str)
std::vector< int32_t > getRegexpLike(const std::string &pattern, const char escape) const
std::vector< int32_t > getCompare(const std::string &pattern, const std::string &comp_operator) const
int32_t getIdOfStringNoGeneration(const std::string &str) const
DEVICE bool string_like_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:43
mapd_shared_mutex rw_mutex_