OmniSciDB  dfae7c3b14
StringDictionaryProxy.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #include <thread>
20 
21 #include "Logger/Logger.h"
22 #include "Shared/sqltypes.h"
23 #include "Shared/thread_count.h"
25 #include "Utils/Regexp.h"
26 #include "Utils/StringLike.h"
27 
28 StringDictionaryProxy::StringDictionaryProxy(std::shared_ptr<StringDictionary> sd,
29  const int64_t generation)
30  : string_dict_(sd), generation_(generation) {}
31 
32 int32_t truncate_to_generation(const int32_t id, const size_t generation) {
34  return id;
35  }
36  CHECK_GE(id, 0);
37  return static_cast<size_t>(id) >= generation ? StringDictionary::INVALID_STR_ID : id;
38 }
39 
40 int32_t StringDictionaryProxy::getOrAddTransient(const std::string& str) {
41  mapd_lock_guard<mapd_shared_mutex> write_lock(rw_mutex_);
43  auto transient_id =
44  truncate_to_generation(string_dict_->getIdOfString(str), generation_);
45  if (transient_id != StringDictionary::INVALID_STR_ID) {
46  return transient_id;
47  }
48  const auto it = transient_str_to_int_.find(str);
49  if (it != transient_str_to_int_.end()) {
50  return it->second;
51  }
52  transient_id =
53  -(transient_str_to_int_.size() + 2); // make sure it's not INVALID_STR_ID
54  {
55  auto it_ok = transient_str_to_int_.insert(std::make_pair(str, transient_id));
56  CHECK(it_ok.second);
57  }
58  {
59  auto it_ok = transient_int_to_str_.insert(std::make_pair(transient_id, str));
60  CHECK(it_ok.second);
61  }
62  return transient_id;
63 }
64 
65 int32_t StringDictionaryProxy::getIdOfString(const std::string& str) const {
66  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
68  auto str_id = truncate_to_generation(string_dict_->getIdOfString(str), generation_);
70  return str_id;
71  }
72  auto it = transient_str_to_int_.find(str);
73  return it != transient_str_to_int_.end() ? it->second
75 }
76 
77 int32_t StringDictionaryProxy::getIdOfStringNoGeneration(const std::string& str) const {
78  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
79  auto str_id = string_dict_->getIdOfString(str);
81  return str_id;
82  }
83  auto it = transient_str_to_int_.find(str);
84  return it != transient_str_to_int_.end() ? it->second
86 }
87 
88 std::string StringDictionaryProxy::getString(int32_t string_id) const {
89  if (inline_int_null_value<int32_t>() == string_id) {
90  return "";
91  }
92  mapd_shared_lock<mapd_shared_mutex> read_lock(rw_mutex_);
93  if (string_id >= 0) {
94  return string_dict_->getString(string_id);
95  }
97  auto it = transient_int_to_str_.find(string_id);
98  CHECK(it != transient_int_to_str_.end());
99  return it->second;
100 }
101 
102 namespace {
103 
104 bool is_like(const std::string& str,
105  const std::string& pattern,
106  const bool icase,
107  const bool is_simple,
108  const char escape) {
109  return icase
110  ? (is_simple ? string_ilike_simple(
111  str.c_str(), str.size(), pattern.c_str(), pattern.size())
112  : string_ilike(str.c_str(),
113  str.size(),
114  pattern.c_str(),
115  pattern.size(),
116  escape))
117  : (is_simple ? string_like_simple(
118  str.c_str(), str.size(), pattern.c_str(), pattern.size())
119  : string_like(str.c_str(),
120  str.size(),
121  pattern.c_str(),
122  pattern.size(),
123  escape));
124 }
125 
126 } // namespace
127 
128 std::vector<int32_t> StringDictionaryProxy::getLike(const std::string& pattern,
129  const bool icase,
130  const bool is_simple,
131  const char escape) const {
132  CHECK_GE(generation_, 0);
133  auto result = string_dict_->getLike(pattern, icase, is_simple, escape, generation_);
134  for (const auto& kv : transient_int_to_str_) {
135  const auto str = getString(kv.first);
136  if (is_like(str, pattern, icase, is_simple, escape)) {
137  result.push_back(kv.first);
138  }
139  }
140  return result;
141 }
142 
namespace {

// Evaluates `str <comp_operator> pattern` using std::string::compare ordering.
//
// @param str            left-hand operand.
// @param pattern        right-hand operand.
// @param comp_operator  one of "<", "<=", "=", ">", ">=", "<>".
// @return the result of the comparison.
// @throws std::runtime_error for any other operator; the message names the
//         rejected operator to make the failure diagnosable.
bool do_compare(const std::string& str,
                const std::string& pattern,
                const std::string& comp_operator) {
  const int res = str.compare(pattern);
  if (comp_operator == "<") {
    return res < 0;
  }
  if (comp_operator == "<=") {
    return res <= 0;
  }
  if (comp_operator == "=") {
    return res == 0;
  }
  if (comp_operator == ">") {
    return res > 0;
  }
  if (comp_operator == ">=") {
    return res >= 0;
  }
  if (comp_operator == "<>") {
    return res != 0;
  }
  throw std::runtime_error("unsupported string compare operator: '" + comp_operator +
                           "'");
}

}  // namespace
166 
168  const std::string& pattern,
169  const std::string& comp_operator) const {
170  CHECK_GE(generation_, 0);
171  auto result = string_dict_->getCompare(pattern, comp_operator, generation_);
172  for (const auto& kv : transient_int_to_str_) {
173  const auto str = getString(kv.first);
174  if (do_compare(str, pattern, comp_operator)) {
175  result.push_back(kv.first);
176  }
177  }
178  return result;
179 }
180 
181 namespace {
182 
183 bool is_regexp_like(const std::string& str,
184  const std::string& pattern,
185  const char escape) {
186  return regexp_like(str.c_str(), str.size(), pattern.c_str(), pattern.size(), escape);
187 }
188 
189 } // namespace
190 
191 std::vector<int32_t> StringDictionaryProxy::getRegexpLike(const std::string& pattern,
192  const char escape) const {
193  CHECK_GE(generation_, 0);
194  auto result = string_dict_->getRegexpLike(pattern, escape, generation_);
195  for (const auto& kv : transient_int_to_str_) {
196  const auto str = getString(kv.first);
197  if (is_regexp_like(str, pattern, escape)) {
198  result.push_back(kv.first);
199  }
200  }
201  return result;
202 }
203 
204 int32_t StringDictionaryProxy::getOrAdd(const std::string& str) noexcept {
205  return string_dict_->getOrAdd(str);
206 }
207 
208 std::pair<const char*, size_t> StringDictionaryProxy::getStringBytes(
209  int32_t string_id) const noexcept {
210  if (string_id >= 0) {
211  return string_dict_.get()->getStringBytes(string_id);
212  }
214  auto it = transient_int_to_str_.find(string_id);
215  CHECK(it != transient_int_to_str_.end());
216  return std::make_pair(it->second.c_str(), it->second.size());
217 }
218 
// NOTE(review): the line carrying this definition's signature is missing from
// the listing this file was extracted from and is not repeated anywhere else
// in it, so it is not reconstructed here -- restore it from upstream. The body
// below forwards to storageEntryCount() on the underlying dictionary.
220  return string_dict_.get()->storageEntryCount();
221 }
222 
223 void StringDictionaryProxy::updateGeneration(const int64_t generation) noexcept {
224  if (generation == -1) {
225  return;
226  }
227  if (generation_ != -1) {
228  CHECK_EQ(generation_, generation);
229  return;
230  }
231  generation_ = generation;
232 }
233 
235  return string_dict_.get();
236 }
237 
238 int64_t StringDictionaryProxy::getGeneration() const noexcept {
239  return generation_;
240 }
int32_t getIdOfString(const std::string &str) const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::pair< const char *, size_t > getStringBytes(int32_t string_id) const noexcept
std::map< int32_t, std::string > transient_int_to_str_
std::vector< int32_t > getLike(const std::string &pattern, const bool icase, const bool is_simple, const char escape) const
StringDictionary * getDictionary() noexcept
#define CHECK_GE(x, y)
Definition: Logger.h:210
Constants for Builtin SQL Types supported by OmniSci.
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
DEVICE bool regexp_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: Regexp.cpp:40
DEVICE bool string_ilike(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:257
std::string getString(int32_t string_id) const
#define CHECK_NE(x, y)
Definition: Logger.h:206
bool is_like(const std::string &str, const std::string &pattern, const bool icase, const bool is_simple, const char escape)
DEVICE bool string_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:246
std::map< std::string, int32_t > transient_str_to_int_
bool is_regexp_like(const std::string &str, const std::string &pattern, const char escape)
void updateGeneration(const int64_t generation) noexcept
DEVICE bool string_ilike_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:59
Functions to support the LIKE and ILIKE operator in SQL. Only single-byte character set is supported ...
bool do_compare(const std::string &str, const std::string &pattern, const std::string &comp_operator)
int32_t getOrAdd(const std::string &str) noexcept
int32_t truncate_to_generation(const int32_t id, const size_t generation)
StringDictionaryProxy(std::shared_ptr< StringDictionary > sd, const int64_t generation)
mapd_shared_lock< mapd_shared_mutex > read_lock
#define CHECK(condition)
Definition: Logger.h:197
int32_t getOrAddTransient(const std::string &str)
std::vector< int32_t > getRegexpLike(const std::string &pattern, const char escape) const
mapd_unique_lock< mapd_shared_mutex > write_lock
std::vector< int32_t > getCompare(const std::string &pattern, const std::string &comp_operator) const
int32_t getIdOfStringNoGeneration(const std::string &str) const
DEVICE bool string_like_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:43
mapd_shared_mutex rw_mutex_
int64_t getGeneration() const noexcept