OmniSciDB  04ee39c94c
StringTransform.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "StringTransform.h"
18 
19 #include <numeric>
20 #include <random>
21 #include <regex>
22 
23 void apply_shim(std::string& result,
24  const boost::regex& reg_expr,
25  const std::function<void(std::string&, const boost::smatch&)>& shim_fn) {
26  boost::smatch what;
27  std::vector<std::pair<size_t, size_t>> lit_pos = find_string_literals(result);
28  auto start_it = result.cbegin();
29  auto end_it = result.cend();
30  while (true) {
31  if (!boost::regex_search(start_it, end_it, what, reg_expr)) {
32  break;
33  }
34  const auto next_start =
35  inside_string_literal(what.position(), what.length(), lit_pos);
36  if (next_start >= 0) {
37  start_it = result.cbegin() + next_start;
38  } else {
39  shim_fn(result, what);
40  lit_pos = find_string_literals(result);
41  start_it = result.cbegin();
42  end_it = result.cend();
43  }
44  }
45 }
46 
47 std::vector<std::pair<size_t, size_t>> find_string_literals(const std::string& query) {
48  boost::regex literal_string_regex{R"(([^']+)('(?:[^']+|'')+'))", boost::regex::perl};
49  boost::smatch what;
50  auto it = query.begin();
51  auto prev_it = it;
52  std::vector<std::pair<size_t, size_t>> positions;
53  while (true) {
54  if (!boost::regex_search(it, query.end(), what, literal_string_regex)) {
55  break;
56  }
57  CHECK_GT(what[1].length(), 0);
58  prev_it = it;
59  it += what.length();
60  positions.emplace_back(prev_it + what[1].length() - query.begin(),
61  it - query.begin());
62  }
63  return positions;
64 }
65 
/**
 * Returns a copy of `query_str` with sensitive values masked.
 *
 * Two rules are applied in order, both case-insensitively:
 *  - the quoted value assigned to `password`, `s3_access_key` or
 *    `s3_secret_key` is replaced by 'XXXXXXXX';
 *  - the argument following `\set_license` is replaced by XXXXXXXX.
 *
 * @param query_str Query text to sanitize.
 * @return The sanitized text; `query_str` itself is not modified.
 */
std::string hide_sensitive_data_from_query(std::string const& query_str) {
  constexpr std::regex::flag_type flags =
      std::regex::ECMAScript | std::regex::icase | std::regex::optimize;
  static const std::initializer_list<std::pair<std::regex, std::string>> rules{
      // The value is matched as a whole quoted literal: any non-quote character
      // or a doubled quote ('') may appear inside it. This masks values that
      // contain an escaped quote completely, and an empty value ('') does not
      // swallow the text that follows it.
      {std::regex(
           R"(\b((?:password|s3_access_key|s3_secret_key)\s*=\s*)'(?:[^']|'')*')",
           flags),
       "$1'XXXXXXXX'"},
      {std::regex(R"((\\set_license\s+)\S+)", flags), "$1XXXXXXXX"}};
  std::string sanitized = query_str;
  for (const auto& rule : rules) {
    sanitized = std::regex_replace(sanitized, rule.first, rule.second);
  }
  return sanitized;
}
78 
/**
 * Checks whether the range [start, start + length) lies entirely within one of
 * the given literal spans.
 *
 * @param start             Offset of the first character of the range.
 * @param length            Number of characters in the range.
 * @param literal_positions (begin, end) spans to test against.
 * @return The end offset of the first span that contains the whole range, or
 *         -1 if no span contains it.
 */
ssize_t inside_string_literal(
    const size_t start,
    const size_t length,
    const std::vector<std::pair<size_t, size_t>>& literal_positions) {
  const auto end = start + length;
  for (const auto& literal_position : literal_positions) {
    const bool starts_inside = literal_position.first <= start;
    const bool ends_inside = end <= literal_position.second;
    if (starts_inside && ends_inside) {
      return literal_position.second;
    }
  }
  return -1;
}
91 
92 template <>
93 std::string to_string(char const*&& v) {
94  return std::string(v);
95 }
96 
97 template <>
98 std::string to_string(std::string&& v) {
99  return std::move(v);
100 }
101 
/**
 * Builds a string of `len` characters drawn uniformly from the digits and the
 * lower- and upper-case ASCII letters.
 *
 * The generator and distribution are function-local statics shared by all
 * calls; the generator is seeded once from std::random_device. No
 * synchronization is performed here.
 *
 * @param len Number of characters to generate.
 * @return The generated string (empty when `len` is 0).
 */
std::string generate_random_string(const size_t len) {
  static constexpr char charset[] =
      "0123456789"
      "abcdefghijklmnopqrstuvwxyz"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  // sizeof counts the terminating NUL, which must not be selectable.
  static constexpr size_t charset_size = sizeof(charset) - 1;

  static std::mt19937 prng{std::random_device{}()};
  static std::uniform_int_distribution<size_t> dist(0, charset_size - 1);

  std::string str;
  str.reserve(len);
  for (size_t i = 0; i < len; ++i) {
    str += charset[dist(prng)];
  }
  return str;
}
118 
119 std::vector<std::string> split(const std::string& str, const std::string& delim) {
120  CHECK(!delim.empty());
121  std::vector<std::string> result;
122  std::string::size_type i = 0, j = 0;
123  while ((i = str.find(delim, i)) != std::string::npos) {
124  result.emplace_back(str, j, i - j);
125  i += delim.size();
126  j = i;
127  }
128  result.emplace_back(str, j);
129  return result;
130 }
131 
/**
 * Returns a copy of `str` with leading and trailing whitespace removed.
 *
 * Whitespace is whatever std::isspace reports for the current C locale.
 *
 * @param str Text to trim.
 * @return The trimmed copy; empty if `str` is empty or all whitespace.
 */
std::string strip(const std::string& str) {
  // std::isspace requires a value representable as unsigned char (or EOF);
  // passing a negative plain char (e.g. a non-ASCII byte) is undefined.
  const auto is_space = [](const char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };
  std::string::size_type first = 0;
  std::string::size_type last = str.size();
  while (first < last && is_space(str[first])) {
    ++first;
  }
  while (last > first && is_space(str[last - 1])) {
    --last;
  }
  return str.substr(first, last - first);
}
140 
/**
 * Replaces every newline, carriage return and tab that is outside a quoted
 * region of `str` with a single space, in place.
 *
 * A quoted region starts at a single or double quote and ends at the next
 * quote of the same kind. A quote directly preceded by an unescaped backslash
 * is ignored, as is a quote of the other kind inside a region.
 *
 * @param str SQL text to clean up in place.
 * @return true if every quoted region was closed, false if the string ended
 *         inside one.
 */
bool remove_unquoted_newlines_linefeeds_and_tabs_from_sql_string(
    std::string& str) noexcept {
  char inside_quote = 0;  // 0 when outside a region, else the opening quote char
  bool previous_c_was_backslash = false;
  for (auto& c : str) {
    if (c == '\'' || c == '\"') {
      // a quote escaped by a backslash neither opens nor closes a region
      if (!previous_c_was_backslash) {
        if (inside_quote == c) {
          // matching quote: end of region
          inside_quote = 0;
        } else if (inside_quote == 0) {
          // start of a region
          inside_quote = c;
        }
        // otherwise a quote of the other kind inside a region: leave alone
      }
    } else if (inside_quote == 0 && (c == '\n' || c == '\t' || c == '\r')) {
      // outside any quoted region: replace with a space
      c = ' ';
    }
    // track backslashes; the second of a double backslash does not escape
    previous_c_was_backslash = (c == '\\') && !previous_c_was_backslash;
  }
  // if we didn't end a region, there were unclosed or mixed-nested quotes
  // accounting for backslashes should mean that this should only be the
  // case with truly malformed strings which Calcite will barf on anyway
  return inside_quote == 0;
}
std::string hide_sensitive_data_from_query(std::string const &query_str)
ssize_t inside_string_literal(const size_t start, const size_t length, const std::vector< std::pair< size_t, size_t >> &literal_positions)
std::vector< std::pair< size_t, size_t > > find_string_literals(const std::string &query)
void c(const std::string &query_string, const ExecutorDeviceType device_type)
std::string strip(const std::string &str)
#define CHECK_GT(x, y)
Definition: Logger.h:199
std::string to_string(char const *&&v)
T v(const TargetValue &r)
std::string generate_random_string(const size_t len)
void apply_shim(std::string &result, const boost::regex &reg_expr, const std::function< void(std::string &, const boost::smatch &)> &shim_fn)
std::vector< std::string > split(const std::string &str, const std::string &delim)
bool remove_unquoted_newlines_linefeeds_and_tabs_from_sql_string(std::string &str) noexcept
#define CHECK(condition)
Definition: Logger.h:187