OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LogFileBufferParser.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 #include "Catalog/Catalog.h"
20 #include "Shared/distributed.h"
21 
22 namespace foreign_storage {
23 namespace {
// Strips a single pair of enclosing double quotes from `value`.
//
// The input is returned unchanged unless it is at least two characters long and both
// its first and last characters are '"'. Only the outermost pair is removed; quotes
// inside the string are left alone.
std::string remove_quotes(const std::string& value) {
  const auto length = value.length();
  const bool is_quoted = length > 1 && value.front() == '"' && value.back() == '"';
  return is_quoted ? value.substr(1, length - 2) : value;
}
30 
31 std::map<std::string, std::string> create_map_from_arrays(
32  const std::string& keys_array,
33  const std::string& values_array) {
34  std::vector<std::string> keys;
35  import_export::delimited_parser::parse_string_array(keys_array, {}, keys, true);
36  std::vector<std::string> values;
37  import_export::delimited_parser::parse_string_array(values_array, {}, values, true);
38  if (keys.size() == values.size()) {
39  std::map<std::string, std::string> values_map;
40  for (size_t i = 0; i < keys.size(); i++) {
41  values_map[remove_quotes(keys[i])] = remove_quotes(values[i]);
42  }
43  return values_map;
44  } else {
45  return {};
46  }
47 }
48 
49 std::string get_node_name() {
50  std::string node_name;
51  if (dist::is_leaf_node()) {
52  node_name = "Leaf " + to_string(g_distributed_leaf_idx);
53  } else {
54  node_name = "Server";
55  }
56  return node_name;
57 }
58 
// Appends `count` copies of `value` as new columns.
//
// Each copy is stored in `parsed_columns_str` (which owns the characters) and a view of
// that stored copy is appended to `parsed_columns_sv`.
//
// Growing `parsed_columns_str` may reallocate its buffer, which relocates every stored
// std::string. Views taken earlier would then point at freed storage (always the case
// for short strings kept inline in the string object). To keep the views valid, a
// reallocation is detected through the capacity change and the pre-existing views are
// re-pointed at the relocated strings.
//
// The re-pointing assumes the two vectors are parallel, i.e. view i refers to string i.
// It is only performed when both vectors had the same size on entry; otherwise the
// existing views are left untouched and only the new views are appended.
//
// @param parsed_columns_str owning storage for column values
// @param parsed_columns_sv  views into `parsed_columns_str`
// @param value              value to append
// @param count              number of copies to append (defaults to 1; 0 is a no-op)
void add_column_value(std::vector<std::string>& parsed_columns_str,
                      std::vector<std::string_view>& parsed_columns_sv,
                      const std::string& value,
                      size_t count = 1) {
  const size_t existing_count = parsed_columns_str.size();
  const bool views_are_parallel = parsed_columns_sv.size() == existing_count;
  const size_t capacity_before = parsed_columns_str.capacity();

  for (size_t i = 0; i < count; ++i) {
    parsed_columns_str.emplace_back(value);
  }

  // A capacity change means the strings were moved to a new buffer.
  if (views_are_parallel && parsed_columns_str.capacity() != capacity_before) {
    for (size_t i = 0; i < existing_count; ++i) {
      parsed_columns_sv[i] = parsed_columns_str[i];
    }
  }

  // Views for the new entries are created only after all growth is done, so they
  // cannot be invalidated by a later append within this call.
  for (size_t i = existing_count; i < parsed_columns_str.size(); ++i) {
    parsed_columns_sv.emplace_back(parsed_columns_str[i]);
  }
}
68 
// Expands a request "nonce" into three column values, appended in this order:
// dashboard id, dashboard name, chart id.
//
// The nonce is split on "/". If that does not yield exactly two parts, three empty
// values are appended. Otherwise the first part is the dashboard id and the second part
// is split on "-" to obtain the chart id. Every path through this function appends
// exactly three values.
//
// table_id is used to look up the "database_name" column of the table being parsed.
// NOTE(review): db_id is not referenced in the lines visible here; it is presumably
// consumed by the initializer that is missing below - confirm against the original file.
69 void add_nonce_values(std::vector<std::string>& parsed_columns_str,
70  std::vector<std::string_view>& parsed_columns_sv,
71  const std::string& nonce,
72  int32_t table_id,
73  int32_t db_id) {
74  // Nonce has the following format: "{dashboard id}/{chart id}-{layer id}"
75  auto dashboard_and_chart_id = split(nonce, "/");
76  if (dashboard_and_chart_id.size() == 2) {
77  auto dashboard_id_str = dashboard_and_chart_id[0];
78  int32_t dashboard_id{0};
 // The id string is blanked when it is the literal "null" or when std::atoi yields 0
 // (which also covers non-numeric text). The assignment inside the condition is
 // intentional: it stores the parsed id for the checks below.
79  if (dashboard_id_str == "null" ||
80  !(dashboard_id = std::atoi(dashboard_id_str.c_str()))) {
81  dashboard_id_str = "";
82  }
83  add_column_value(parsed_columns_str, parsed_columns_sv, dashboard_id_str);
84 
85  // Get dashboard name from dashboard id.
86  std::string dashboard_name;
 // A negative parsed id keeps its id string above but takes the else branch below,
 // so its dashboard name and chart id are left empty.
87  if (dashboard_id > 0) {
88  // Get dashboard database name.
 // NOTE(review): the initializer expression of info_schema_catalog is missing from
 // this listing (the embedded numbering jumps from 89 to 91) - restore it from the
 // original file before building.
89  auto info_schema_catalog =
91  CHECK(info_schema_catalog);
92  auto db_name_column =
93  info_schema_catalog->getMetadataForColumn(table_id, "database_name");
94  CHECK(db_name_column);
 // columnId is treated as 1-based: the database name is read back from the values
 // already appended for this row, at index columnId - 1.
95  CHECK_GT(db_name_column->columnId, 0);
96  CHECK_LE(size_t(db_name_column->columnId), parsed_columns_str.size());
97  const auto& db_name = parsed_columns_str[db_name_column->columnId - 1];
98 
99  // Get dashboard metadata.
100  Catalog_Namespace::DBMetadata db_metadata;
101  auto& sys_catalog = Catalog_Namespace::SysCatalog::instance();
 // If the database cannot be found, dashboard_name stays empty.
102  if (sys_catalog.getMetadataForDB(db_name, db_metadata)) {
 // NOTE(review): catalog is dereferenced without a null check - confirm getCatalog
 // cannot return null here.
103  auto catalog = sys_catalog.getCatalog(db_metadata, false);
104  auto dashboard = catalog->getMetadataForDashboard(dashboard_id);
105  if (dashboard) {
106  dashboard_name = dashboard->dashboardName;
107  } else {
 // No dashboard metadata exists for this id in the resolved database.
108  dashboard_name = "<DELETED>";
109  }
110  }
111  add_column_value(parsed_columns_str, parsed_columns_sv, dashboard_name);
112 
 // Only the chart id (the part before the first "-") is stored; the layer id is
 // discarded.
 // NOTE(review): element 0 is read without checking that split returned at least
 // one entry - confirm split never returns an empty vector (e.g. for a nonce that
 // ends in "/").
113  auto chart_and_layer_id = split(dashboard_and_chart_id[1], "-");
114  add_column_value(parsed_columns_str, parsed_columns_sv, chart_and_layer_id[0]);
115  } else {
116  // Null dashboard name and chart id.
117  add_column_value(parsed_columns_str, parsed_columns_sv, "", 2);
118  }
119  } else {
120  // Null dashboard id, dashboard name, and chart id.
121  add_column_value(parsed_columns_str, parsed_columns_sv, "", 3);
122  }
123 }
124 } // namespace
125 
126 LogFileBufferParser::LogFileBufferParser(const ForeignTable* foreign_table, int32_t db_id)
127  : RegexFileBufferParser(foreign_table)
128  , foreign_table_(foreign_table)
129  , db_id_(db_id) {}
130 
// NOTE(review): the first line of this definition (return type, qualified name and
// opening parenthesis) is missing from this listing; the embedded numbering starts at
// 132. The parameter list and body below belong to a const member function that reads
// foreign_table_ and db_id_ - presumably LogFileBufferParser::regexMatchColumns;
// confirm against the original file.
//
// Splits one log row into column values. Both output vectors must be empty on entry.
// For the "request_logs" table the row is matched here and the return value is true
// only when the row did not match the regex (all columns are then null views). For any
// other table the remaining arguments are handed to another call whose target line is
// missing from this listing (see the note near the end).
132  const std::string& row_str,
133  const boost::regex& line_regex,
134  size_t logical_column_count,
135  std::vector<std::string>& parsed_columns_str,
136  std::vector<std::string_view>& parsed_columns_sv,
137  const std::string& file_path) const {
138  CHECK(parsed_columns_str.empty());
139  CHECK(parsed_columns_sv.empty());
 // NOTE(review): the embedded numbering jumps from 139 to 141, so one line of the
 // original is missing here.
141  if (foreign_table_->tableName == "request_logs") {
142  boost::smatch match;
143  bool set_all_nulls{false};
144  if (boost::regex_match(row_str, match, line_regex)) {
 // match[0] is the whole match, so the capture-group count is size() - 1.
145  auto matched_column_count = match.size() - 1;
146  // Last 2 matched columns are associative arrays.
147  CHECK_GT(matched_column_count, size_t(2));
 // Copy every capture group except the last two verbatim.
148  for (size_t i = 1; i < match.size() - 2; i++) {
149  add_column_value(parsed_columns_str, parsed_columns_sv, match[i].str());
150  }
151  // Special handling for associative arrays.
 // The second-to-last group holds the keys and the last group holds the values.
152  auto values_map = create_map_from_arrays(match[matched_column_count - 1].str(),
153  match[matched_column_count].str());
154  static const std::array<std::string, 5> keys{
155  "query_str", "client", "nonce", "execution_time_ms", "total_time_ms"};
 // Column accounting: (matched - 2) plain columns, one column for each of the four
 // non-nonce keys, and three columns for "nonce" give matched + 5 in total, which
 // equals matched + keys.size().
156  CHECK_EQ(logical_column_count, matched_column_count + keys.size());
157  for (const auto& key : keys) {
158  auto it = values_map.find(key);
159  if (it == values_map.end()) {
160  if (key == "nonce") {
161  // Null dashboard id, dashboard name, and chart id.
162  add_column_value(parsed_columns_str, parsed_columns_sv, "", 3);
163  } else {
164  // Add null value for missing entry.
165  add_column_value(parsed_columns_str, parsed_columns_sv, "");
166  }
167  } else {
168  if (key == "nonce") {
 // NOTE(review): the embedded numbering jumps from 171 to 173, so the argument
 // between it->second and db_id_ is missing from this listing. Going by the
 // add_nonce_values signature it is the table_id argument - restore it from the
 // original file.
169  add_nonce_values(parsed_columns_str,
170  parsed_columns_sv,
171  it->second,
173  db_id_);
174  } else {
175  add_column_value(parsed_columns_str, parsed_columns_sv, it->second);
176  }
177  }
178  }
179  CHECK_EQ(parsed_columns_str.size(), parsed_columns_sv.size());
180  } else {
 // The row did not match: emit one default-constructed (null) view per logical
 // column and report that through the return value.
181  parsed_columns_str.clear();
182  parsed_columns_sv =
183  std::vector<std::string_view>(logical_column_count, std::string_view{});
184  set_all_nulls = true;
185  }
186  CHECK_EQ(parsed_columns_sv.size(), logical_column_count) << "In row: " << row_str;
187  return set_all_nulls;
188  } else {
189  // Add fixed value for the server_logs table "node" column.
190  if (foreign_table_->tableName == "server_logs") {
191  add_column_value(parsed_columns_str, parsed_columns_sv, get_node_name());
192  }
 // NOTE(review): the embedded numbering jumps from 192 to 194, so the line that
 // begins this statement (the call target and its first argument) is missing from
 // this listing. The remaining arguments mirror this function's own parameters, so
 // it is presumably a delegation to the base-class implementation - confirm against
 // the original file.
194  line_regex,
195  logical_column_count,
196  parsed_columns_str,
197  parsed_columns_sv,
198  file_path);
199  }
200 }
201 
203  return true;
204 }
205 
207  return true;
208 }
209 } // namespace foreign_storage
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::string tableName
void add_nonce_values(std::vector< std::string > &parsed_columns_str, std::vector< std::string_view > &parsed_columns_sv, const std::string &nonce, int32_t table_id, int32_t db_id)
#define CHECK_GT(x, y)
Definition: Logger.h:305
std::string to_string(char const *&&v)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
LogFileBufferParser(const ForeignTable *foreign_table, int32_t db_id)
This file contains the class specification and related data structures for Catalog.
static SysCatalog & instance()
Definition: SysCatalog.h:343
bool is_leaf_node()
Definition: distributed.cpp:29
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
bool regexMatchColumns(const std::string &row_str, const boost::regex &line_regex, size_t logical_column_count, std::vector< std::string > &parsed_columns_str, std::vector< std::string_view > &parsed_columns_sv, const std::string &file_path) const override
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:98
#define CHECK_LE(x, y)
Definition: Logger.h:304
virtual bool regexMatchColumns(const std::string &row_str, const boost::regex &line_regex, size_t logical_column_count, std::vector< std::string > &parsed_columns_str, std::vector< std::string_view > &parsed_columns_sv, const std::string &file_path) const
std::map< std::string, std::string > create_map_from_arrays(const std::string &keys_array, const std::string &values_array)
#define CHECK(condition)
Definition: Logger.h:291
void add_column_value(std::vector< std::string > &parsed_columns_str, std::vector< std::string_view > &parsed_columns_sv, const std::string &value, size_t count=1)