OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MLModel.h
Go to the documentation of this file.
1 /*
2  * Copyright 2023 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "AbstractMLModel.h"
20 #include "MLModelMetadata.h"
22 
#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <sstream>
#include <stack>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
28 
29 #ifndef __CUDACC__
30 
31 #ifdef HAVE_ONEDAL
32 #include "daal.h"
33 #endif
34 
35 class MLModelMap {
36  public:
37  void addModel(const std::string& model_name, std::shared_ptr<AbstractMLModel> model) {
38  const auto upper_model_name = to_upper(model_name);
39  std::lock_guard<std::shared_mutex> model_map_write_lock(model_map_mutex_);
40  model_map_[upper_model_name] = model;
41  }
42 
43  bool modelExists(const std::string& model_name) const {
44  const auto upper_model_name = to_upper(model_name);
45  std::shared_lock<std::shared_mutex> model_map_read_lock(model_map_mutex_);
46  auto model_map_itr = model_map_.find(upper_model_name);
47  return model_map_itr != model_map_.end();
48  }
49 
50  std::shared_ptr<AbstractMLModel> getModel(const std::string& model_name) const {
51  const auto upper_model_name = to_upper(model_name);
52  std::shared_lock<std::shared_mutex> model_map_read_lock(model_map_mutex_);
53  auto model_map_itr = model_map_.find(upper_model_name);
54  if (model_map_itr != model_map_.end()) {
55  return model_map_itr->second;
56  }
57  const std::string error_str = "Model '" + upper_model_name + "' does not exist.";
58  throw std::runtime_error(error_str);
59  }
60 
61  void deleteModel(const std::string& model_name) {
62  const auto upper_model_name = to_upper(model_name);
63  std::lock_guard<std::shared_mutex> model_map_write_lock(model_map_mutex_);
64  auto const model_it = model_map_.find(upper_model_name);
65  if (model_it == model_map_.end()) {
66  std::ostringstream error_oss;
67  error_oss << "Cannot erase model " << upper_model_name
68  << ". No model by that name was found.";
69  throw std::runtime_error(error_oss.str());
70  }
71  model_map_.erase(model_it);
72  }
73 
74  std::vector<std::string> getModelNames() const {
75  std::shared_lock<std::shared_mutex> model_map_read_lock(model_map_mutex_);
76  std::vector<std::string> model_names;
77  model_names.reserve(model_map_.size());
78  for (auto const& model : model_map_) {
79  model_names.emplace_back(model.first);
80  }
81  return model_names;
82  }
83  std::vector<MLModelMetadata> getModelMetadata() const {
84  std::shared_lock<std::shared_mutex> model_map_read_lock(model_map_mutex_);
85  std::vector<MLModelMetadata> model_metadata;
86  for (auto const& model : model_map_) {
87  model_metadata.emplace_back(MLModelMetadata(
88  model.first,
89  model.second->getModelType(),
90  model.second->getModelTypeString(),
91  model.second->getNumLogicalFeatures(),
92  model.second->getNumFeatures(),
93  model.second->getNumCatFeatures(),
94  model.second->getNumLogicalFeatures() - model.second->getNumCatFeatures(),
95  model.second->getModelMetadataStr()));
96  }
97  return model_metadata;
98  }
99 
100  MLModelMetadata getModelMetadata(const std::string& model_name) const {
101  const auto upper_model_name = to_upper(model_name);
102  std::shared_lock<std::shared_mutex> model_map_read_lock(model_map_mutex_);
103  auto model_map_itr = model_map_.find(upper_model_name);
104  if (model_map_itr != model_map_.end()) {
105  return MLModelMetadata(model_map_itr->first,
106  model_map_itr->second->getModelType(),
107  model_map_itr->second->getModelTypeString(),
108  model_map_itr->second->getNumLogicalFeatures(),
109  model_map_itr->second->getNumFeatures(),
110  model_map_itr->second->getNumCatFeatures(),
111  model_map_itr->second->getNumLogicalFeatures() -
112  model_map_itr->second->getNumCatFeatures(),
113  model_map_itr->second->getModelMetadataStr());
114  }
115  const std::string error_str = "Model '" + upper_model_name + "' does not exist.";
116  throw std::runtime_error(error_str);
117  }
118 
119  private:
120  std::map<std::string, std::shared_ptr<AbstractMLModel>> model_map_;
122 };
123 
125 
127  public:
128  LinearRegressionModel(const std::vector<double>& coefs,
129  const std::string& model_metadata)
130  : AbstractMLModel(model_metadata), coefs_(coefs) {}
131 
132  LinearRegressionModel(const std::vector<double>& coefs,
133  const std::string& model_metadata,
134  const std::vector<std::vector<std::string>>& cat_feature_keys)
135  : AbstractMLModel(model_metadata, cat_feature_keys), coefs_(coefs) {}
136 
137  virtual MLModelType getModelType() const override { return MLModelType::LINEAR_REG; }
138 
139  virtual std::string getModelTypeString() const override { return "Linear Regression"; }
140 
141  virtual int64_t getNumFeatures() const override {
142  return static_cast<int64_t>(coefs_.size()) - 1;
143  }
144 
145  const std::vector<double>& getCoefs() const { return coefs_; }
146 
147  private:
148  std::vector<double> coefs_;
149 };
150 
151 #ifdef HAVE_ONEDAL
152 
153 using namespace daal::algorithms;
154 using namespace daal::data_management;
155 
156 class TreeModelVisitor : public daal::algorithms::regression::TreeNodeVisitor {
157  public:
158  TreeModelVisitor(std::vector<DecisionTreeEntry>& decision_table)
159  : decision_table_(decision_table) {}
160 
161  const std::vector<DecisionTreeEntry>& getDecisionTable() const {
162  return decision_table_;
163  }
164 
165  bool onLeafNode(size_t level, double response) override {
166  decision_table_.emplace_back(DecisionTreeEntry(response));
167  if (last_node_leaf_) {
168  decision_table_[parent_nodes_.top()].right_child_row_idx =
169  static_cast<int64_t>(decision_table_.size() - 1);
170  parent_nodes_.pop();
171  }
172  last_node_leaf_ = true;
173  return true;
174  }
175 
176  bool onSplitNode(size_t level, size_t featureIndex, double featureValue) override {
177  decision_table_.emplace_back(
178  DecisionTreeEntry(featureValue,
179  static_cast<int64_t>(featureIndex),
180  static_cast<int64_t>(decision_table_.size() + 1)));
181  if (last_node_leaf_) {
182  decision_table_[parent_nodes_.top()].right_child_row_idx =
183  static_cast<int64_t>(decision_table_.size() - 1);
184  parent_nodes_.pop();
185  }
186  last_node_leaf_ = false;
187  parent_nodes_.emplace(decision_table_.size() - 1);
188  return true;
189  }
190 
191  private:
192  std::vector<DecisionTreeEntry>& decision_table_;
193  std::stack<size_t> parent_nodes_;
194  bool last_node_leaf_{false};
195 };
196 
197 class AbstractTreeModel : public virtual AbstractMLModel {
198  public:
199  virtual MLModelType getModelType() const = 0;
200  virtual std::string getModelTypeString() const = 0;
201  virtual int64_t getNumFeatures() const = 0;
202  virtual int64_t getNumTrees() const = 0;
203  virtual void traverseDF(const int64_t tree_idx,
204  TreeModelVisitor& tree_node_visitor) const = 0;
205  virtual ~AbstractTreeModel() = default;
206 };
207 
208 class DecisionTreeRegressionModel : public virtual AbstractTreeModel {
209  public:
210  DecisionTreeRegressionModel(decision_tree::regression::interface1::ModelPtr& model_ptr,
211  const std::string& model_metadata)
212  : AbstractMLModel(model_metadata), model_ptr_(model_ptr) {}
213  DecisionTreeRegressionModel(
214  decision_tree::regression::interface1::ModelPtr& model_ptr,
215  const std::string& model_metadata,
216  const std::vector<std::vector<std::string>>& cat_feature_keys)
217  : AbstractMLModel(model_metadata, cat_feature_keys), model_ptr_(model_ptr) {}
218 
219  virtual MLModelType getModelType() const override {
221  }
222 
223  virtual std::string getModelTypeString() const override {
224  return "Decision Tree Regression";
225  }
226 
227  virtual int64_t getNumFeatures() const override {
228  return model_ptr_->getNumberOfFeatures();
229  }
230  virtual int64_t getNumTrees() const override { return 1; }
231  virtual void traverseDF(const int64_t tree_idx,
232  TreeModelVisitor& tree_node_visitor) const override {
233  CHECK_EQ(tree_idx, 0);
234  model_ptr_->traverseDF(tree_node_visitor);
235  }
236  const decision_tree::regression::interface1::ModelPtr getModelPtr() const {
237  return model_ptr_;
238  }
239 
240  private:
241  decision_tree::regression::interface1::ModelPtr model_ptr_;
242 };
243 
244 class GbtRegressionModel : public virtual AbstractTreeModel {
245  public:
246  GbtRegressionModel(gbt::regression::interface1::ModelPtr& model_ptr,
247  const std::string& model_metadata)
248  : AbstractMLModel(model_metadata), model_ptr_(model_ptr) {}
249 
250  GbtRegressionModel(gbt::regression::interface1::ModelPtr& model_ptr,
251  const std::string& model_metadata,
252  const std::vector<std::vector<std::string>>& cat_feature_keys)
253  : AbstractMLModel(model_metadata, cat_feature_keys), model_ptr_(model_ptr) {}
254 
255  virtual MLModelType getModelType() const override { return MLModelType::GBT_REG; }
256 
257  virtual std::string getModelTypeString() const override {
258  return "Gradient Boosted Trees Regression";
259  }
260 
261  virtual int64_t getNumFeatures() const override {
262  return model_ptr_->getNumberOfFeatures();
263  }
264  virtual int64_t getNumTrees() const override { return model_ptr_->getNumberOfTrees(); }
265  virtual void traverseDF(const int64_t tree_idx,
266  TreeModelVisitor& tree_node_visitor) const override {
267  model_ptr_->traverseDF(tree_idx, tree_node_visitor);
268  }
269  const gbt::regression::interface1::ModelPtr getModelPtr() const { return model_ptr_; }
270 
271  private:
272  gbt::regression::interface1::ModelPtr model_ptr_;
273 };
274 
275 class RandomForestRegressionModel : public virtual AbstractTreeModel {
276  public:
277  RandomForestRegressionModel(
278  decision_forest::regression::interface1::ModelPtr& model_ptr,
279  const std::string& model_metadata,
280  const std::vector<double>& variable_importance,
281  const double out_of_bag_error)
282  : AbstractMLModel(model_metadata)
283  , model_ptr_(model_ptr)
284  , variable_importance_(variable_importance)
285  , out_of_bag_error_(out_of_bag_error) {}
286 
287  RandomForestRegressionModel(
288  decision_forest::regression::interface1::ModelPtr& model_ptr,
289  const std::string& model_metadata,
290  const std::vector<std::vector<std::string>>& cat_feature_keys,
291  const std::vector<double>& variable_importance,
292  const double out_of_bag_error)
293  : AbstractMLModel(model_metadata, cat_feature_keys)
294  , model_ptr_(model_ptr)
295  , variable_importance_(variable_importance)
296  , out_of_bag_error_(out_of_bag_error) {}
297 
298  virtual MLModelType getModelType() const override {
300  }
301 
302  virtual std::string getModelTypeString() const override {
303  return "Random Forest Regression";
304  }
305  virtual int64_t getNumFeatures() const override {
306  return model_ptr_->getNumberOfFeatures();
307  }
308  virtual int64_t getNumTrees() const override { return model_ptr_->getNumberOfTrees(); }
309  virtual void traverseDF(const int64_t tree_idx,
310  TreeModelVisitor& tree_node_visitor) const override {
311  model_ptr_->traverseDF(tree_idx, tree_node_visitor);
312  }
313 
314  const decision_forest::regression::interface1::ModelPtr getModelPtr() const {
315  return model_ptr_;
316  }
317 
318  const std::vector<double>& getVariableImportanceScores() const {
319  return variable_importance_;
320  }
321 
322  const double getOutOfBagError() const { return out_of_bag_error_; }
323 
324  private:
325  decision_forest::regression::interface1::ModelPtr model_ptr_;
326  std::vector<double> variable_importance_;
327  double out_of_bag_error_;
328 };
329 
330 #endif // #ifdef HAVE_ONEDAL
331 
332 class PcaModel : public AbstractMLModel {
333  public:
334  PcaModel(const std::vector<double>& col_means,
335  const std::vector<double>& col_std_devs,
336  const std::vector<std::vector<double>>& eigenvectors,
337  const std::vector<double>& eigenvalues,
338  const std::string& model_metadata)
339  : AbstractMLModel(model_metadata)
340  , col_means_(col_means)
341  , col_std_devs_(col_std_devs)
342  , eigenvectors_(eigenvectors)
343  , eigenvalues_(eigenvalues) {}
344 
345  PcaModel(const std::vector<double>& col_means,
346  const std::vector<double>& col_std_devs,
347  const std::vector<std::vector<double>>& eigenvectors,
348  const std::vector<double>& eigenvalues,
349  const std::string& model_metadata,
350  const std::vector<std::vector<std::string>>& cat_feature_keys)
351  : AbstractMLModel(model_metadata, cat_feature_keys)
352  , col_means_(col_means)
353  , col_std_devs_(col_std_devs)
354  , eigenvectors_(eigenvectors)
355  , eigenvalues_(eigenvalues) {}
356 
357  virtual MLModelType getModelType() const override { return MLModelType::PCA; }
358 
359  virtual std::string getModelTypeString() const override { return "PCA"; }
360 
361  virtual int64_t getNumFeatures() const override {
362  return static_cast<int64_t>(col_means_.size());
363  }
364 
365  const std::vector<double>& getColumnMeans() const { return col_means_; }
366  const std::vector<double>& getColumnStdDevs() const { return col_std_devs_; }
367  const std::vector<std::vector<double>>& getEigenvectors() const {
368  return eigenvectors_;
369  }
370  const std::vector<double>& getEigenvalues() const { return eigenvalues_; }
371 
372  private:
373  std::vector<double> col_means_;
374  std::vector<double> col_std_devs_;
375  std::vector<std::vector<double>> eigenvectors_;
376  std::vector<double> eigenvalues_;
377 };
378 
379 #endif // #ifndef __CUDACC__
PcaModel(const std::vector< double > &col_means, const std::vector< double > &col_std_devs, const std::vector< std::vector< double >> &eigenvectors, const std::vector< double > &eigenvalues, const std::string &model_metadata)
Definition: MLModel.h:334
const std::vector< double > & getColumnStdDevs() const
Definition: MLModel.h:366
#define CHECK_EQ(x, y)
Definition: Logger.h:301
virtual std::string getModelTypeString() const override
Definition: MLModel.h:139
virtual int64_t getNumFeatures() const override
Definition: MLModel.h:361
std::vector< MLModelMetadata > getModelMetadata() const
Definition: MLModel.h:83
virtual std::string getModelTypeString() const override
Definition: MLModel.h:359
std::vector< double > eigenvalues_
Definition: MLModel.h:376
LinearRegressionModel(const std::vector< double > &coefs, const std::string &model_metadata)
Definition: MLModel.h:128
virtual MLModelType getModelType() const override
Definition: MLModel.h:357
MLModelType
Definition: MLModelType.h:25
std::vector< double > col_std_devs_
Definition: MLModel.h:374
std::vector< double > col_means_
Definition: MLModel.h:373
void addModel(const std::string &model_name, std::shared_ptr< AbstractMLModel > model)
Definition: MLModel.h:37
bool modelExists(const std::string &model_name) const
Definition: MLModel.h:43
std::shared_ptr< AbstractMLModel > getModel(const std::string &model_name) const
Definition: MLModel.h:50
std::string to_upper(const std::string &str)
LinearRegressionModel(const std::vector< double > &coefs, const std::string &model_metadata, const std::vector< std::vector< std::string >> &cat_feature_keys)
Definition: MLModel.h:132
MLModelMap g_ml_models
Definition: MLModel.h:124
MLModelMetadata getModelMetadata(const std::string &model_name) const
Definition: MLModel.h:100
std::shared_mutex model_map_mutex_
Definition: MLModel.h:121
PcaModel(const std::vector< double > &col_means, const std::vector< double > &col_std_devs, const std::vector< std::vector< double >> &eigenvectors, const std::vector< double > &eigenvalues, const std::string &model_metadata, const std::vector< std::vector< std::string >> &cat_feature_keys)
Definition: MLModel.h:345
void deleteModel(const std::string &model_name)
Definition: MLModel.h:61
std::vector< double > coefs_
Definition: MLModel.h:148
const std::vector< double > & getColumnMeans() const
Definition: MLModel.h:365
const std::vector< double > & getCoefs() const
Definition: MLModel.h:145
const std::vector< double > & getEigenvalues() const
Definition: MLModel.h:370
virtual MLModelType getModelType() const override
Definition: MLModel.h:137
std::vector< std::string > getModelNames() const
Definition: MLModel.h:74
virtual int64_t getNumFeatures() const override
Definition: MLModel.h:141
std::shared_timed_mutex shared_mutex
std::map< std::string, std::shared_ptr< AbstractMLModel > > model_map_
Definition: MLModel.h:120
std::vector< std::vector< double > > eigenvectors_
Definition: MLModel.h:375
const std::vector< std::vector< double > > & getEigenvectors() const
Definition: MLModel.h:367