OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
OneDalFunctions.hpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #ifndef __CUDACC__
20 #ifdef HAVE_ONEDAL
21 
22 #include "MLModel.h"
25 #include "daal.h"
26 
27 using namespace daal::algorithms;
28 using namespace daal::data_management;
29 
30 template <typename T>
31 const NumericTablePtr prepare_data_table(const T* data, const int64_t num_rows) {
32  // Prepare input data as structure of arrays (SOA) as columnar format (zero-copy)
33  const auto data_table = SOANumericTable::create(1 /* num_columns */, num_rows);
34  data_table->setArray<T>(const_cast<T*>(data), 0);
35  return data_table;
36 }
37 
38 template <typename T>
39 const NumericTablePtr prepare_data_table(const std::vector<const T*>& data,
40  const int64_t num_rows) {
41  // Data dimensions
42  const size_t num_columns = data.size();
43 
44  // Prepare input data as structure of arrays (SOA) as columnar format (zero-copy)
45  const auto data_table = SOANumericTable::create(num_columns, num_rows);
46  for (size_t i = 0; i < num_columns; ++i) {
47  data_table->setArray<T>(const_cast<T*>(data[i]), i);
48  }
49  return data_table;
50 }
51 
52 template <typename T>
53 const NumericTablePtr prepare_pivoted_data_table(const T* data, const int64_t num_elems) {
54  // Data dimensions
55  // Prepare input data as structure of arrays (SOA) as columnar format (zero-copy)
56  const auto data_table = SOANumericTable::create(num_elems, 1);
57  for (size_t c = 0; c < static_cast<size_t>(num_elems); ++c) {
58  data_table->setArray<T>(const_cast<T*>(data) + c, c);
59  }
60  return data_table;
61 }
62 
63 inline kmeans::init::Method get_kmeans_init_type(const KMeansInitStrategy init_type) {
64  const static std::map<KMeansInitStrategy, kmeans::init::Method> kmeans_init_type_map = {
65  {KMeansInitStrategy::DEFAULT, kmeans::init::Method::deterministicDense},
66  {KMeansInitStrategy::DETERMINISTIC, kmeans::init::Method::deterministicDense},
67  {KMeansInitStrategy::RANDOM, kmeans::init::Method::randomDense},
68  {KMeansInitStrategy::PLUS_PLUS, kmeans::init::Method::parallelPlusDense}};
69 
70  const auto itr = kmeans_init_type_map.find(init_type);
71  if (itr == kmeans_init_type_map.end()) {
72  std::ostringstream oss;
73  oss << "Invalid Kmeans cluster centroid initialization type. "
74  << "Was expecting one of DETERMINISTIC, RANDOM, or PLUS_PLUS.";
75  throw std::runtime_error(oss.str());
76  }
77  return itr->second;
78 }
79 
80 template <typename T, kmeans::init::Method M>
81 const NumericTablePtr init_centroids_for_type(const NumericTablePtr& input_features_table,
82  const int32_t num_clusters) {
83  kmeans::init::Batch<T, M> init(num_clusters);
84  init.input.set(kmeans::init::data, input_features_table);
85  init.compute();
86  return init.getResult()->get(kmeans::init::centroids);
87 }
88 
89 template <typename T>
90 const NumericTablePtr init_centroids(const NumericTablePtr& input_features_table,
91  const kmeans::init::Method& init_type,
92  const int32_t num_clusters) {
93  switch (init_type) {
94  case kmeans::init::Method::deterministicDense:
95  return init_centroids_for_type<T, kmeans::init::Method::deterministicDense>(
96  input_features_table, num_clusters);
97  case kmeans::init::Method::randomDense:
98  return init_centroids_for_type<T, kmeans::init::Method::randomDense>(
99  input_features_table, num_clusters);
100  case kmeans::init::Method::plusPlusDense:
101  return init_centroids_for_type<T, kmeans::init::Method::plusPlusDense>(
102  input_features_table, num_clusters);
103  case kmeans::init::Method::parallelPlusDense:
104  return init_centroids_for_type<T, kmeans::init::Method::parallelPlusDense>(
105  input_features_table, num_clusters);
106  default: {
107  UNREACHABLE();
108  return init_centroids_for_type<T, kmeans::init::Method::deterministicDense>(
109  input_features_table, num_clusters);
110  }
111  }
112 }
113 
114 template <typename T>
115 NEVER_INLINE HOST int32_t onedal_kmeans_impl(const std::vector<const T*>& input_features,
116  int32_t* output_clusters,
117  const int64_t num_rows,
118  const int num_clusters,
119  const int num_iterations,
120  const KMeansInitStrategy kmeans_init_type) {
121  try {
122  const auto features_table = prepare_data_table(input_features, num_rows);
123  const auto onedal_kmeans_init_type = get_kmeans_init_type(kmeans_init_type);
124  const auto centroids =
125  init_centroids<T>(features_table, onedal_kmeans_init_type, num_clusters);
126  const auto assignments_table =
127  HomogenNumericTable<int32_t>::create(output_clusters, 1, num_rows);
128  const kmeans::ResultPtr result(new kmeans::Result);
129  result->set(kmeans::assignments, assignments_table);
130  result->set(kmeans::objectiveFunction,
131  HomogenNumericTable<T>::create(1, 1, NumericTable::doAllocate));
132  result->set(kmeans::nIterations,
133  HomogenNumericTable<int>::create(1, 1, NumericTable::doAllocate));
134  kmeans::Batch<> algorithm(num_clusters, num_iterations);
135  algorithm.input.set(kmeans::data, features_table);
136  algorithm.input.set(kmeans::inputCentroids, centroids);
137  algorithm.parameter().resultsToEvaluate = kmeans::computeAssignments;
138  algorithm.setResult(result);
139  algorithm.compute();
140  } catch (std::exception& e) {
141  throw std::runtime_error(e.what());
142  }
143  return num_rows;
144 }
145 
146 template <typename T>
147 NEVER_INLINE HOST int32_t onedal_dbscan_impl(const std::vector<const T*>& input_features,
148  int32_t* output_clusters,
149  const int64_t num_rows,
150  const double epsilon,
151  const int32_t min_observations) {
152  try {
153  const auto features_table = prepare_data_table(input_features, num_rows);
154  const auto assignments_table =
155  HomogenNumericTable<int32_t>::create(output_clusters, 1, num_rows);
156  const dbscan::ResultPtr result(new dbscan::Result);
157  result->set(dbscan::assignments, assignments_table);
158  result->set(dbscan::nClusters,
159  HomogenNumericTable<int>::create(1, 1, NumericTable::doAllocate));
160  dbscan::Batch<> algorithm(epsilon, min_observations);
161  algorithm.input.set(dbscan::data, features_table);
162  algorithm.parameter().resultsToCompute = dbscan::assignments;
163  algorithm.setResult(result);
164  algorithm.compute();
165  } catch (std::exception& e) {
166  throw std::runtime_error(e.what());
167  }
168  return num_rows;
169 }
170 
171 template <typename T>
172 NEVER_INLINE HOST std::pair<std::vector<std::vector<T>>, std::vector<T>> onedal_pca_impl(
173  const std::vector<const T*>& input_features,
174  const int64_t num_rows) {
175  try {
176  const auto features_table = prepare_data_table(input_features, num_rows);
177  pca::Batch<> algorithm;
178  algorithm.input.set(pca::data, features_table);
179  algorithm.parameter.resultsToCompute = pca::mean | pca::variance | pca::eigenvalue;
180  algorithm.parameter.isDeterministic = true;
181 
182  algorithm.compute();
183  pca::ResultPtr result = algorithm.getResult();
184  const auto eigenvectors_table = result->get(pca::eigenvectors);
185  const int64_t num_dims = eigenvectors_table->getNumberOfRows();
186  CHECK_EQ(num_dims, static_cast<int64_t>(eigenvectors_table->getNumberOfColumns()));
187  std::vector<std::vector<T>> eigenvectors(num_dims, std::vector<T>(num_dims));
188  for (int64_t row_idx = 0; row_idx < num_dims; ++row_idx) {
189  for (int64_t col_idx = 0; col_idx < num_dims; ++col_idx) {
190  // eigenvectors_table is column major, so need to flip the lookup indicies
191  eigenvectors[row_idx][col_idx] =
192  eigenvectors_table->getValue<T>(col_idx, row_idx);
193  }
194  }
195  const auto eigenvalues_table = result->get(pca::eigenvalues);
196  std::vector<T> eigenvalues(num_dims);
197  for (int64_t dim_idx = 0; dim_idx < num_dims; ++dim_idx) {
198  eigenvalues[dim_idx] = eigenvalues_table->getValue<T>(dim_idx, 0);
199  }
200  return std::make_pair(eigenvectors, eigenvalues);
201  } catch (std::exception& e) {
202  throw std::runtime_error(e.what());
203  }
204 }
205 
206 template <typename T>
207 int32_t extract_model_coefs(const NumericTablePtr& coefs_table,
208  int64_t* coef_idxs,
209  double* coefs) {
210  const int64_t num_coefs = coefs_table->getNumberOfColumns();
211  for (int64_t coef_idx = 0; coef_idx < num_coefs; ++coef_idx) {
212  coef_idxs[coef_idx] = coef_idx;
213  coefs[coef_idx] =
214  coefs_table->NumericTable::getValue<T>(coef_idx, static_cast<size_t>(0));
215  }
216  return num_coefs;
217 }
218 
219 template <typename T>
220 NEVER_INLINE HOST int32_t
221 onedal_linear_reg_fit_impl(const T* input_labels,
222  const std::vector<const T*>& input_features,
223  int64_t* output_coef_idxs,
224  double* output_coefs,
225  const int64_t num_rows) {
226  try {
227  const auto labels_table = prepare_data_table(input_labels, num_rows);
228  const auto features_table = prepare_data_table(input_features, num_rows);
229 
230  linear_regression::training::Batch<T, linear_regression::training::Method::qrDense>
231  algorithm;
232 
233  algorithm.input.set(linear_regression::training::data, features_table);
234  algorithm.input.set(linear_regression::training::dependentVariables, labels_table);
235 
236  algorithm.compute();
237  const auto training_result = algorithm.getResult();
238  const auto coefs_table =
239  training_result->get(linear_regression::training::model)->getBeta();
240  return extract_model_coefs<T>(coefs_table, output_coef_idxs, output_coefs);
241  } catch (std::exception& e) {
242  throw std::runtime_error(e.what());
243  }
244 }
245 
246 template <typename T>
247 NEVER_INLINE HOST linear_regression::ModelPtr build_linear_reg_model(
248  const double* model_coefs,
249  const int64_t num_coefs) {
250  // See comment at end of onedal_lin_reg_fit_impl
251  // We need to unpivot the model data back to the native
252  // format oneDal expects, with 1 column per beta
253  std::vector<T> casted_model_coefs(num_coefs);
254  for (int64_t coef_idx = 0; coef_idx < num_coefs; ++coef_idx) {
255  casted_model_coefs[coef_idx] = model_coefs[coef_idx];
256  }
257  const auto betas_table =
258  prepare_pivoted_data_table(casted_model_coefs.data(), num_coefs);
259  CHECK_EQ(betas_table->getNumberOfColumns(), num_coefs);
260 
261  // Create model builder with true intercept flag
262  linear_regression::ModelBuilder<T> model_builder(num_coefs - 1,
263  1 /* num_dependent_variables */);
264 
265  // Retrive pointer to the begining of betas_table
266  BlockDescriptor<T> block_result;
267 
268  // Use generic code for getting start and end iterators for betas table, even though we
269  // currently only support case of one dependent variable (i.e. 1 row in the betas table)
270  betas_table->getBlockOfRows(0, betas_table->getNumberOfRows(), readOnly, block_result);
271  size_t num_betas =
272  (betas_table->getNumberOfRows()) * (betas_table->getNumberOfColumns());
273 
274  // Initialize iterators for beta array with itrecepts
275  T* first_itr = block_result.getBlockPtr();
276  T* last_itr = first_itr + num_betas;
277  model_builder.setBeta(first_itr, last_itr);
278  betas_table->releaseBlockOfRows(block_result);
279 
280  return model_builder.getModel();
281 }
282 
283 template <typename T>
284 NEVER_INLINE HOST int32_t
285 onedal_linear_reg_predict_impl(const std::shared_ptr<LinearRegressionModel>& model,
286  const std::vector<const T*>& input_features,
287  T* output_predictions,
288  const int64_t num_rows) {
289  CHECK(model->getModelType() == MLModelType::LINEAR_REG);
290  try {
291  if (model->getNumFeatures() != static_cast<int64_t>(input_features.size())) {
292  throw std::runtime_error(
293  "Number of model coefficients does not match number of input features.");
294  }
295  const auto features_table = prepare_data_table(input_features, num_rows);
296  const auto model_ptr =
297  build_linear_reg_model<T>(model->getCoefs().data(), input_features.size() + 1);
298 
299  linear_regression::prediction::Batch<> algorithm;
300  algorithm.input.set(linear_regression::prediction::data, features_table);
301  algorithm.input.set(linear_regression::prediction::model, model_ptr);
302 
303  const auto predictions_table =
304  HomogenNumericTable<T>::create(output_predictions, 1, num_rows);
305 
306  const linear_regression::prediction::ResultPtr result(
307  new linear_regression::prediction::Result);
308  result->set(linear_regression::prediction::prediction, predictions_table);
309  algorithm.setResult(result);
310  algorithm.compute();
311  return num_rows;
312  } catch (std::exception& e) {
313  throw std::runtime_error(e.what());
314  }
315 }
316 
317 template <typename T>
318 NEVER_INLINE HOST void onedal_decision_tree_reg_fit_impl(
319  const std::string& model_name,
320  const T* input_labels,
321  const std::vector<const T*>& input_features,
322  const std::string& model_metadata,
323  const std::vector<std::vector<std::string>>& cat_feature_keys,
324  const int64_t num_rows,
325  const int64_t max_tree_depth,
326  const int64_t min_observations_per_leaf_node) {
327  try {
328  const auto labels_table = prepare_data_table(input_labels, num_rows);
329  const auto features_table = prepare_data_table(input_features, num_rows);
330  decision_tree::regression::training::Batch<T> algorithm;
331  algorithm.input.set(decision_tree::regression::training::data, features_table);
332  algorithm.input.set(decision_tree::regression::training::dependentVariables,
333  labels_table);
334 
335  algorithm.parameter.pruning = decision_tree::Pruning::none;
336  algorithm.parameter.maxTreeDepth = max_tree_depth;
337  algorithm.parameter.minObservationsInLeafNodes = min_observations_per_leaf_node;
338  algorithm.compute();
339  /* Retrieve the algorithm results */
340  decision_tree::regression::training::ResultPtr training_result =
341  algorithm.getResult();
342 
343  auto model_ptr = training_result->get(decision_tree::regression::training::model);
344  auto model = std::make_shared<DecisionTreeRegressionModel>(
345  model_ptr, model_metadata, cat_feature_keys);
346  g_ml_models.addModel(model_name, model);
347  } catch (std::exception& e) {
348  throw std::runtime_error(e.what());
349  }
350 }
351 
352 template <typename T>
353 NEVER_INLINE HOST void onedal_gbt_reg_fit_impl(
354  const std::string& model_name,
355  const T* input_labels,
356  const std::vector<const T*>& input_features,
357  const std::string& model_metadata,
358  const std::vector<std::vector<std::string>>& cat_feature_keys,
359  const int64_t num_rows,
360  const int64_t max_iterations,
361  const int64_t max_tree_depth,
362  const double shrinkage,
363  const double min_split_loss,
364  const double lambda,
365  const double obs_per_tree_fraction,
366  const int64_t features_per_node,
367  const int64_t min_observations_per_leaf_node,
368  const int64_t max_bins,
369  const int64_t min_bin_size) {
370  try {
371  const auto labels_table = prepare_data_table(input_labels, num_rows);
372  const auto features_table = prepare_data_table(input_features, num_rows);
373  gbt::regression::training::Batch<T> algorithm;
374  algorithm.input.set(gbt::regression::training::data, features_table);
375  algorithm.input.set(gbt::regression::training::dependentVariable, labels_table);
376 
377  algorithm.parameter().maxIterations = max_iterations;
378  algorithm.parameter().maxTreeDepth = max_tree_depth;
379  algorithm.parameter().shrinkage = shrinkage;
380  algorithm.parameter().minSplitLoss = min_split_loss;
381  algorithm.parameter().lambda = lambda;
382  algorithm.parameter().observationsPerTreeFraction = obs_per_tree_fraction;
383  algorithm.parameter().featuresPerNode = features_per_node;
384  algorithm.parameter().minObservationsInLeafNode = min_observations_per_leaf_node;
385  algorithm.parameter().maxBins = max_bins;
386  algorithm.parameter().minBinSize = min_bin_size;
387  algorithm.compute();
388  /* Retrieve the algorithm results */
389  gbt::regression::training::ResultPtr training_result = algorithm.getResult();
390 
391  auto model_ptr = training_result->get(gbt::regression::training::model);
392  auto model =
393  std::make_shared<GbtRegressionModel>(model_ptr, model_metadata, cat_feature_keys);
394  g_ml_models.addModel(model_name, model);
395  } catch (std::exception& e) {
396  throw std::runtime_error(e.what());
397  }
398 }
399 
400 inline decision_forest::training::VariableImportanceMode get_var_importance_metric_type(
401  const VarImportanceMetric var_importance_metric) {
402  const static std::map<VarImportanceMetric,
403  decision_forest::training::VariableImportanceMode>
404  var_importance_mode_type_map = {
406  decision_forest::training::VariableImportanceMode::MDI},
408  decision_forest::training::VariableImportanceMode::none},
410  decision_forest::training::VariableImportanceMode::MDI},
412  decision_forest::training::VariableImportanceMode::MDA_Raw},
414  decision_forest::training::VariableImportanceMode::MDA_Scaled}};
415 
416  const auto itr = var_importance_mode_type_map.find(var_importance_metric);
417  if (itr == var_importance_mode_type_map.end()) {
418  std::ostringstream oss;
419  oss << "Invalid variable importance mode type. "
420  << "Was expecting one of DEFAULT, NONE, MDI, MDA, or MDA_SCALED.";
421  throw std::runtime_error(oss.str());
422  }
423  return itr->second;
424 }
425 
426 template <typename T, decision_forest::regression::training::Method M>
427 NEVER_INLINE HOST void onedal_random_forest_reg_fit_impl(
428  const std::string& model_name,
429  const T* input_labels,
430  const std::vector<const T*>& input_features,
431  const std::string& model_metadata,
432  const std::vector<std::vector<std::string>>& cat_feature_keys,
433  const int64_t num_rows,
434  const int64_t num_trees,
435  const double obs_per_tree_fraction,
436  const int64_t max_tree_depth,
437  const int64_t features_per_node,
438  const double impurity_threshold,
439  const bool bootstrap,
440  const int64_t min_obs_per_leaf_node,
441  const int64_t min_obs_per_split_node,
442  const double min_weight_fraction_in_leaf_node,
443  const double min_impurity_decrease_in_split_node,
444  const int64_t max_leaf_nodes,
445  const VarImportanceMetric var_importance_metric) {
446  constexpr bool compute_out_of_bag_error{false};
447  try {
448  const auto labels_table = prepare_data_table(input_labels, num_rows);
449  const auto features_table = prepare_data_table(input_features, num_rows);
450  decision_forest::regression::training::Batch<T, M> algorithm;
451  algorithm.input.set(decision_forest::regression::training::data, features_table);
452  algorithm.input.set(decision_forest::regression::training::dependentVariable,
453  labels_table);
454 
455  algorithm.parameter().nTrees = num_trees;
456  algorithm.parameter().observationsPerTreeFraction = obs_per_tree_fraction;
457  algorithm.parameter().maxTreeDepth = max_tree_depth;
458  algorithm.parameter().featuresPerNode = features_per_node;
459  algorithm.parameter().impurityThreshold = impurity_threshold;
460  algorithm.parameter().bootstrap = bootstrap;
461  algorithm.parameter().minObservationsInLeafNode = min_obs_per_leaf_node;
462  algorithm.parameter().minObservationsInSplitNode = min_obs_per_split_node;
463  algorithm.parameter().minWeightFractionInLeafNode = min_weight_fraction_in_leaf_node;
464  algorithm.parameter().minImpurityDecreaseInSplitNode =
465  min_impurity_decrease_in_split_node;
466  algorithm.parameter().varImportance =
467  get_var_importance_metric_type(var_importance_metric);
468  algorithm.parameter().resultsToCompute =
469  compute_out_of_bag_error ? decision_forest::training::computeOutOfBagError : 0;
470  algorithm.compute();
471  /* Retrieve the algorithm results */
472  decision_forest::regression::training::ResultPtr training_result =
473  algorithm.getResult();
474 
475  auto model_ptr = training_result->get(decision_forest::regression::training::model);
476  auto variable_importance_table =
477  training_result->get(decision_forest::regression::training::variableImportance);
478  const size_t num_features = input_features.size();
479  std::vector<double> variable_importance(
480  var_importance_metric != VarImportanceMetric::NONE ? num_features : 0);
481  if (var_importance_metric != VarImportanceMetric::NONE) {
482  for (size_t feature_idx = 0; feature_idx < num_features; ++feature_idx) {
483  variable_importance[feature_idx] =
484  variable_importance_table->NumericTable::getValue<T>(feature_idx, size_t(0));
485  }
486  }
487  double out_of_bag_error{0};
488  if (compute_out_of_bag_error) {
489  auto out_of_bag_error_table =
490  training_result->get(decision_forest::regression::training::outOfBagError);
491  out_of_bag_error =
492  out_of_bag_error_table->NumericTable::getValue<T>(0, static_cast<size_t>(0));
493  }
494  auto model = std::make_shared<RandomForestRegressionModel>(model_ptr,
495  model_metadata,
496  cat_feature_keys,
497  variable_importance,
498  out_of_bag_error);
499  g_ml_models.addModel(model_name, model);
500  } catch (std::exception& e) {
501  throw std::runtime_error(e.what());
502  }
503 }
504 
505 template <typename T>
506 NEVER_INLINE HOST int32_t onedal_decision_tree_reg_predict_impl(
507  const std::shared_ptr<DecisionTreeRegressionModel>& model,
508  const std::vector<const T*>& input_features,
509  T* output_predictions,
510  const int64_t num_rows) {
511  CHECK(model->getModelType() == MLModelType::DECISION_TREE_REG);
512  try {
513  if (model->getNumFeatures() != static_cast<int64_t>(input_features.size())) {
514  throw std::runtime_error("Number of provided features does not match model.");
515  }
516  const auto features_table = prepare_data_table(input_features, num_rows);
517  decision_tree::regression::prediction::Batch<T> algorithm;
518  algorithm.input.set(decision_tree::regression::prediction::data, features_table);
519  algorithm.input.set(decision_tree::regression::prediction::model,
520  model->getModelPtr());
521 
522  const auto predictions_table =
523  HomogenNumericTable<T>::create(output_predictions, 1, num_rows);
524 
525  const decision_tree::regression::prediction::ResultPtr result(
526  new decision_tree::regression::prediction::Result);
527  result->set(decision_tree::regression::prediction::prediction, predictions_table);
528  algorithm.setResult(result);
529  algorithm.compute();
530  return num_rows;
531  } catch (std::exception& e) {
532  throw std::runtime_error(e.what());
533  }
534 }
535 
536 template <typename T>
537 NEVER_INLINE HOST int32_t
538 onedal_gbt_reg_predict_impl(const std::shared_ptr<GbtRegressionModel>& model,
539  const std::vector<const T*>& input_features,
540  T* output_predictions,
541  const int64_t num_rows) {
542  CHECK(model->getModelType() == MLModelType::GBT_REG);
543  try {
544  if (model->getNumFeatures() != static_cast<int64_t>(input_features.size())) {
545  throw std::runtime_error("Number of provided features does not match model.");
546  }
547  const auto features_table = prepare_data_table(input_features, num_rows);
548  gbt::regression::prediction::Batch<T> algorithm;
549  algorithm.input.set(gbt::regression::prediction::data, features_table);
550  algorithm.input.set(gbt::regression::prediction::model, model->getModelPtr());
551 
552  const auto predictions_table =
553  HomogenNumericTable<T>::create(output_predictions, 1, num_rows);
554 
555  const gbt::regression::prediction::ResultPtr result(
556  new gbt::regression::prediction::Result);
557  result->set(gbt::regression::prediction::prediction, predictions_table);
558  algorithm.setResult(result);
559  algorithm.compute();
560  return num_rows;
561  } catch (std::exception& e) {
562  throw std::runtime_error(e.what());
563  }
564 }
565 
566 template <typename T>
567 NEVER_INLINE HOST int32_t onedal_random_forest_reg_predict_impl(
568  const std::shared_ptr<RandomForestRegressionModel>& model,
569  const std::vector<const T*>& input_features,
570  T* output_predictions,
571  const int64_t num_rows) {
572  CHECK(model->getModelType() == MLModelType::RANDOM_FOREST_REG);
573  try {
574  if (model->getNumFeatures() != static_cast<int64_t>(input_features.size())) {
575  throw std::runtime_error("Number of provided features does not match model.");
576  }
577  const auto features_table = prepare_data_table(input_features, num_rows);
578  decision_forest::regression::prediction::Batch<T> algorithm;
579  algorithm.input.set(decision_forest::regression::prediction::data, features_table);
580  algorithm.input.set(decision_forest::regression::prediction::model,
581  model->getModelPtr());
582 
583  const auto predictions_table =
584  HomogenNumericTable<T>::create(output_predictions, 1, num_rows);
585 
586  const decision_forest::regression::prediction::ResultPtr result(
587  new decision_forest::regression::prediction::Result);
588  result->set(decision_forest::regression::prediction::prediction, predictions_table);
589  algorithm.setResult(result);
590  algorithm.compute();
591  return num_rows;
592  } catch (std::exception& e) {
593  throw std::runtime_error(e.what());
594  }
595 }
596 
597 inline const std::vector<double>& onedal_random_forest_reg_var_importance_impl(
598  const std::shared_ptr<RandomForestRegressionModel>& rand_forest_model) {
599  return rand_forest_model->getVariableImportanceScores();
600 }
601 
602 #endif // #ifdef HAVE_ONEDAL
603 #endif // #ifndef __CUDACC__
#define CHECK_EQ(x, y)
Definition: Logger.h:301
KMeansInitStrategy get_kmeans_init_type(const std::string &init_type_str)
VarImportanceMetric
#define UNREACHABLE()
Definition: Logger.h:338
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:55
KMeansInitStrategy
#define HOST
void init(LogOptions const &log_opts)
Definition: Logger.cpp:364
void addModel(const std::string &model_name, std::shared_ptr< AbstractMLModel > model)
Definition: MLModel.h:37
MLModelMap g_ml_models
Definition: MLModel.h:124
#define NEVER_INLINE
#define CHECK(condition)
Definition: Logger.h:291