OmniSciDB  c0231cc57d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MLTableFunctions.hpp File Reference
+ Include dependency graph for MLTableFunctions.hpp:

Go to the source code of this file.

Functions

template<typename T >
std::vector< const T * > pluck_ptrs (const std::vector< std::vector< T >> &data, const int64_t start_idx, const int64_t end_idx)
 
template<typename T >
std::vector< const T * > pluck_ptrs (const std::vector< T * > &data, const int64_t start_idx, const int64_t end_idx)
 
EXTENSION_NOINLINE_HOST int32_t supported_ml_frameworks__cpu_ (TableFunctionManager &mgr, Column< TextEncodingDict > &output_ml_frameworks, Column< bool > &output_availability, Column< bool > &output_default)
 
template<typename K , typename T >
NEVER_INLINE HOST int32_t kmeans__cpu_template (TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const int num_clusters, const int num_iterations, const TextEncodingNone &init_type_str, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< int32_t > &output_clusters)
 
template<typename K , typename T >
NEVER_INLINE HOST int32_t kmeans__cpu_template (TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const int num_clusters, const int num_iterations, const TextEncodingNone &init_type_str, Column< K > &output_ids, Column< int32_t > &output_clusters)
 
template<typename K , typename T >
NEVER_INLINE HOST int32_t kmeans__cpu_template (TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const int32_t num_clusters, const int32_t num_iterations, Column< K > &output_ids, Column< int32_t > &output_clusters)
 
template<typename K , typename T >
NEVER_INLINE HOST int32_t dbscan__cpu_template (TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const double epsilon, const int32_t min_observations, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< int32_t > &output_clusters)
 
template<typename K , typename T >
NEVER_INLINE HOST int32_t dbscan__cpu_template (TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const double epsilon, const int32_t min_observations, Column< K > &output_ids, Column< int32_t > &output_clusters)
 
template<typename T >
NEVER_INLINE HOST int32_t linear_reg_fit__cpu_template (TableFunctionManager &mgr, const Column< T > &input_labels, const ColumnList< T > &input_features, const TextEncodingNone &preferred_ml_framework_str, Column< int32_t > &output_coef_idxs, Column< T > &output_coefs)
 
template<typename T >
NEVER_INLINE HOST int32_t linear_reg_fit__cpu_template (TableFunctionManager &mgr, const Column< T > &input_labels, const ColumnList< T > &input_features, Column< int32_t > &output_coef_idxs, Column< T > &output_coefs)
 
template<typename T >
std::vector< T > sort_coefs (const Column< int32_t > &coef_idxs, const Column< T > &coefs)
 
template<typename T , typename K >
NEVER_INLINE HOST int32_t linear_reg_predict__cpu_template (TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const Column< int32_t > &coef_idxs, const Column< T > &coefs, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< T > &output_predictions)
 
template<typename T , typename K >
NEVER_INLINE HOST int32_t linear_reg_predict__cpu_template (TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const Column< int32_t > &coef_idxs, const Column< T > &coefs, Column< K > &output_ids, Column< T > &output_predictions)
 
template<typename T >
Column< T > create_wrapper_col (std::vector< T > &col_vec)
 
template<typename T , typename K >
NEVER_INLINE HOST int32_t linear_reg_fit_predict__cpu_template (TableFunctionManager &mgr, const Column< K > &input_ids, const Column< T > &input_labels, const ColumnList< T > &input_features, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< T > &output_predictions)
 
template<typename T , typename K >
NEVER_INLINE HOST int32_t linear_reg_fit_predict__cpu_template (TableFunctionManager &mgr, const Column< K > &input_ids, const Column< T > &input_labels, const ColumnList< T > &input_features, Column< K > &output_ids, Column< T > &output_predictions)
 

Function Documentation

template<typename T >
Column<T> create_wrapper_col ( std::vector< T > &  col_vec)

Definition at line 595 of file MLTableFunctions.hpp.

Referenced by linear_reg_fit_predict__cpu_template().

595  {
596  Column<T> wrapper_col(col_vec.data(), static_cast<int64_t>(col_vec.size()));
597  return wrapper_col;
598 }

+ Here is the caller graph for this function:

template<typename K , typename T >
NEVER_INLINE HOST int32_t dbscan__cpu_template ( TableFunctionManager &  mgr,
const Column< K > &  input_ids,
const ColumnList< T > &  input_features,
const double  epsilon,
const int32_t  min_observations,
const TextEncodingNone &  preferred_ml_framework_str,
Column< K > &  output_ids,
Column< int32_t > &  output_clusters 
)

Definition at line 298 of file MLTableFunctions.hpp.

References DEFAULT, TableFunctions_Namespace::denull_data(), get_ml_framework(), TextEncodingNone::getString(), INVALID, MLPACK, ONEDAL, pluck_ptrs(), Column< T >::ptr_, TableFunctionManager::set_output_row_size(), Column< T >::size(), TableFunctions_Namespace::unmask_data(), and z_std_normalize_data().

Referenced by dbscan__cpu_template().

305  {
306  mgr.set_output_row_size(input_ids.size());
307  output_ids = input_ids;
308 
309  const auto preferred_ml_framework = get_ml_framework(preferred_ml_framework_str);
310  if (preferred_ml_framework == MLFramework::INVALID) {
311  return mgr.ERROR_MESSAGE("Invalid ML Framework: " +
312  preferred_ml_framework_str.getString());
313  }
314 
315  const auto denulled_data = denull_data(input_features);
316  const int64_t num_rows = denulled_data.masked_num_rows;
317  const bool data_is_masked =
318  denulled_data.masked_num_rows < denulled_data.unmasked_num_rows;
319  std::vector<int32_t> denulled_output_allocation(data_is_masked ? num_rows : 0);
320  int32_t* denulled_output =
321  data_is_masked ? denulled_output_allocation.data() : output_clusters.ptr_;
322 
323  const auto normalized_data = z_std_normalize_data(denulled_data.data, num_rows);
324  const auto normalized_ptrs = pluck_ptrs(normalized_data, 0L, normalized_data.size());
325 
326  try {
327  bool did_execute = false;
328 #ifdef HAVE_ONEDAL
329  if (!did_execute && (preferred_ml_framework == MLFramework::ONEDAL ||
330  preferred_ml_framework == MLFramework::DEFAULT)) {
331  onedal_dbscan_impl(
332  normalized_ptrs, denulled_output, num_rows, epsilon, min_observations);
333  did_execute = true;
334  }
335 #endif
336 #ifdef HAVE_MLPACK
337  if (!did_execute && (preferred_ml_framework == MLFramework::MLPACK ||
338  preferred_ml_framework == MLFramework::DEFAULT)) {
339  mlpack_dbscan_impl(
340  normalized_ptrs, denulled_output, num_rows, epsilon, min_observations);
341  did_execute = true;
342  }
343 #endif
344  if (!did_execute) {
345  return mgr.ERROR_MESSAGE("Cannot find " + preferred_ml_framework_str.getString() +
346  " ML library to support dbscan implementation.");
347  }
348  } catch (std::runtime_error& e) {
349  return mgr.ERROR_MESSAGE(e.what());
350  }
351 
352  if (data_is_masked) {
353  unmask_data(denulled_output,
354  denulled_data.reverse_index_map,
355  output_clusters.ptr_,
356  denulled_data.unmasked_num_rows,
357  inline_null_value<int32_t>());
358  }
359  return input_ids.size();
360 }
void set_output_row_size(int64_t num_rows)
std::string getString() const
Definition: heavydbTypes.h:252
DEVICE int64_t size() const
Definition: heavydbTypes.h:592
T * ptr_
Definition: heavydbTypes.h:566
MaskedData< T > denull_data(const ColumnList< T > &features)
MLFramework get_ml_framework(const std::string &ml_framework_str)
std::vector< const T * > pluck_ptrs(const std::vector< std::vector< T >> &data, const int64_t start_idx, const int64_t end_idx)
void unmask_data(const T *masked_input, const std::vector< int32_t > &reverse_index_map, T *unmasked_output, const int64_t num_unmasked_rows, const T null_val)
std::vector< std::vector< T > > z_std_normalize_data(const std::vector< T * > &input_data, const int64_t num_rows)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename K , typename T >
NEVER_INLINE HOST int32_t dbscan__cpu_template ( TableFunctionManager &  mgr,
const Column< K > &  input_ids,
const ColumnList< T > &  input_features,
const double  epsilon,
const int32_t  min_observations,
Column< K > &  output_ids,
Column< int32_t > &  output_clusters 
)

Definition at line 374 of file MLTableFunctions.hpp.

References dbscan__cpu_template().

380  {
381  std::string preferred_ml_framework{"DEFAULT"};
382  return dbscan__cpu_template(mgr,
383  input_ids,
384  input_features,
385  epsilon,
386  min_observations,
387  preferred_ml_framework,
388  output_ids,
389  output_clusters);
390 }
NEVER_INLINE HOST int32_t dbscan__cpu_template(TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const double epsilon, const int32_t min_observations, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< int32_t > &output_clusters)

+ Here is the call graph for this function:

template<typename K , typename T >
NEVER_INLINE HOST int32_t kmeans__cpu_template ( TableFunctionManager &  mgr,
const Column< K > &  input_ids,
const ColumnList< T > &  input_features,
const int  num_clusters,
const int  num_iterations,
const TextEncodingNone &  init_type_str,
const TextEncodingNone &  preferred_ml_framework_str,
Column< K > &  output_ids,
Column< int32_t > &  output_clusters 
)

Definition at line 139 of file MLTableFunctions.hpp.

References DEFAULT, TableFunctions_Namespace::denull_data(), get_kmeans_init_type(), get_ml_framework(), TextEncodingNone::getString(), INVALID, MLPACK, ONEDAL, pluck_ptrs(), Column< T >::ptr_, TableFunctionManager::set_output_row_size(), Column< T >::size(), TableFunctions_Namespace::unmask_data(), and z_std_normalize_data().

Referenced by kmeans__cpu_template().

147  {
148  mgr.set_output_row_size(input_ids.size());
149  output_ids = input_ids;
150  const auto kmeans_init_strategy = get_kmeans_init_type(init_type_str);
151  if (kmeans_init_strategy == KMeansInitStrategy::INVALID) {
152  return mgr.ERROR_MESSAGE("Invalid KMeans initializaiton strategy: " +
153  init_type_str.getString());
154  }
155 
156  const auto preferred_ml_framework = get_ml_framework(preferred_ml_framework_str);
157  if (preferred_ml_framework == MLFramework::INVALID) {
158  return mgr.ERROR_MESSAGE("Invalid ML Framework: " +
159  preferred_ml_framework_str.getString());
160  }
161 
162  const auto denulled_data = denull_data(input_features);
163  const int64_t num_rows = denulled_data.masked_num_rows;
164  const bool data_is_masked =
165  denulled_data.masked_num_rows < denulled_data.unmasked_num_rows;
166  std::vector<int32_t> denulled_output_allocation(data_is_masked ? num_rows : 0);
167  int32_t* denulled_output =
168  data_is_masked ? denulled_output_allocation.data() : output_clusters.ptr_;
169 
170  const auto normalized_data = z_std_normalize_data(denulled_data.data, num_rows);
171  const auto normalized_ptrs = pluck_ptrs(normalized_data, 0L, normalized_data.size());
172 
173  try {
174  bool did_execute = false;
175 #ifdef HAVE_ONEDAL
176  if (!did_execute && (preferred_ml_framework == MLFramework::ONEDAL ||
177  preferred_ml_framework == MLFramework::DEFAULT)) {
178  onedal_kmeans_impl(normalized_ptrs,
179  denulled_output,
180  num_rows,
181  num_clusters,
182  num_iterations,
183  kmeans_init_strategy);
184  did_execute = true;
185  }
186 #endif
187 #ifdef HAVE_MLPACK
188  if (!did_execute && (preferred_ml_framework == MLFramework::MLPACK ||
189  preferred_ml_framework == MLFramework::DEFAULT)) {
190  mlpack_kmeans_impl(normalized_ptrs,
191  denulled_output,
192  num_rows,
193  num_clusters,
194  num_iterations,
195  kmeans_init_strategy);
196  did_execute = true;
197  }
198 #endif
199  if (!did_execute) {
200  return mgr.ERROR_MESSAGE("Cannot find " + preferred_ml_framework_str.getString() +
201  " ML library to support kmeans implementation.");
202  }
203  } catch (std::runtime_error& e) {
204  return mgr.ERROR_MESSAGE(e.what());
205  }
206 
207  if (data_is_masked) {
208  unmask_data(denulled_output,
209  denulled_data.reverse_index_map,
210  output_clusters.ptr_,
211  denulled_data.unmasked_num_rows,
212  inline_null_value<int32_t>());
213  }
214  return input_ids.size();
215 }
void set_output_row_size(int64_t num_rows)
KMeansInitStrategy get_kmeans_init_type(const std::string &init_type_str)
std::string getString() const
Definition: heavydbTypes.h:252
DEVICE int64_t size() const
Definition: heavydbTypes.h:592
T * ptr_
Definition: heavydbTypes.h:566
MaskedData< T > denull_data(const ColumnList< T > &features)
MLFramework get_ml_framework(const std::string &ml_framework_str)
std::vector< const T * > pluck_ptrs(const std::vector< std::vector< T >> &data, const int64_t start_idx, const int64_t end_idx)
void unmask_data(const T *masked_input, const std::vector< int32_t > &reverse_index_map, T *unmasked_output, const int64_t num_unmasked_rows, const T null_val)
std::vector< std::vector< T > > z_std_normalize_data(const std::vector< T * > &input_data, const int64_t num_rows)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename K , typename T >
NEVER_INLINE HOST int32_t kmeans__cpu_template ( TableFunctionManager &  mgr,
const Column< K > &  input_ids,
const ColumnList< T > &  input_features,
const int  num_clusters,
const int  num_iterations,
const TextEncodingNone &  init_type_str,
Column< K > &  output_ids,
Column< int32_t > &  output_clusters 
)

Definition at line 231 of file MLTableFunctions.hpp.

References kmeans__cpu_template().

238  {
239  std::string preferred_ml_framework{"DEFAULT"};
240  return kmeans__cpu_template(mgr,
241  input_ids,
242  input_features,
243  num_clusters,
244  num_iterations,
245  init_type_str,
246  preferred_ml_framework,
247  output_ids,
248  output_clusters);
249 }
NEVER_INLINE HOST int32_t kmeans__cpu_template(TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const int num_clusters, const int num_iterations, const TextEncodingNone &init_type_str, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< int32_t > &output_clusters)

+ Here is the call graph for this function:

template<typename K , typename T >
NEVER_INLINE HOST int32_t kmeans__cpu_template ( TableFunctionManager &  mgr,
const Column< K > &  input_ids,
const ColumnList< T > &  input_features,
const int32_t  num_clusters,
const int32_t  num_iterations,
Column< K > &  output_ids,
Column< int32_t > &  output_clusters 
)

Definition at line 264 of file MLTableFunctions.hpp.

References kmeans__cpu_template().

270  {
271  std::string kmeans_init_strategy{"DEFAULT"};
272  std::string preferred_ml_framework{"DEFAULT"};
273  return kmeans__cpu_template(mgr,
274  input_ids,
275  input_features,
276  num_clusters,
277  num_iterations,
278  kmeans_init_strategy,
279  preferred_ml_framework,
280  output_ids,
281  output_clusters);
282 }
NEVER_INLINE HOST int32_t kmeans__cpu_template(TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const int num_clusters, const int num_iterations, const TextEncodingNone &init_type_str, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< int32_t > &output_clusters)

+ Here is the call graph for this function:

template<typename T >
NEVER_INLINE HOST int32_t linear_reg_fit__cpu_template ( TableFunctionManager &  mgr,
const Column< T > &  input_labels,
const ColumnList< T > &  input_features,
const TextEncodingNone &  preferred_ml_framework_str,
Column< int32_t > &  output_coef_idxs,
Column< T > &  output_coefs 
)

Definition at line 403 of file MLTableFunctions.hpp.

References DEFAULT, TableFunctions_Namespace::denull_data(), get_ml_framework(), TextEncodingNone::getString(), INVALID, MLPACK, ColumnList< T >::numCols(), ONEDAL, pluck_ptrs(), Column< T >::ptr_, and TableFunctionManager::set_output_row_size().

Referenced by linear_reg_fit__cpu_template(), and linear_reg_fit_predict__cpu_template().

408  {
409  const auto preferred_ml_framework = get_ml_framework(preferred_ml_framework_str);
410  if (preferred_ml_framework == MLFramework::INVALID) {
411  return mgr.ERROR_MESSAGE("Invalid ML Framework: " +
412  preferred_ml_framework_str.getString());
413  }
414  const auto denulled_data = denull_data(input_labels, input_features);
415  const auto labels_ptrs = pluck_ptrs(denulled_data.data, 0L, 1L);
416  const auto features_ptrs =
417  pluck_ptrs(denulled_data.data, 1L, input_features.numCols() + 1);
418  const int64_t num_coefs = input_features.numCols() + 1;
419  mgr.set_output_row_size(num_coefs);
420  try {
421  bool did_execute = false;
422 #ifdef HAVE_ONEDAL
423  if (!did_execute && (preferred_ml_framework == MLFramework::ONEDAL ||
424  preferred_ml_framework == MLFramework::DEFAULT)) {
425  onedal_linear_reg_fit_impl(labels_ptrs[0],
426  features_ptrs,
427  output_coef_idxs.ptr_,
428  output_coefs.ptr_,
429  denulled_data.masked_num_rows);
430  did_execute = true;
431  }
432 #endif
433 #ifdef HAVE_MLPACK
434  if (!did_execute && (preferred_ml_framework == MLFramework::MLPACK ||
435  preferred_ml_framework == MLFramework::DEFAULT)) {
436  mlpack_linear_reg_fit_impl(labels_ptrs[0],
437  features_ptrs,
438  output_coef_idxs.ptr_,
439  output_coefs.ptr_,
440  denulled_data.masked_num_rows);
441  did_execute = true;
442  }
443 #endif
444  if (!did_execute) {
445  return mgr.ERROR_MESSAGE(
446  "Cannot find " + preferred_ml_framework_str.getString() +
447  " ML library to support linear regression implementation.");
448  }
449  } catch (std::runtime_error& e) {
450  return mgr.ERROR_MESSAGE(e.what());
451  }
452  return num_coefs;
453 }
void set_output_row_size(int64_t num_rows)
std::string getString() const
Definition: heavydbTypes.h:252
DEVICE int64_t numCols() const
Definition: heavydbTypes.h:863
T * ptr_
Definition: heavydbTypes.h:566
MaskedData< T > denull_data(const ColumnList< T > &features)
MLFramework get_ml_framework(const std::string &ml_framework_str)
std::vector< const T * > pluck_ptrs(const std::vector< std::vector< T >> &data, const int64_t start_idx, const int64_t end_idx)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
NEVER_INLINE HOST int32_t linear_reg_fit__cpu_template ( TableFunctionManager &  mgr,
const Column< T > &  input_labels,
const ColumnList< T > &  input_features,
Column< int32_t > &  output_coef_idxs,
Column< T > &  output_coefs 
)

Definition at line 464 of file MLTableFunctions.hpp.

References linear_reg_fit__cpu_template().

468  {
469  std::string preferred_ml_framework{"DEFAULT"};
470  return linear_reg_fit__cpu_template(mgr,
471  input_labels,
472  input_features,
473  preferred_ml_framework,
474  output_coef_idxs,
475  output_coefs);
476 }
NEVER_INLINE HOST int32_t linear_reg_fit__cpu_template(TableFunctionManager &mgr, const Column< T > &input_labels, const ColumnList< T > &input_features, const TextEncodingNone &preferred_ml_framework_str, Column< int32_t > &output_coef_idxs, Column< T > &output_coefs)

+ Here is the call graph for this function:

template<typename T , typename K >
NEVER_INLINE HOST int32_t linear_reg_fit_predict__cpu_template ( TableFunctionManager &  mgr,
const Column< K > &  input_ids,
const Column< T > &  input_labels,
const ColumnList< T > &  input_features,
const TextEncodingNone &  preferred_ml_framework_str,
Column< K > &  output_ids,
Column< T > &  output_predictions 
)

Definition at line 612 of file MLTableFunctions.hpp.

References create_wrapper_col(), TableFunctionManager::disable_output_allocations(), TableFunctionManager::enable_output_allocations(), linear_reg_fit__cpu_template(), linear_reg_predict__cpu_template(), and ColumnList< T >::numCols().

Referenced by linear_reg_fit_predict__cpu_template().

618  {
619  const int64_t num_coefs = input_features.numCols() + 1;
620  // Need to create backing vectors for coef column wrappers
621  std::vector<int32_t> coef_idxs_vec(num_coefs);
622  std::vector<T> coefs_vec(num_coefs);
623  auto coef_idxs = create_wrapper_col(coef_idxs_vec);
624  auto coefs = create_wrapper_col(coefs_vec);
625  // Disable output allocations as we are not calling the fit function
626  // through the normal table functions path, and we have already
627  // allocated our coef storage with the vectors above.
628  mgr.disable_output_allocations();
629  const auto fit_ret = linear_reg_fit__cpu_template(
630  mgr, input_labels, input_features, preferred_ml_framework_str, coef_idxs, coefs);
631  mgr.enable_output_allocations();
632  if (fit_ret < 0) {
633  return fit_ret;
634  }
635  return linear_reg_predict__cpu_template(mgr,
636  input_ids,
637  input_features,
638  coef_idxs,
639  coefs,
640  preferred_ml_framework_str,
641  output_ids,
642  output_predictions);
643 }
DEVICE int64_t numCols() const
Definition: heavydbTypes.h:863
NEVER_INLINE HOST int32_t linear_reg_fit__cpu_template(TableFunctionManager &mgr, const Column< T > &input_labels, const ColumnList< T > &input_features, const TextEncodingNone &preferred_ml_framework_str, Column< int32_t > &output_coef_idxs, Column< T > &output_coefs)
NEVER_INLINE HOST int32_t linear_reg_predict__cpu_template(TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const Column< int32_t > &coef_idxs, const Column< T > &coefs, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< T > &output_predictions)
void disable_output_allocations()
void enable_output_allocations()
Column< T > create_wrapper_col(std::vector< T > &col_vec)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T , typename K >
NEVER_INLINE HOST int32_t linear_reg_fit_predict__cpu_template ( TableFunctionManager &  mgr,
const Column< K > &  input_ids,
const Column< T > &  input_labels,
const ColumnList< T > &  input_features,
Column< K > &  output_ids,
Column< T > &  output_predictions 
)

Definition at line 656 of file MLTableFunctions.hpp.

References linear_reg_fit_predict__cpu_template().

661  {
662  std::string preferred_ml_framework{"DEFAULT"};
663  return linear_reg_fit_predict__cpu_template(mgr,
664  input_ids,
665  input_labels,
666  input_features,
667  preferred_ml_framework,
668  output_ids,
669  output_predictions);
670 }
NEVER_INLINE HOST int32_t linear_reg_fit_predict__cpu_template(TableFunctionManager &mgr, const Column< K > &input_ids, const Column< T > &input_labels, const ColumnList< T > &input_features, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< T > &output_predictions)

+ Here is the call graph for this function:

template<typename T , typename K >
NEVER_INLINE HOST int32_t linear_reg_predict__cpu_template ( TableFunctionManager &  mgr,
const Column< K > &  input_ids,
const ColumnList< T > &  input_features,
const Column< int32_t > &  coef_idxs,
const Column< T > &  coefs,
const TextEncodingNone &  preferred_ml_framework_str,
Column< K > &  output_ids,
Column< T > &  output_predictions 
)

Definition at line 501 of file MLTableFunctions.hpp.

References DEFAULT, TableFunctions_Namespace::denull_data(), get_ml_framework(), TextEncodingNone::getString(), INVALID, MLPACK, ColumnList< T >::numCols(), ONEDAL, pluck_ptrs(), Column< T >::ptr_, TableFunctionManager::set_output_row_size(), Column< T >::size(), sort_coefs(), heavydb.dtypes::T, and TableFunctions_Namespace::unmask_data().

Referenced by linear_reg_fit_predict__cpu_template(), and linear_reg_predict__cpu_template().

508  {
509  const auto preferred_ml_framework = get_ml_framework(preferred_ml_framework_str);
510  if (preferred_ml_framework == MLFramework::INVALID) {
511  return mgr.ERROR_MESSAGE("Invalid ML Framework: " +
512  preferred_ml_framework_str.getString());
513  }
514 
515  mgr.set_output_row_size(input_ids.size());
516  const auto denulled_data = denull_data(input_features);
517  const int64_t num_rows = denulled_data.masked_num_rows;
518  const bool data_is_masked =
519  denulled_data.masked_num_rows < denulled_data.unmasked_num_rows;
520  std::vector<T> denulled_output_allocation(data_is_masked ? num_rows : 0);
521  T* denulled_output =
522  data_is_masked ? denulled_output_allocation.data() : output_predictions.ptr_;
523 
524  const auto features_ptrs = pluck_ptrs(denulled_data.data, 0L, input_features.numCols());
525 
526  const auto ordered_coefs = sort_coefs(coef_idxs, coefs);
527 
528  try {
529  bool did_execute = false;
530 #ifdef HAVE_ONEDAL
531  if (!did_execute && (preferred_ml_framework == MLFramework::ONEDAL ||
532  preferred_ml_framework == MLFramework::DEFAULT)) {
533  onedal_linear_reg_predict_impl(
534  features_ptrs, denulled_output, num_rows, ordered_coefs.data());
535  did_execute = true;
536  }
537 #endif
538 #ifdef HAVE_MLPACK
539  if (!did_execute && (preferred_ml_framework == MLFramework::MLPACK ||
540  preferred_ml_framework == MLFramework::DEFAULT)) {
541  mlpack_linear_reg_predict_impl(
542  features_ptrs, denulled_output, num_rows, ordered_coefs.data());
543  did_execute = true;
544  }
545 #endif
546  if (!did_execute) {
547  return mgr.ERROR_MESSAGE("Cannot find " + preferred_ml_framework_str.getString() +
548  " ML library to support kmeans implementation.");
549  }
550  } catch (std::runtime_error& e) {
551  return mgr.ERROR_MESSAGE(e.what());
552  }
553  output_ids = input_ids;
554  if (data_is_masked) {
555  unmask_data(denulled_output,
556  denulled_data.reverse_index_map,
557  output_predictions.ptr_,
558  denulled_data.unmasked_num_rows,
559  inline_null_value<T>());
560  }
561  return input_ids.size();
562 }
void set_output_row_size(int64_t num_rows)
std::string getString() const
Definition: heavydbTypes.h:252
DEVICE int64_t size() const
Definition: heavydbTypes.h:592
DEVICE int64_t numCols() const
Definition: heavydbTypes.h:863
T * ptr_
Definition: heavydbTypes.h:566
MaskedData< T > denull_data(const ColumnList< T > &features)
std::vector< T > sort_coefs(const Column< int32_t > &coef_idxs, const Column< T > &coefs)
MLFramework get_ml_framework(const std::string &ml_framework_str)
std::vector< const T * > pluck_ptrs(const std::vector< std::vector< T >> &data, const int64_t start_idx, const int64_t end_idx)
void unmask_data(const T *masked_input, const std::vector< int32_t > &reverse_index_map, T *unmasked_output, const int64_t num_unmasked_rows, const T null_val)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T , typename K >
NEVER_INLINE HOST int32_t linear_reg_predict__cpu_template ( TableFunctionManager &  mgr,
const Column< K > &  input_ids,
const ColumnList< T > &  input_features,
const Column< int32_t > &  coef_idxs,
const Column< T > &  coefs,
Column< K > &  output_ids,
Column< T > &  output_predictions 
)

Definition at line 576 of file MLTableFunctions.hpp.

References linear_reg_predict__cpu_template().

582  {
583  std::string preferred_ml_framework{"DEFAULT"};
584  return linear_reg_predict__cpu_template(mgr,
585  input_ids,
586  input_features,
587  coef_idxs,
588  coefs,
589  preferred_ml_framework,
590  output_ids,
591  output_predictions);
592 }
NEVER_INLINE HOST int32_t linear_reg_predict__cpu_template(TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const Column< int32_t > &coef_idxs, const Column< T > &coefs, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< T > &output_predictions)

+ Here is the call graph for this function:

template<typename T >
std::vector<const T*> pluck_ptrs ( const std::vector< std::vector< T >> &  data,
const int64_t  start_idx,
const int64_t  end_idx 
)

Definition at line 36 of file MLTableFunctions.hpp.

References CHECK_GE, CHECK_GT, and CHECK_LE.

Referenced by dbscan__cpu_template(), kmeans__cpu_template(), linear_reg_fit__cpu_template(), and linear_reg_predict__cpu_template().

38  {
39  std::vector<const T*> raw_ptrs;
40  CHECK_GE(start_idx, 0L);
41  CHECK_GT(end_idx, start_idx);
42  CHECK_LE(end_idx, static_cast<int64_t>(data.size()));
43  for (int64_t col_idx = start_idx; col_idx < end_idx; ++col_idx) {
44  raw_ptrs.emplace_back(data[col_idx].data());
45  }
46  return raw_ptrs;
47 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
#define CHECK_GT(x, y)
Definition: Logger.h:234
#define CHECK_LE(x, y)
Definition: Logger.h:233

+ Here is the caller graph for this function:

template<typename T >
std::vector<const T*> pluck_ptrs ( const std::vector< T * > &  data,
const int64_t  start_idx,
const int64_t  end_idx 
)

Definition at line 50 of file MLTableFunctions.hpp.

References CHECK_GE, CHECK_GT, and CHECK_LE.

52  {
53  std::vector<const T*> raw_ptrs;
54  CHECK_GE(start_idx, 0L);
55  CHECK_GT(end_idx, start_idx);
56  CHECK_LE(end_idx, static_cast<int64_t>(data.size()));
57  for (int64_t col_idx = start_idx; col_idx < end_idx; ++col_idx) {
58  raw_ptrs.emplace_back(data[col_idx]);
59  }
60  return raw_ptrs;
61 }
#define CHECK_GE(x, y)
Definition: Logger.h:235
#define CHECK_GT(x, y)
Definition: Logger.h:234
#define CHECK_LE(x, y)
Definition: Logger.h:233
template<typename T >
std::vector<T> sort_coefs ( const Column< int32_t > &  coef_idxs,
const Column< T > &  coefs 
)

Definition at line 479 of file MLTableFunctions.hpp.

References Column< T >::size().

Referenced by linear_reg_predict__cpu_template().

479  {
480  const size_t num_coefs = coef_idxs.size();
481  std::vector<T> ordered_coefs(num_coefs);
482  for (size_t coef_idx = 0; coef_idx < num_coefs; ++coef_idx) {
483  ordered_coefs[coef_idxs[coef_idx]] = coefs[coef_idx];
484  }
485  return ordered_coefs;
486 }
DEVICE int64_t size() const
Definition: heavydbTypes.h:592

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

EXTENSION_NOINLINE_HOST int32_t supported_ml_frameworks__cpu_ ( TableFunctionManager &  mgr,
Column< TextEncodingDict > &  output_ml_frameworks,
Column< bool > &  output_availability,
Column< bool > &  output_default 
)

Definition at line 71 of file MLTableFunctions.hpp.

References StringDictionaryProxy::getOrAddTransientBulk(), TableFunctionManager::set_output_row_size(), and Column< TextEncodingDict >::string_dict_proxy_.

74  {
75  const std::vector<std::string> ml_frameworks = {"onedal", "mlpack"};
76  const int32_t num_frameworks = ml_frameworks.size();
77  mgr.set_output_row_size(num_frameworks);
78  const std::vector<int32_t> ml_framework_string_ids =
79  output_ml_frameworks.string_dict_proxy_->getOrAddTransientBulk(ml_frameworks);
80 
81 #if defined(HAVE_ONEDAL) || defined(HAVE_MLPACK)
82  bool found_available_framework = false;
83  auto framework_found_actions = [&output_availability,
84  &output_default,
85  &found_available_framework](const int64_t out_row_idx) {
86  output_availability[out_row_idx] = true;
87  if (!found_available_framework) {
88  output_default[out_row_idx] = true;
89  found_available_framework = true;
90  } else {
91  output_default[out_row_idx] = false;
92  }
93  };
94 #endif
95 
96 #if !defined(HAVE_ONEDAL) || !defined(HAVE_MLPACK)
97  auto framework_not_found_actions = [&output_availability,
98  &output_default](const int64_t out_row_idx) {
99  output_availability[out_row_idx] = false;
100  output_default[out_row_idx] = false;
101  };
102 #endif
103 
104  for (int32_t out_row_idx = 0; out_row_idx < num_frameworks; ++out_row_idx) {
105  output_ml_frameworks[out_row_idx] = ml_framework_string_ids[out_row_idx];
106  if (ml_frameworks[out_row_idx] == "onedal") {
107 #ifdef HAVE_ONEDAL
108  framework_found_actions(out_row_idx);
109 #else
110  framework_not_found_actions(out_row_idx);
111 #endif
112  } else if (ml_frameworks[out_row_idx] == "mlpack") {
113 #ifdef HAVE_MLPACK
114  framework_found_actions(out_row_idx);
115 #else
116  framework_not_found_actions(out_row_idx);
117 #endif
118  }
119  }
120  return num_frameworks;
121 }
void set_output_row_size(int64_t num_rows)
StringDictionaryProxy * string_dict_proxy_
Definition: heavydbTypes.h:751
std::vector< int32_t > getOrAddTransientBulk(const std::vector< std::string > &strings)

+ Here is the call graph for this function: