OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MLTableFunctions.hpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #ifndef __CUDACC__
20 
24 
25 #ifdef HAVE_ONEDAL
27 #endif
28 
29 #ifdef HAVE_MLPACK
31 #endif
32 
33 using namespace TableFunctions_Namespace;
34 
35 template <typename T>
36 std::vector<const T*> pluck_ptrs(const std::vector<std::vector<T>>& data,
37  const int64_t start_idx,
38  const int64_t end_idx) {
39  std::vector<const T*> raw_ptrs;
40  CHECK_GE(start_idx, 0L);
41  CHECK_GT(end_idx, start_idx);
42  CHECK_LE(end_idx, static_cast<int64_t>(data.size()));
43  for (int64_t col_idx = start_idx; col_idx < end_idx; ++col_idx) {
44  raw_ptrs.emplace_back(data[col_idx].data());
45  }
46  return raw_ptrs;
47 }
48 
49 template <typename T>
50 std::vector<const T*> pluck_ptrs(const std::vector<T*>& data,
51  const int64_t start_idx,
52  const int64_t end_idx) {
53  std::vector<const T*> raw_ptrs;
54  CHECK_GE(start_idx, 0L);
55  CHECK_GT(end_idx, start_idx);
56  CHECK_LE(end_idx, static_cast<int64_t>(data.size()));
57  for (int64_t col_idx = start_idx; col_idx < end_idx; ++col_idx) {
58  raw_ptrs.emplace_back(data[col_idx]);
59  }
60  return raw_ptrs;
61 }
62 
63 // clang-format off
64 /*
65  UDTF: supported_ml_frameworks__cpu_(TableFunctionManager) ->
66  Column<TextEncodingDict> ml_framework | input_id=args<>, Column<bool> is_available, Column<bool> is_default
67 */
68 // clang-format on
69 
// Emits one output row per known ML framework ("onedal", "mlpack"): the
// dictionary-encoded framework name, whether support for it was compiled in
// (HAVE_ONEDAL / HAVE_MLPACK), and whether it is the default framework.
// The first available framework in list order is flagged as the default.
// Returns the number of rows written, i.e. the number of frameworks.
// NOTE(review): the line(s) carrying the return type, the function name and the
// `mgr` parameter used below are not present in this listing -- confirm against
// the original header.
72  Column<TextEncodingDict>& output_ml_frameworks,
73  Column<bool>& output_availability,
74  Column<bool>& output_default) {
75  const std::vector<std::string> ml_frameworks = {"onedal", "mlpack"};
76  const int32_t num_frameworks = ml_frameworks.size();
77  mgr.set_output_row_size(num_frameworks);
  // Translate the framework names into ids of the output column's dictionary.
78  const std::vector<int32_t> ml_framework_string_ids =
79  output_ml_frameworks.string_dict_proxy_->getOrAddTransientBulk(ml_frameworks);
80 
  // Only defined when at least one framework is compiled in; otherwise the
  // lambda would be unused.
81 #if defined(HAVE_ONEDAL) || defined(HAVE_MLPACK)
82  bool found_available_framework = false;
  // Marks a row as available; the first row to reach this lambda also becomes
  // the default, every later one is explicitly marked non-default.
83  auto framework_found_actions = [&output_availability,
84  &output_default,
85  &found_available_framework](const int64_t out_row_idx) {
86  output_availability[out_row_idx] = true;
87  if (!found_available_framework) {
88  output_default[out_row_idx] = true;
89  found_available_framework = true;
90  } else {
91  output_default[out_row_idx] = false;
92  }
93  };
94 #endif
95 
  // Only defined when at least one framework is missing from the build.
96 #if !defined(HAVE_ONEDAL) || !defined(HAVE_MLPACK)
  // Marks a row as neither available nor default.
97  auto framework_not_found_actions = [&output_availability,
98  &output_default](const int64_t out_row_idx) {
99  output_availability[out_row_idx] = false;
100  output_default[out_row_idx] = false;
101  };
102 #endif
103 
  // Rows are written in the order of ml_frameworks, so "onedal" gets first
  // claim on the default flag when it is available.
104  for (int32_t out_row_idx = 0; out_row_idx < num_frameworks; ++out_row_idx) {
105  output_ml_frameworks[out_row_idx] = ml_framework_string_ids[out_row_idx];
106  if (ml_frameworks[out_row_idx] == "onedal") {
107 #ifdef HAVE_ONEDAL
108  framework_found_actions(out_row_idx);
109 #else
110  framework_not_found_actions(out_row_idx);
111 #endif
112  } else if (ml_frameworks[out_row_idx] == "mlpack") {
113 #ifdef HAVE_MLPACK
114  framework_found_actions(out_row_idx);
115 #else
116  framework_not_found_actions(out_row_idx);
117 #endif
118  }
119  }
120  return num_frameworks;
121 }
122 
123 // clang-format off
124 /*
125  UDTF: kmeans__cpu_template(TableFunctionManager,
126  Cursor<Column<K> input_ids, ColumnList<T> input_features> data,
127  int32_t num_clusters | require="num_clusters > 0" | require="num_clusters <= input_ids.size()",
128  int32_t num_iterations | require="num_iterations > 0",
129  TextEncodingNone init_type,
130  TextEncodingNone preferred_ml_framework) ->
131  Column<K> id | input_id=args<0>,
132  Column<int32_t> cluster_id,
133  K=[int32_t, int64_t, TextEncodingDict], T=[float, double]
134 */
135 // clang-format on
136 
137 template <typename K, typename T>
138 NEVER_INLINE HOST int32_t
140  const Column<K>& input_ids,
141  const ColumnList<T>& input_features,
142  const int num_clusters,
143  const int num_iterations,
144  const TextEncodingNone& init_type_str,
145  const TextEncodingNone& preferred_ml_framework_str,
146  Column<K>& output_ids,
147  Column<int32_t>& output_clusters) {
148  mgr.set_output_row_size(input_ids.size());
149  output_ids = input_ids;
150  const auto kmeans_init_strategy = get_kmeans_init_type(init_type_str);
151  if (kmeans_init_strategy == KMeansInitStrategy::INVALID) {
152  return mgr.ERROR_MESSAGE("Invalid KMeans initializaiton strategy: " +
153  init_type_str.getString());
154  }
155 
156  const auto preferred_ml_framework = get_ml_framework(preferred_ml_framework_str);
157  if (preferred_ml_framework == MLFramework::INVALID) {
158  return mgr.ERROR_MESSAGE("Invalid ML Framework: " +
159  preferred_ml_framework_str.getString());
160  }
161 
162  const auto denulled_data = denull_data(input_features);
163  const int64_t num_rows = denulled_data.masked_num_rows;
164  const bool data_is_masked =
165  denulled_data.masked_num_rows < denulled_data.unmasked_num_rows;
166  std::vector<int32_t> denulled_output_allocation(data_is_masked ? num_rows : 0);
167  int32_t* denulled_output =
168  data_is_masked ? denulled_output_allocation.data() : output_clusters.ptr_;
169 
170  const auto normalized_data = z_std_normalize_data(denulled_data.data, num_rows);
171  const auto normalized_ptrs = pluck_ptrs(normalized_data, 0L, normalized_data.size());
172 
173  try {
174  bool did_execute = false;
175 #ifdef HAVE_ONEDAL
176  if (!did_execute && (preferred_ml_framework == MLFramework::ONEDAL ||
177  preferred_ml_framework == MLFramework::DEFAULT)) {
178  onedal_kmeans_impl(normalized_ptrs,
179  denulled_output,
180  num_rows,
181  num_clusters,
182  num_iterations,
183  kmeans_init_strategy);
184  did_execute = true;
185  }
186 #endif
187 #ifdef HAVE_MLPACK
188  if (!did_execute && (preferred_ml_framework == MLFramework::MLPACK ||
189  preferred_ml_framework == MLFramework::DEFAULT)) {
190  mlpack_kmeans_impl(normalized_ptrs,
191  denulled_output,
192  num_rows,
193  num_clusters,
194  num_iterations,
195  kmeans_init_strategy);
196  did_execute = true;
197  }
198 #endif
199  if (!did_execute) {
200  return mgr.ERROR_MESSAGE("Cannot find " + preferred_ml_framework_str.getString() +
201  " ML library to support kmeans implementation.");
202  }
203  } catch (std::runtime_error& e) {
204  return mgr.ERROR_MESSAGE(e.what());
205  }
206 
207  if (data_is_masked) {
208  unmask_data(denulled_output,
209  denulled_data.reverse_index_map,
210  output_clusters.ptr_,
211  denulled_data.unmasked_num_rows,
212  inline_null_value<int32_t>());
213  }
214  return input_ids.size();
215 }
216 
217 // clang-format off
218 /*
219  UDTF: kmeans__cpu_template(TableFunctionManager,
220  Cursor<Column<K> input_ids, ColumnList<T> input_features> data,
221  int32_t num_clusters | require="num_clusters > 0" | require="num_clusters <= input_ids.size()",
222  int32_t num_iterations | require="num_iterations > 0",
223  TextEncodingNone init_type) ->
224  Column<K> id | input_id=args<0>,
225  Column<int32_t> cluster_id,
226  K=[int32_t, int64_t, TextEncodingDict], T=[float, double]
227 */
228 // clang-format on
229 
// Convenience overload of kmeans__cpu_template without a framework argument:
// forwards every argument to the full overload with the preferred ML framework
// set to "DEFAULT" and returns that call's result.
// NOTE(review): the line declaring the return type, the function name and the
// `mgr` parameter used below is not present in this listing -- confirm against
// the original header.
230 template <typename K, typename T>
232  const Column<K>& input_ids,
233  const ColumnList<T>& input_features,
234  const int num_clusters,
235  const int num_iterations,
236  const TextEncodingNone& init_type_str,
237  Column<K>& output_ids,
238  Column<int32_t>& output_clusters) {
  // Passed in the position of the TextEncodingNone framework parameter.
239  std::string preferred_ml_framework{"DEFAULT"};
240  return kmeans__cpu_template(mgr,
241  input_ids,
242  input_features,
243  num_clusters,
244  num_iterations,
245  init_type_str,
246  preferred_ml_framework,
247  output_ids,
248  output_clusters);
249 }
250 
251 // clang-format off
252 /*
253  UDTF: kmeans__cpu_template(TableFunctionManager,
254  Cursor<Column<K> input_ids, ColumnList<T> input_features> data,
255  int32_t num_clusters | require="num_clusters > 0" | require="num_clusters <= input_ids.size()",
256  int32_t num_iterations | require="num_iterations > 0") ->
257  Column<K> id | input_id=args<0>,
258  Column<int32_t> cluster_id,
259  K=[int32_t, int64_t, TextEncodingDict], T=[float, double]
260 */
261 // clang-format on
262 
// Convenience overload of kmeans__cpu_template without init-type or framework
// arguments: forwards to the full overload with both the initialization
// strategy and the preferred ML framework set to "DEFAULT" and returns that
// call's result.
// NOTE(review): the line declaring the return type, the function name and the
// `mgr` parameter used below is not present in this listing -- confirm against
// the original header.
263 template <typename K, typename T>
265  const Column<K>& input_ids,
266  const ColumnList<T>& input_features,
267  const int32_t num_clusters,
268  const int32_t num_iterations,
269  Column<K>& output_ids,
270  Column<int32_t>& output_clusters) {
  // Both strings are passed in the positions of TextEncodingNone parameters.
271  std::string kmeans_init_strategy{"DEFAULT"};
272  std::string preferred_ml_framework{"DEFAULT"};
273  return kmeans__cpu_template(mgr,
274  input_ids,
275  input_features,
276  num_clusters,
277  num_iterations,
278  kmeans_init_strategy,
279  preferred_ml_framework,
280  output_ids,
281  output_clusters);
282 }
283 
284 // clang-format off
285 /*
286  UDTF: dbscan__cpu_template(TableFunctionManager,
287  Cursor<Column<K> input_ids, ColumnList<T> input_features> data,
288  double epsilon | require="epsilon > 0.0",
289  int32_t min_observations | require="min_observations > 0",
290  TextEncodingNone preferred_ml_framework) ->
291  Column<K> id | input_id=args<0>, Column<int32_t> cluster_id,
292  K=[int32_t, int64_t, TextEncodingDict], T=[float, double]
293  */
294 // clang-format on
295 
// DBSCAN clustering table function (CPU), full-argument overload.
// Copies input_ids to output_ids and writes one cluster id per input row into
// output_clusters. Returns input_ids.size() on success, or the value of
// mgr.ERROR_MESSAGE(...) when the framework string is invalid, when no
// matching framework is compiled in, or when an implementation throws
// std::runtime_error.
// NOTE(review): the line carrying the function name and the `mgr` parameter
// used below is not present in this listing -- confirm against the original
// header.
296 template <typename K, typename T>
297 NEVER_INLINE HOST int32_t
299  const Column<K>& input_ids,
300  const ColumnList<T>& input_features,
301  const double epsilon,
302  const int32_t min_observations,
303  const TextEncodingNone& preferred_ml_framework_str,
304  Column<K>& output_ids,
305  Column<int32_t>& output_clusters) {
306  mgr.set_output_row_size(input_ids.size());
307  output_ids = input_ids;
308 
309  const auto preferred_ml_framework = get_ml_framework(preferred_ml_framework_str);
310  if (preferred_ml_framework == MLFramework::INVALID) {
311  return mgr.ERROR_MESSAGE("Invalid ML Framework: " +
312  preferred_ml_framework_str.getString());
313  }
314 
315  const auto denulled_data = denull_data(input_features);
316  const int64_t num_rows = denulled_data.masked_num_rows;
  // "Masked" means denull_data kept fewer rows than the input had.
317  const bool data_is_masked =
318  denulled_data.masked_num_rows < denulled_data.unmasked_num_rows;
  // A scratch buffer is only allocated when rows were dropped; otherwise the
  // implementation writes straight into the output column.
319  std::vector<int32_t> denulled_output_allocation(data_is_masked ? num_rows : 0);
320  int32_t* denulled_output =
321  data_is_masked ? denulled_output_allocation.data() : output_clusters.ptr_;
322 
323  const auto normalized_data = z_std_normalize_data(denulled_data.data, num_rows);
324  const auto normalized_ptrs = pluck_ptrs(normalized_data, 0L, normalized_data.size());
325 
326  try {
  // did_execute ensures at most one backend runs: oneDAL is tried first, and
  // mlpack only if oneDAL did not run.
327  bool did_execute = false;
328 #ifdef HAVE_ONEDAL
329  if (!did_execute && (preferred_ml_framework == MLFramework::ONEDAL ||
330  preferred_ml_framework == MLFramework::DEFAULT)) {
331  onedal_dbscan_impl(
332  normalized_ptrs, denulled_output, num_rows, epsilon, min_observations);
333  did_execute = true;
334  }
335 #endif
336 #ifdef HAVE_MLPACK
337  if (!did_execute && (preferred_ml_framework == MLFramework::MLPACK ||
338  preferred_ml_framework == MLFramework::DEFAULT)) {
339  mlpack_dbscan_impl(
340  normalized_ptrs, denulled_output, num_rows, epsilon, min_observations);
341  did_execute = true;
342  }
343 #endif
344  if (!did_execute) {
345  return mgr.ERROR_MESSAGE("Cannot find " + preferred_ml_framework_str.getString() +
346  " ML library to support dbscan implementation.");
347  }
348  } catch (std::runtime_error& e) {
349  return mgr.ERROR_MESSAGE(e.what());
350  }
351 
  // Expand the compacted result back to the full row count, passing the int32
  // inline null value as the filler for unmask_data.
352  if (data_is_masked) {
353  unmask_data(denulled_output,
354  denulled_data.reverse_index_map,
355  output_clusters.ptr_,
356  denulled_data.unmasked_num_rows,
357  inline_null_value<int32_t>());
358  }
359  return input_ids.size();
360 }
361 
362 // clang-format off
363 /*
364  UDTF: dbscan__cpu_template(TableFunctionManager,
365  Cursor<Column<K> input_ids, ColumnList<T> input_features> data,
366  double epsilon | require="epsilon > 0.0",
367  int32_t min_observations | require="min_observations > 0") ->
368  Column<K> id | input_id=args<0>, Column<int32_t> cluster_id,
369  K=[int32_t, int64_t, TextEncodingDict], T=[float, double]
370  */
371 // clang-format on
372 
// Convenience overload of dbscan__cpu_template without a framework argument:
// forwards every argument to the full overload with the preferred ML framework
// set to "DEFAULT" and returns that call's result.
// NOTE(review): the line declaring the return type, the function name and the
// `mgr` parameter used below is not present in this listing -- confirm against
// the original header.
373 template <typename K, typename T>
375  const Column<K>& input_ids,
376  const ColumnList<T>& input_features,
377  const double epsilon,
378  const int32_t min_observations,
379  Column<K>& output_ids,
380  Column<int32_t>& output_clusters) {
  // Passed in the position of the TextEncodingNone framework parameter.
381  std::string preferred_ml_framework{"DEFAULT"};
382  return dbscan__cpu_template(mgr,
383  input_ids,
384  input_features,
385  epsilon,
386  min_observations,
387  preferred_ml_framework,
388  output_ids,
389  output_clusters);
390 }
391 
392 // clang-format off
393 /*
394  UDTF: linear_reg_fit__cpu_template(TableFunctionManager,
395  Cursor<Column<T> labels, ColumnList<T> features> data,
396  TextEncodingNone preferred_ml_framework) ->
397  Column<int32_t> coef_idx, Column<T> coef, T=[float, double]
398  */
399 // clang-format on
400 
// Linear regression fit table function (CPU), full-argument overload.
// Fits a model of input_labels against input_features and writes
// input_features.numCols() + 1 rows of (coefficient index, coefficient value)
// into output_coef_idxs / output_coefs. Returns that row count on success, or
// the value of mgr.ERROR_MESSAGE(...) when the framework string is invalid,
// when no matching framework is compiled in, or when an implementation throws
// std::runtime_error.
// NOTE(review): the line carrying the function name and the `mgr` parameter
// used below is not present in this listing -- confirm against the original
// header.
401 template <typename T>
402 NEVER_INLINE HOST int32_t
404  const Column<T>& input_labels,
405  const ColumnList<T>& input_features,
406  const TextEncodingNone& preferred_ml_framework_str,
407  Column<int32_t>& output_coef_idxs,
408  Column<T>& output_coefs) {
409  const auto preferred_ml_framework = get_ml_framework(preferred_ml_framework_str);
410  if (preferred_ml_framework == MLFramework::INVALID) {
411  return mgr.ERROR_MESSAGE("Invalid ML Framework: " +
412  preferred_ml_framework_str.getString());
413  }
  // denulled_data.data is sliced below as: column 0 = labels, columns
  // 1..numCols() = features.
414  const auto denulled_data = denull_data(input_labels, input_features);
415  const auto labels_ptrs = pluck_ptrs(denulled_data.data, 0L, 1L);
416  const auto features_ptrs =
417  pluck_ptrs(denulled_data.data, 1L, input_features.numCols() + 1);
  // One output row per feature plus one extra row.
  // NOTE(review): the extra row is presumably the intercept term -- confirm.
418  const int64_t num_coefs = input_features.numCols() + 1;
419  mgr.set_output_row_size(num_coefs);
420  try {
  // did_execute ensures at most one backend runs: oneDAL is tried first, and
  // mlpack only if oneDAL did not run.
421  bool did_execute = false;
422 #ifdef HAVE_ONEDAL
423  if (!did_execute && (preferred_ml_framework == MLFramework::ONEDAL ||
424  preferred_ml_framework == MLFramework::DEFAULT)) {
425  onedal_linear_reg_fit_impl(labels_ptrs[0],
426  features_ptrs,
427  output_coef_idxs.ptr_,
428  output_coefs.ptr_,
429  denulled_data.masked_num_rows);
430  did_execute = true;
431  }
432 #endif
433 #ifdef HAVE_MLPACK
434  if (!did_execute && (preferred_ml_framework == MLFramework::MLPACK ||
435  preferred_ml_framework == MLFramework::DEFAULT)) {
436  mlpack_linear_reg_fit_impl(labels_ptrs[0],
437  features_ptrs,
438  output_coef_idxs.ptr_,
439  output_coefs.ptr_,
440  denulled_data.masked_num_rows);
441  did_execute = true;
442  }
443 #endif
444  if (!did_execute) {
445  return mgr.ERROR_MESSAGE(
446  "Cannot find " + preferred_ml_framework_str.getString() +
447  " ML library to support linear regression implementation.");
448  }
449  } catch (std::runtime_error& e) {
450  return mgr.ERROR_MESSAGE(e.what());
451  }
452  return num_coefs;
453 }
454 
455 // clang-format off
456 /*
457  UDTF: linear_reg_fit__cpu_template(TableFunctionManager,
458  Cursor<Column<T> labels, ColumnList<T> features> data) ->
459  Column<int32_t> coef_idx, Column<T> coef, T=[float, double]
460  */
461 // clang-format on
// Convenience overload of linear_reg_fit__cpu_template without a framework
// argument: forwards every argument to the full overload with the preferred ML
// framework set to "DEFAULT" and returns that call's result.
// NOTE(review): the line carrying the function name and the `mgr` parameter
// used below is not present in this listing -- confirm against the original
// header.
462 template <typename T>
463 NEVER_INLINE HOST int32_t
465  const Column<T>& input_labels,
466  const ColumnList<T>& input_features,
467  Column<int32_t>& output_coef_idxs,
468  Column<T>& output_coefs) {
  // Passed in the position of the TextEncodingNone framework parameter.
469  std::string preferred_ml_framework{"DEFAULT"};
470  return linear_reg_fit__cpu_template(mgr,
471  input_labels,
472  input_features,
473  preferred_ml_framework,
474  output_coef_idxs,
475  output_coefs);
476 }
477 
478 template <typename T>
479 std::vector<T> sort_coefs(const Column<int32_t>& coef_idxs, const Column<T>& coefs) {
480  const size_t num_coefs = coef_idxs.size();
481  std::vector<T> ordered_coefs(num_coefs);
482  for (size_t coef_idx = 0; coef_idx < num_coefs; ++coef_idx) {
483  ordered_coefs[coef_idxs[coef_idx]] = coefs[coef_idx];
484  }
485  return ordered_coefs;
486 }
487 
488 // clang-format off
489 /*
490  UDTF: linear_reg_predict__cpu_template(TableFunctionManager,
491  Cursor<Column<K> id, ColumnList<T> features> data,
492  Cursor<Column<int32_t> coef_idx, Column<T> coef> params | require="coef_idx.size() == features.numCols() + 1",
493  TextEncodingNone preferred_ml_framework) ->
494  Column<K> id | input_id=args<0>, Column<T> prediction,
495  K=[int32_t, int64_t, TextEncodingDict], T=[float, double]
496  */
497 // clang-format on
498 
499 template <typename T, typename K>
500 NEVER_INLINE HOST int32_t
502  const Column<K>& input_ids,
503  const ColumnList<T>& input_features,
504  const Column<int32_t>& coef_idxs,
505  const Column<T>& coefs,
506  const TextEncodingNone& preferred_ml_framework_str,
507  Column<K>& output_ids,
508  Column<T>& output_predictions) {
509  const auto preferred_ml_framework = get_ml_framework(preferred_ml_framework_str);
510  if (preferred_ml_framework == MLFramework::INVALID) {
511  return mgr.ERROR_MESSAGE("Invalid ML Framework: " +
512  preferred_ml_framework_str.getString());
513  }
514 
515  mgr.set_output_row_size(input_ids.size());
516  const auto denulled_data = denull_data(input_features);
517  const int64_t num_rows = denulled_data.masked_num_rows;
518  const bool data_is_masked =
519  denulled_data.masked_num_rows < denulled_data.unmasked_num_rows;
520  std::vector<T> denulled_output_allocation(data_is_masked ? num_rows : 0);
521  T* denulled_output =
522  data_is_masked ? denulled_output_allocation.data() : output_predictions.ptr_;
523 
524  const auto features_ptrs = pluck_ptrs(denulled_data.data, 0L, input_features.numCols());
525 
526  const auto ordered_coefs = sort_coefs(coef_idxs, coefs);
527 
528  try {
529  bool did_execute = false;
530 #ifdef HAVE_ONEDAL
531  if (!did_execute && (preferred_ml_framework == MLFramework::ONEDAL ||
532  preferred_ml_framework == MLFramework::DEFAULT)) {
533  onedal_linear_reg_predict_impl(
534  features_ptrs, denulled_output, num_rows, ordered_coefs.data());
535  did_execute = true;
536  }
537 #endif
538 #ifdef HAVE_MLPACK
539  if (!did_execute && (preferred_ml_framework == MLFramework::MLPACK ||
540  preferred_ml_framework == MLFramework::DEFAULT)) {
541  mlpack_linear_reg_predict_impl(
542  features_ptrs, denulled_output, num_rows, ordered_coefs.data());
543  did_execute = true;
544  }
545 #endif
546  if (!did_execute) {
547  return mgr.ERROR_MESSAGE("Cannot find " + preferred_ml_framework_str.getString() +
548  " ML library to support kmeans implementation.");
549  }
550  } catch (std::runtime_error& e) {
551  return mgr.ERROR_MESSAGE(e.what());
552  }
553  output_ids = input_ids;
554  if (data_is_masked) {
555  unmask_data(denulled_output,
556  denulled_data.reverse_index_map,
557  output_predictions.ptr_,
558  denulled_data.unmasked_num_rows,
559  inline_null_value<T>());
560  }
561  return input_ids.size();
562 }
563 
564 // clang-format off
565 /*
566  UDTF: linear_reg_predict__cpu_template(TableFunctionManager,
567  Cursor<Column<K> id, ColumnList<T> features> data,
568  Cursor<Column<int32_t> coef_idx, Column<T> coef> params | require="coef_idx.size() == features.numCols() + 1") ->
569  Column<K> id | input_id=args<0>, Column<T> prediction,
570  K=[int32_t, int64_t, TextEncodingDict], T=[float, double]
571  */
572 // clang-format on
573 
// Convenience overload of linear_reg_predict__cpu_template without a framework
// argument; it builds a "DEFAULT" framework string and passes it along with
// the remaining arguments.
// NOTE(review): two lines are not present in this listing -- the one carrying
// the function name and the `mgr` parameter, and the one that opens the call
// whose arguments are listed below (presumably a forwarding
// `return linear_reg_predict__cpu_template(mgr,`). Confirm against the
// original header.
574 template <typename T, typename K>
575 NEVER_INLINE HOST int32_t
577  const Column<K>& input_ids,
578  const ColumnList<T>& input_features,
579  const Column<int32_t>& coef_idxs,
580  const Column<T>& coefs,
581  Column<K>& output_ids,
582  Column<T>& output_predictions) {
583  std::string preferred_ml_framework{"DEFAULT"};
585  input_ids,
586  input_features,
587  coef_idxs,
588  coefs,
589  preferred_ml_framework,
590  output_ids,
591  output_predictions);
592 }
593 
594 template <typename T>
595 Column<T> create_wrapper_col(std::vector<T>& col_vec) {
596  Column<T> wrapper_col;
597  wrapper_col.ptr_ = col_vec.data();
598  wrapper_col.size_ = static_cast<int64_t>(col_vec.size());
599  return wrapper_col;
600 }
601 
602 // clang-format off
603 /*
604  UDTF: linear_reg_fit_predict__cpu_template(TableFunctionManager,
605  Cursor<Column<K> id, Column<T> labels, ColumnList<T> features> data,
606  TextEncodingNone preferred_ml_framework) ->
607  Column<K> id | input_id=args<0>, Column<T> prediction,
608  K=[int32_t, int64_t, TextEncodingDict], T=[float, double]
609  */
610 // clang-format on
611 
// Linear regression fit-then-predict table function (CPU), full-argument
// overload. Fits coefficients for input_labels against input_features into
// locally owned storage via linear_reg_fit__cpu_template, returns early with
// the fit's result if it is negative, and otherwise passes the fitted
// coefficients on together with input_ids and input_features.
// NOTE(review): several lines are not present in this listing -- the one
// carrying the function name and the `mgr` parameter, whatever calls implement
// the "disable output allocations" comment around the fit call, and the line
// that opens the final call (presumably
// `return linear_reg_predict__cpu_template(mgr,`). Confirm against the
// original header.
612 template <typename T, typename K>
613 NEVER_INLINE HOST int32_t
615  const Column<K>& input_ids,
616  const Column<T>& input_labels,
617  const ColumnList<T>& input_features,
618  const TextEncodingNone& preferred_ml_framework_str,
619  Column<K>& output_ids,
620  Column<T>& output_predictions) {
  // Same coefficient count that linear_reg_fit__cpu_template produces.
621  const int64_t num_coefs = input_features.numCols() + 1;
622  // Need to create backing vectors for coef column wrappers
623  std::vector<int32_t> coef_idxs_vec(num_coefs);
624  std::vector<T> coefs_vec(num_coefs);
  // The wrapper columns point into the vectors above, which stay alive until
  // this function returns.
625  auto coef_idxs = create_wrapper_col(coef_idxs_vec);
626  auto coefs = create_wrapper_col(coefs_vec);
627  // Disable output allocations as we are not calling the fit function
628  // through the normal table functions path, and we have already
629  // allocated our coef storage with the vectors above.
631  const auto fit_ret = linear_reg_fit__cpu_template(
632  mgr, input_labels, input_features, preferred_ml_framework_str, coef_idxs, coefs);
  // A negative fit result is returned unchanged and prediction is skipped.
634  if (fit_ret < 0) {
635  return fit_ret;
636  }
638  input_ids,
639  input_features,
640  coef_idxs,
641  coefs,
642  preferred_ml_framework_str,
643  output_ids,
644  output_predictions);
645 }
646 
647 // clang-format off
648 /*
649  UDTF: linear_reg_fit_predict__cpu_template(TableFunctionManager,
650  Cursor<Column<K> id, Column<T> labels, ColumnList<T> features> data) ->
651  Column<K> id | input_id=args<0>, Column<T> prediction,
652  K=[int32_t, int64_t, TextEncodingDict], T=[float, double]
653  */
654 // clang-format on
655 
// Convenience overload of linear_reg_fit_predict__cpu_template without a
// framework argument; it builds a "DEFAULT" framework string and passes it
// along with the remaining arguments.
// NOTE(review): two lines are not present in this listing -- the one carrying
// the function name and the `mgr` parameter, and the one that opens the call
// whose arguments are listed below (presumably a forwarding
// `return linear_reg_fit_predict__cpu_template(mgr,`). Confirm against the
// original header.
656 template <typename T, typename K>
657 NEVER_INLINE HOST int32_t
659  const Column<K>& input_ids,
660  const Column<T>& input_labels,
661  const ColumnList<T>& input_features,
662  Column<K>& output_ids,
663  Column<T>& output_predictions) {
664  std::string preferred_ml_framework{"DEFAULT"};
666  input_ids,
667  input_labels,
668  input_features,
669  preferred_ml_framework,
670  output_ids,
671  output_predictions);
672 }
673 
674 #endif // #ifndef __CUDACC__
void set_output_row_size(int64_t num_rows)
Definition: heavydbTypes.h:679
KMeansInitStrategy get_kmeans_init_type(const std::string &init_type_str)
std::string getString() const
Definition: heavydbTypes.h:186
DEVICE int64_t size() const
Definition: heavydbTypes.h:469
DEVICE int64_t numCols() const
Definition: heavydbTypes.h:591
#define CHECK_GE(x, y)
Definition: Logger.h:235
T * ptr_
Definition: heavydbTypes.h:454
MaskedData< T > denull_data(const ColumnList< T > &features)
NEVER_INLINE HOST int32_t linear_reg_fit__cpu_template(TableFunctionManager &mgr, const Column< T > &input_labels, const ColumnList< T > &input_features, const TextEncodingNone &preferred_ml_framework_str, Column< int32_t > &output_coef_idxs, Column< T > &output_coefs)
NEVER_INLINE HOST int32_t kmeans__cpu_template(TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const int num_clusters, const int num_iterations, const TextEncodingNone &init_type_str, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< int32_t > &output_clusters)
#define CHECK_GT(x, y)
Definition: Logger.h:234
std::vector< T > sort_coefs(const Column< int32_t > &coef_idxs, const Column< T > &coefs)
NEVER_INLINE HOST int32_t linear_reg_predict__cpu_template(TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const Column< int32_t > &coef_idxs, const Column< T > &coefs, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< T > &output_predictions)
MLFramework get_ml_framework(const std::string &ml_framework_str)
std::vector< const T * > pluck_ptrs(const std::vector< std::vector< T >> &data, const int64_t start_idx, const int64_t end_idx)
#define HOST
EXTENSION_NOINLINE_HOST int32_t supported_ml_frameworks__cpu_(TableFunctionManager &mgr, Column< TextEncodingDict > &output_ml_frameworks, Column< bool > &output_availability, Column< bool > &output_default)
StringDictionaryProxy * string_dict_proxy_
Definition: heavydbTypes.h:506
#define EXTENSION_NOINLINE_HOST
Definition: heavydbTypes.h:44
void disable_output_allocations()
Definition: heavydbTypes.h:685
NEVER_INLINE HOST int32_t linear_reg_fit_predict__cpu_template(TableFunctionManager &mgr, const Column< K > &input_ids, const Column< T > &input_labels, const ColumnList< T > &input_features, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< T > &output_predictions)
#define CHECK_LE(x, y)
Definition: Logger.h:233
int64_t size_
Definition: heavydbTypes.h:455
void unmask_data(const T *masked_input, const std::vector< int32_t > &reverse_index_map, T *unmasked_output, const int64_t num_unmasked_rows, const T null_val)
NEVER_INLINE HOST int32_t dbscan__cpu_template(TableFunctionManager &mgr, const Column< K > &input_ids, const ColumnList< T > &input_features, const double epsilon, const int32_t min_observations, const TextEncodingNone &preferred_ml_framework_str, Column< K > &output_ids, Column< int32_t > &output_clusters)
#define NEVER_INLINE
std::vector< std::vector< T > > z_std_normalize_data(const std::vector< T * > &input_data, const int64_t num_rows)
std::vector< int32_t > getOrAddTransientBulk(const std::vector< std::string > &strings)
void enable_output_allocations()
Definition: heavydbTypes.h:687
Column< T > create_wrapper_col(std::vector< T > &col_vec)