OmniSciDB  a667adc9c8
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MLFunctions.hpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef HAVE_MLPACK
18 
19 #include <mlpack/methods/dbscan/dbscan.hpp>
20 #include <mlpack/methods/kmeans/kmeans.hpp>
21 
22 // clang-format off
23 /*
24  UDTF: dbscan__cpu_(Cursor<Column<int>, ColumnList<double>>, float, int, RowMultiplier) -> Column<int>, Column<int>
25 */
26 // clang-format on
27 EXTENSION_NOINLINE int32_t dbscan__cpu_(const Column<int>& input_ids,
28  const ColumnList<double>& cluster_features,
29  const float epsilon,
30  const int min_num_points,
31  const int output_size_multiplier,
32  Column<int>& output_ids,
33  Column<int>& output_clusters) {
34  if (epsilon <= 0.0) {
35  throw std::runtime_error("DBSCAN: epsilon must be positive");
36  }
37  if (min_num_points < 1) {
38  throw std::runtime_error("DBSCAN: min_num_points must be >= 1");
39  }
40  const int64_t num_rows = input_ids.getSize();
41 #ifndef __CUDACC__
42  const int64_t num_cluster_features = cluster_features.getCols();
43  arma::Mat<double> cluster_features_matrix(num_rows, num_cluster_features);
44  for (int64_t c = 0; c < num_cluster_features; ++c) {
45  memcpy(cluster_features_matrix.colptr(c),
46  cluster_features(c).ptr,
47  sizeof(double) * num_rows);
48  }
49  arma::Mat<double> cluster_features_matrix_transposed = cluster_features_matrix.t();
50  mlpack::dbscan::DBSCAN<> dbscan(epsilon, min_num_points);
51  arma::Row<size_t> cluster_assignments;
52  dbscan.Cluster(cluster_features_matrix_transposed, cluster_assignments);
53 
54  for (int64_t r = 0; r < num_rows; ++r) {
55  output_ids[r] = input_ids[r];
56  output_clusters[r] = cluster_assignments[r] == SIZE_MAX ? -1 : cluster_assignments[r];
57  }
58 #endif
59  return num_rows;
60 }
61 
62 // clang-format off
63 /*
64  UDTF: kmeans__cpu_(Cursor<Column<int>, ColumnList<double>>, int, RowMultiplier) -> Column<int>, Column<int>
65 */
66 // clang-format on
67 EXTENSION_NOINLINE int32_t kmeans__cpu_(const Column<int>& input_ids,
68  const ColumnList<double>& cluster_features,
69  const int num_clusters,
70  const int output_size_multiplier,
71  Column<int>& output_ids,
72  Column<int>& output_clusters) {
73  if (num_clusters <= 0) {
74  throw std::runtime_error("KMEANS: num_clusters must be positive integer");
75  }
76  const int64_t num_rows = input_ids.getSize();
77 #ifndef __CUDACC__
78  const int64_t num_cluster_features = cluster_features.getCols();
79  arma::Mat<double> cluster_features_matrix(num_rows, num_cluster_features);
80  for (int64_t c = 0; c < num_cluster_features; ++c) {
81  memcpy(cluster_features_matrix.colptr(c),
82  cluster_features(c).ptr,
83  sizeof(double) * num_rows);
84  }
85  arma::Mat<double> cluster_features_matrix_transposed = cluster_features_matrix.t();
86  mlpack::kmeans::KMeans<> kmeans;
87  arma::Row<size_t> cluster_assignments;
88  kmeans.Cluster(cluster_features_matrix_transposed,
89  static_cast<size_t>(num_clusters),
90  cluster_assignments);
91 
92  for (int64_t r = 0; r < num_rows; ++r) {
93  output_ids[r] = input_ids[r];
94  output_clusters[r] = cluster_assignments[r] == SIZE_MAX ? -1 : cluster_assignments[r];
95  }
96 #endif
97  return num_rows;
98 }
99 
100 #endif
#define SIZE_MAX
tuple r
Definition: test_fsi.py:16
#define EXTENSION_NOINLINE
Definition: OmniSciTypes.h:27
DEVICE int64_t getCols() const
Definition: OmniSciTypes.h:198
DEVICE int64_t getSize() const
Definition: OmniSciTypes.h:154