23 #include <tbb/parallel_for.h>
24 #include <mlpack/methods/dbscan/dbscan.hpp>
25 #include <mlpack/methods/kmeans/kmeans.hpp>
29 using MatrixT = arma::Mat<double>;
31 arma::Mat<double> create_input_matrix(
const std::vector<const float*>& input_features,
32 const int64_t num_rows) {
33 const int64_t num_features = input_features.size();
34 arma::Mat<double> features_matrix(num_rows, num_features);
35 for (int64_t c = 0; c < num_features; ++c) {
36 double* matrix_col_ptr = features_matrix.colptr(c);
37 const float* input_features_col = input_features[c];
39 [&](
const tbb::blocked_range<int64_t>& r) {
40 const int64_t start_idx = r.begin();
41 const int64_t end_idx = r.end();
42 for (int64_t r = start_idx; r < end_idx; ++r) {
43 matrix_col_ptr[r] = input_features_col[r];
47 return features_matrix;
50 arma::Mat<double> create_input_matrix(
const std::vector<const double*>& input_features,
51 const int64_t num_rows) {
52 const int64_t num_features = input_features.size();
53 arma::Mat<double> features_matrix(num_rows, num_features);
54 for (int64_t c = 0; c < num_features; ++c) {
55 memcpy(features_matrix.colptr(c), input_features[c],
sizeof(double) * num_rows);
57 return features_matrix;
60 void rewrite_cluster_id_nulls(
const arma::Row<size_t>& cluster_assignments,
61 int32_t* output_clusters,
62 const int64_t num_rows) {
64 [&](
const tbb::blocked_range<int64_t>& r) {
65 const int64_t start_idx = r.begin();
66 const int64_t end_idx = r.end();
67 for (int64_t r = start_idx; r < end_idx; ++r) {
68 output_clusters[r] = cluster_assignments[r] == SIZE_MAX
70 : cluster_assignments[r];
75 template <
typename InitStrategy>
76 void run_kmeans(
const MatrixT& input_features_matrix_transposed,
77 arma::Row<size_t>& cluster_assignments,
78 const int32_t num_clusters) {
79 mlpack::kmeans::KMeans<mlpack::metric::EuclideanDistance,
81 mlpack::kmeans::MaxVarianceNewCluster,
82 mlpack::kmeans::NaiveKMeans,
85 kmeans.Cluster(input_features_matrix_transposed,
86 static_cast<size_t>(num_clusters),
91 NEVER_INLINE HOST int32_t mlpack_kmeans_impl(
const std::vector<const T*>& input_features,
92 int32_t* output_clusters,
93 const int64_t num_rows,
94 const int32_t num_clusters,
95 const int32_t num_iterations,
98 const auto input_features_matrix = create_input_matrix(input_features, num_rows);
99 const MatrixT input_features_matrix_transposed = input_features_matrix.t();
100 arma::Row<size_t> cluster_assignments;
104 run_kmeans<mlpack::kmeans::SampleInitialization>(
105 input_features_matrix_transposed, cluster_assignments, num_clusters);
117 throw std::runtime_error(
118 "MLPack KMeans initialization not implemented for given strategy.");
123 rewrite_cluster_id_nulls(cluster_assignments, output_clusters, num_rows);
124 }
catch (std::exception& e) {
125 throw std::runtime_error(e.what());
130 template <
typename T>
131 NEVER_INLINE HOST int32_t mlpack_dbscan_impl(
const std::vector<const T*>& input_features,
132 int32_t* output_clusters,
133 const int64_t num_rows,
134 const double epsilon,
135 const int32_t min_observations) {
137 const auto input_features_matrix = create_input_matrix(input_features, num_rows);
138 const MatrixT input_features_matrix_transposed = input_features_matrix.t();
140 mlpack::dbscan::DBSCAN<> dbscan(epsilon, min_observations);
141 arma::Row<size_t> cluster_assignments;
142 dbscan.Cluster(input_features_matrix_transposed, cluster_assignments);
144 rewrite_cluster_id_nulls(cluster_assignments, output_clusters, num_rows);
145 }
catch (std::exception& e) {
146 throw std::runtime_error(e.what());
151 template <
typename T>
153 mlpack_linear_reg_fit_impl(
const T* input_labels,
154 const std::vector<const T*>& input_features,
155 int32_t* output_coef_idxs,
157 const int64_t num_rows) {
161 arma::Mat<T>
X(num_rows, input_features.size() + 1);
163 X.unsafe_col(0).fill(1);
165 const int64_t num_features = input_features.size();
166 for (int64_t feature_idx = 0; feature_idx < num_features; ++feature_idx) {
168 X.colptr(feature_idx + 1), input_features[feature_idx],
sizeof(
T) * num_rows);
170 const arma::Mat<T> Xt =
X.t();
171 const arma::Mat<T> XtX = Xt *
X;
172 const arma::Col<T>
Y(input_labels, num_rows);
173 const arma::Col<T> B_est = arma::solve(XtX, Xt *
Y);
174 for (int64_t coef_idx = 0; coef_idx < num_features + 1; ++coef_idx) {
175 output_coef_idxs[coef_idx] = coef_idx;
176 output_coefs[coef_idx] = B_est[coef_idx];
178 return num_features + 1;
179 }
catch (std::exception& e) {
180 throw std::runtime_error(e.what());
184 template <
typename T>
186 mlpack_linear_reg_predict_impl(
const std::vector<const T*>& input_features,
187 T* output_predictions,
188 const int64_t num_rows,
193 const int64_t num_features = input_features.size();
194 const int64_t num_coefs = num_features + 1;
195 arma::Mat<T>
X(num_rows, num_coefs);
196 X.unsafe_col(0).fill(1);
197 for (int64_t feature_idx = 0; feature_idx < num_features; ++feature_idx) {
199 X.colptr(feature_idx + 1), input_features[feature_idx],
sizeof(
T) * num_rows);
201 const arma::Col<T> B(coefs, num_coefs);
202 const arma::Col<T> Y_hat = X * B;
203 memcpy(output_predictions, Y_hat.colptr(0),
sizeof(
T) * num_rows);
205 }
catch (std::exception& e) {
206 throw std::runtime_error(e.what());
210 #endif // #ifdef HAVE_TBB
211 #endif // #ifdef HAVE_MLPACK
212 #endif // #ifdef __CUDACC__
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())