26 using namespace daal::algorithms;
27 using namespace daal::data_management;
// Single-buffer overload: attaches the `data` pointer to `data_table` at
// array index 0.
// NOTE(review): the template header, the statement that creates `data_table`
// and the return statement are not visible in this listing (the embedded
// numbering jumps 32 -> 35), so how `num_rows` is used cannot be confirmed here.
30 const NumericTablePtr prepare_data_table(
const T* data,
32 const int64_t num_rows) {
// Constness is cast away so the pointer matches the T* handed to setArray<T>;
// presumably the table never writes through it -- TODO confirm.
35 data_table->setArray<
T>(
const_cast<T*
>(data), 0);
// Multi-column overload: `data` holds one pointer per column, and each pointer
// is attached to `data_table` at the matching array index.
// NOTE(review): the template header, the creation of `data_table` and the
// return statement are not visible in this listing, so the role of `num_rows`
// cannot be confirmed from here.
41 const NumericTablePtr prepare_data_table(
const std::vector<const T*>& data,
42 const int64_t num_rows) {
// The column count is taken from the number of pointers supplied.
44 const size_t num_columns = data.size();
48 for (
size_t i = 0; i < num_columns; ++i) {
// data[i] becomes array i; const is cast away only to match setArray<T>'s
// pointer argument (presumably read-only use -- TODO confirm).
49 data_table->setArray<
T>(
const_cast<T*
>(data[i]), i);
// Builds a table from one flat buffer by giving every element of `data` its
// own array slot: slot c is set to the address `data + c`.
// NOTE(review): the template header, the creation of `data_table` (and hence
// its row count, presumably 1) and the return statement are not visible here.
55 const NumericTablePtr prepare_pivoted_data_table(
const T* data,
const int64_t num_elems) {
// num_elems is signed; it is converted to size_t for the comparison with the
// unsigned loop counter.
59 for (
size_t c = 0; c < static_cast<size_t>(num_elems); ++c) {
// The pointer is offset by c after the const_cast, so array c starts at the
// c-th element of the buffer.
60 data_table->setArray<
T>(
const_cast<T*
>(data) + c, c);
// Lookup table from KMeansInitStrategy values to oneDAL kmeans::init::Method
// values.
// NOTE(review): the enclosing function header and the map's entries are not
// visible in this listing (the embedded numbering jumps 66 -> 72).
66 const static std::map<KMeansInitStrategy, kmeans::init::Method> kmeans_init_type_map = {
// Look up the requested strategy; a value with no entry raises
// std::runtime_error with the message built below.
72 const auto itr = kmeans_init_type_map.find(init_type);
73 if (itr == kmeans_init_type_map.end()) {
74 std::ostringstream oss;
75 oss <<
"Invalid Kmeans cluster centroid initialization type. "
76 <<
"Was expecting one of DETERMINISTIC, RANDOM, or PLUS_PLUS.";
77 throw std::runtime_error(oss.str());
// Runs the oneDAL k-means initialization algorithm for element type T and
// compile-time method M, and returns the resulting centroids table.
//   input_features_table: table supplied as the kmeans::init::data input.
//   num_clusters: passed to the Batch constructor.
// NOTE(review): listing line 87 and the closing brace are not visible; a
// compute call presumably sits between setting the input and reading the result.
82 template <
typename T, kmeans::init::Method M>
83 const NumericTablePtr init_centroids_for_type(
const NumericTablePtr& input_features_table,
84 const int32_t num_clusters) {
85 kmeans::init::Batch<T, M>
init(num_clusters);
86 init.input.set(kmeans::init::data, input_features_table);
88 return init.getResult()->get(kmeans::init::centroids);
// Maps a runtime kmeans::init::Method value onto the matching
// init_centroids_for_type<T, Method> instantiation. The method is a template
// argument there, so each supported value needs its own case.
// NOTE(review): the template header, the `switch` statement itself and the
// lines around the final return (presumably a default label or the end of the
// switch) are not visible in this listing.
92 const NumericTablePtr init_centroids(
const NumericTablePtr& input_features_table,
93 const kmeans::init::Method& init_type,
94 const int32_t num_clusters) {
96 case kmeans::init::Method::deterministicDense:
97 return init_centroids_for_type<T, kmeans::init::Method::deterministicDense>(
98 input_features_table, num_clusters);
99 case kmeans::init::Method::randomDense:
100 return init_centroids_for_type<T, kmeans::init::Method::randomDense>(
101 input_features_table, num_clusters);
102 case kmeans::init::Method::plusPlusDense:
103 return init_centroids_for_type<T, kmeans::init::Method::plusPlusDense>(
104 input_features_table, num_clusters);
105 case kmeans::init::Method::parallelPlusDense:
106 return init_centroids_for_type<T, kmeans::init::Method::parallelPlusDense>(
107 input_features_table, num_clusters);
// Fallback: any value not handled above uses deterministicDense.
110 return init_centroids_for_type<T, kmeans::init::Method::deterministicDense>(
111 input_features_table, num_clusters);
// Sets up a oneDAL k-means batch run over the column pointers in
// `input_features`.
// NOTE(review): this listing omits the remaining parameters (including the
// `onedal_kmeans_init_type` used below), the `try {` opener, the construction
// of `assignments_table`, the second arguments of two result->set calls, the
// compute call and the return statement. How `output_clusters` receives the
// assignments therefore cannot be confirmed from here.
116 template <
typename T>
117 NEVER_INLINE HOST int32_t onedal_kmeans_impl(
const std::vector<const T*>& input_features,
118 int32_t* output_clusters,
119 const int64_t num_rows,
120 const int num_clusters,
121 const int num_iterations,
// Build the features table from the column pointers.
124 const auto features_table = prepare_data_table(input_features, num_rows);
// Starting centroids come from the init method selected by the caller.
126 const auto centroids =
127 init_centroids<T>(features_table, onedal_kmeans_init_type, num_clusters);
128 const auto assignments_table =
// The result object is created up front and populated with tables, then
// handed to the algorithm via setResult below.
130 const kmeans::ResultPtr
result(
new kmeans::Result);
131 result->set(kmeans::assignments, assignments_table);
132 result->set(kmeans::objectiveFunction,
134 result->set(kmeans::nIterations,
136 kmeans::Batch<> algorithm(num_clusters, num_iterations);
137 algorithm.input.set(kmeans::data, features_table);
138 algorithm.input.set(kmeans::inputCentroids, centroids);
// Presumably restricts the outputs to cluster assignments -- TODO confirm
// against the oneDAL documentation for resultsToEvaluate.
139 algorithm.parameter().resultsToEvaluate = kmeans::computeAssignments;
140 algorithm.setResult(
result);
142 }
// Any std::exception is rethrown as std::runtime_error carrying the same
// what() text; the original exception type is not preserved.
catch (std::exception& e) {
143 throw std::runtime_error(e.what());
// Sets up a oneDAL DBSCAN batch run over the column pointers in
// `input_features`, using `epsilon` and `min_observations` as the algorithm's
// constructor arguments.
// NOTE(review): this listing omits the `try {` opener, the construction of
// `assignments_table`, the second argument of the nClusters result->set call,
// the compute call and the return statement. How `output_clusters` receives
// the assignments therefore cannot be confirmed from here.
148 template <
typename T>
149 NEVER_INLINE HOST int32_t onedal_dbscan_impl(
const std::vector<const T*>& input_features,
150 int32_t* output_clusters,
151 const int64_t num_rows,
152 const double epsilon,
153 const int32_t min_observations) {
// Build the features table from the column pointers.
155 const auto features_table = prepare_data_table(input_features, num_rows);
156 const auto assignments_table =
// The result object is created up front and populated with tables, then
// handed to the algorithm via setResult below.
158 const dbscan::ResultPtr
result(
new dbscan::Result);
159 result->set(dbscan::assignments, assignments_table);
160 result->set(dbscan::nClusters,
162 dbscan::Batch<> algorithm(epsilon, min_observations);
163 algorithm.input.set(dbscan::data, features_table);
// Presumably requests only the assignments output -- TODO confirm against
// the oneDAL documentation for resultsToCompute.
164 algorithm.parameter().resultsToCompute = dbscan::assignments;
165 algorithm.setResult(
result);
167 }
// Any std::exception is rethrown as std::runtime_error carrying the same
// what() text; the original exception type is not preserved.
catch (std::exception& e) {
168 throw std::runtime_error(e.what());
// Walks the columns of `coefs_table`, writing each column index into
// `coef_idxs` and reading the matching value out of the table.
// NOTE(review): the remaining parameters (`coef_idxs` and, presumably, the
// coefficient output buffer), the left-hand side that receives the getValue
// result (listing line 180) and the return statement are not visible here.
173 template <
typename T>
174 int32_t extract_model_coefs(
const NumericTablePtr& coefs_table,
// One coefficient per table column.
177 const int64_t num_coefs = coefs_table->getNumberOfColumns();
178 for (int64_t coef_idx = 0; coef_idx < num_coefs; ++coef_idx) {
179 coef_idxs[coef_idx] = coef_idx;
// Explicitly qualified call to the NumericTable base-class getValue; the
// arguments appear to be (column, row 0) -- TODO confirm against oneDAL.
181 coefs_table->NumericTable::getValue<
T>(coef_idx,
static_cast<size_t>(0));
// Trains a oneDAL linear regression model (qrDense method) on
// `input_features` against `input_labels`, then forwards the fitted beta table
// to extract_model_coefs.
// NOTE(review): this listing omits the return type, the `output_coefs`
// parameter used below, the `try {` opener, the name of the training Batch
// variable (used as `algorithm`) and the compute call.
186 template <
typename T>
188 onedal_linear_reg_fit_impl(
const T* input_labels,
189 const std::vector<const T*>& input_features,
190 int32_t* output_coef_idxs,
192 const int64_t num_rows) {
// Labels use the single-buffer overload; features use the per-column overload.
194 const auto labels_table = prepare_data_table(input_labels, num_rows);
195 const auto features_table = prepare_data_table(input_features, num_rows);
197 linear_regression::training::Batch<T, linear_regression::training::Method::qrDense>
200 algorithm.input.set(linear_regression::training::data, features_table);
201 algorithm.input.set(linear_regression::training::dependentVariables, labels_table);
// Pull the beta table out of the trained model and copy it to the outputs.
204 const auto training_result = algorithm.getResult();
205 const auto coefs_table =
206 training_result->get(linear_regression::training::model)->getBeta();
207 return extract_model_coefs<T>(coefs_table, output_coef_idxs, output_coefs);
208 }
// Any std::exception is rethrown as std::runtime_error carrying the same
// what() text; the original exception type is not preserved.
catch (std::exception& e) {
209 throw std::runtime_error(e.what());
// Rebuilds a oneDAL linear regression model from a flat array of
// `num_coefs` coefficients via linear_regression::ModelBuilder.
// NOTE(review): the line carrying the return type and function name is not
// visible in this listing; the name build_linear_reg_model is presumed from
// the call site in onedal_linear_reg_predict_impl. The ModelBuilder's
// remaining constructor arguments and the declaration of `num_betas` are also
// not visible.
213 template <
typename T>
215 const T* model_coefs,
216 const int64_t num_coefs) {
// Each coefficient becomes its own column; the check guards that layout.
220 const auto betas_table = prepare_pivoted_data_table(model_coefs, num_coefs);
221 CHECK_EQ(betas_table->getNumberOfColumns(), num_coefs);
// The builder's first argument is one less than the coefficient count;
// presumably the extra coefficient is the intercept -- TODO confirm.
224 linear_regression::ModelBuilder<T> model_builder(num_coefs - 1,
228 BlockDescriptor<T> block_result;
// Acquire a read-only block over all rows so the betas can be passed to
// setBeta as a [first, last) pointer range.
232 betas_table->getBlockOfRows(0, betas_table->getNumberOfRows(), readOnly, block_result);
234 (betas_table->getNumberOfRows()) * (betas_table->getNumberOfColumns());
237 T* first_itr = block_result.getBlockPtr();
238 T* last_itr = first_itr + num_betas;
239 model_builder.setBeta(first_itr, last_itr);
// The block is released once setBeta has been called with the range.
240 betas_table->releaseBlockOfRows(block_result);
242 return model_builder.getModel();
// Sets up a oneDAL linear regression prediction over `input_features` using a
// model rebuilt from the `coefs` array.
// NOTE(review): this listing omits the return type, the `coefs` parameter, the
// `try {` opener, the construction of `predictions_table`, the compute call
// and the return statement. How `output_predictions` is filled therefore
// cannot be confirmed from here.
245 template <
typename T>
247 onedal_linear_reg_predict_impl(
const std::vector<const T*>& input_features,
248 T* output_predictions,
249 const int64_t num_rows,
252 const auto features_table = prepare_data_table(input_features, num_rows);
// The coefficient count passed is the number of feature columns plus one;
// presumably the extra one is the intercept -- TODO confirm.
253 const auto model_ptr = build_linear_reg_model(coefs, input_features.size() + 1);
255 linear_regression::prediction::Batch<> algorithm;
256 algorithm.input.set(linear_regression::prediction::data, features_table);
257 algorithm.input.set(linear_regression::prediction::model, model_ptr);
259 const auto predictions_table =
// The result object is created here and handed to the algorithm via
// setResult, with predictions_table registered as its prediction table.
262 const linear_regression::prediction::ResultPtr
result(
263 new linear_regression::prediction::Result);
264 result->set(linear_regression::prediction::prediction, predictions_table);
265 algorithm.setResult(
result);
268 }
// Any std::exception is rethrown as std::runtime_error carrying the same
// what() text; the original exception type is not preserved.
catch (std::exception& e) {
269 throw std::runtime_error(e.what());
273 #endif // #ifdef HAVE_ONEDAL
274 #endif // #ifdef __CUDACC__
KMeansInitStrategy get_kmeans_init_type(const std::string &init_type_str)
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
void init(LogOptions const &log_opts)