OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CategoricalFeaturesBuilder< T > Struct Template Reference

#include <MLTableFunctions.hpp>

Public Member Functions

 CategoricalFeaturesBuilder (const ColumnList< TextEncodingDict > &cat_features, const ColumnList< T > &numeric_features, const int32_t cat_top_k, const float cat_min_fraction, const bool cat_include_others)
 
 CategoricalFeaturesBuilder (const ColumnList< TextEncodingDict > &cat_features, const int32_t cat_top_k, const float cat_min_fraction, const bool cat_include_others)
 
 CategoricalFeaturesBuilder (const ColumnList< TextEncodingDict > &cat_features, const ColumnList< T > &numeric_features, const std::vector< std::vector< std::string >> &cat_feature_keys)
 
 CategoricalFeaturesBuilder (const ColumnList< TextEncodingDict > &cat_features, const std::vector< std::vector< std::string >> &cat_feature_keys)
 
ColumnList< T > getFeatures ()
 
const std::vector< std::vector< std::string > > & getCatFeatureKeys () const
 

Private Attributes

int64_t num_rows_
 
std::vector< TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol< T > > one_hot_encoded_cols_
 
std::vector< std::vector< std::string > > cat_feature_keys_
 
std::vector< int8_t * > col_ptrs_
 

Detailed Description

template<typename T>
struct CategoricalFeaturesBuilder< T >

Definition at line 361 of file MLTableFunctions.hpp.

Constructor & Destructor Documentation

template<typename T>
CategoricalFeaturesBuilder< T >::CategoricalFeaturesBuilder ( const ColumnList< TextEncodingDict > &  cat_features,
const ColumnList< T > &  numeric_features,
const int32_t  cat_top_k,
const float  cat_min_fraction,
const bool  cat_include_others 
)
inline

Definition at line 363 of file MLTableFunctions.hpp.

References ColumnList< T >::numCols(), ColumnList< TextEncodingDict >::numCols(), and ColumnList< T >::ptrs_.

368  : num_rows_(numeric_features.size()) {
369  TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo
370  one_hot_encoding_info(cat_top_k, cat_min_fraction, cat_include_others);
371  const size_t num_cat_features = static_cast<size_t>(cat_features.numCols());
372  std::vector<TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo>
373  one_hot_encoding_infos;
374  for (size_t cat_idx = 0; cat_idx < num_cat_features; ++cat_idx) {
375  one_hot_encoding_infos.emplace_back(one_hot_encoding_info);
376  }
377  one_hot_encoded_cols_ =
378  TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode<T>(
379  cat_features, one_hot_encoding_infos);
380  for (auto& one_hot_encoded_col : one_hot_encoded_cols_) {
381  cat_feature_keys_.emplace_back(one_hot_encoded_col.cat_features);
382  for (auto& one_hot_encoded_vec : one_hot_encoded_col.encoded_buffers) {
383  col_ptrs_.emplace_back(reinterpret_cast<int8_t*>(one_hot_encoded_vec.data()));
384  }
385  }
386  const int64_t num_numeric_features = numeric_features.numCols();
387  for (int64_t numeric_feature_idx = 0; numeric_feature_idx < num_numeric_features;
388  ++numeric_feature_idx) {
389  col_ptrs_.emplace_back(numeric_features.ptrs_[numeric_feature_idx]);
390  }
391  }
DEVICE int64_t numCols() const
DEVICE int64_t numCols() const
int8_t ** ptrs_
std::vector< int8_t * > col_ptrs_
std::vector< TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol< T > > one_hot_encoded_cols_
std::vector< std::vector< std::string > > cat_feature_keys_
DEVICE int64_t size() const

+ Here is the call graph for this function:

template<typename T>
CategoricalFeaturesBuilder< T >::CategoricalFeaturesBuilder ( const ColumnList< TextEncodingDict > &  cat_features,
const int32_t  cat_top_k,
const float  cat_min_fraction,
const bool  cat_include_others 
)
inline

Definition at line 393 of file MLTableFunctions.hpp.

References ColumnList< TextEncodingDict >::numCols().

397  : num_rows_(cat_features.size()) {
398  TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo
399  one_hot_encoding_info(cat_top_k, cat_min_fraction, cat_include_others);
400  const size_t num_cat_features = static_cast<size_t>(cat_features.numCols());
401  std::vector<TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo>
402  one_hot_encoding_infos;
403  for (size_t cat_idx = 0; cat_idx < num_cat_features; ++cat_idx) {
404  one_hot_encoding_infos.emplace_back(one_hot_encoding_info);
405  }
406  one_hot_encoded_cols_ =
407  TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode<T>(
408  cat_features, one_hot_encoding_infos);
409  for (auto& one_hot_encoded_col : one_hot_encoded_cols_) {
410  cat_feature_keys_.emplace_back(one_hot_encoded_col.cat_features);
411  for (auto& one_hot_encoded_vec : one_hot_encoded_col.encoded_buffers) {
412  col_ptrs_.emplace_back(reinterpret_cast<int8_t*>(one_hot_encoded_vec.data()));
413  }
414  }
415  }
DEVICE int64_t size() const
DEVICE int64_t numCols() const
std::vector< int8_t * > col_ptrs_
std::vector< TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol< T > > one_hot_encoded_cols_
std::vector< std::vector< std::string > > cat_feature_keys_

+ Here is the call graph for this function:

template<typename T>
CategoricalFeaturesBuilder< T >::CategoricalFeaturesBuilder ( const ColumnList< TextEncodingDict > &  cat_features,
const ColumnList< T > &  numeric_features,
const std::vector< std::vector< std::string >> &  cat_feature_keys 
)
inline

Definition at line 417 of file MLTableFunctions.hpp.

References ColumnList< T >::numCols(), ColumnList< TextEncodingDict >::numCols(), and ColumnList< T >::ptrs_.

421  : num_rows_(numeric_features.size()), cat_feature_keys_(cat_feature_keys) {
422  const size_t num_cat_features = static_cast<size_t>(cat_features.numCols());
423  if (num_cat_features != cat_feature_keys_.size()) {
424  throw std::runtime_error(
425  "Number of provided categorical features does not match number of categorical "
426  "features in the model.");
427  }
428  std::vector<TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo>
429  one_hot_encoding_infos;
430  for (size_t cat_idx = 0; cat_idx < num_cat_features; ++cat_idx) {
431  one_hot_encoding_infos.emplace_back(cat_feature_keys_[cat_idx]);
432  }
433  one_hot_encoded_cols_ =
434  TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode<T>(
435  cat_features, one_hot_encoding_infos);
436  for (auto& one_hot_encoded_col : one_hot_encoded_cols_) {
437  for (auto& one_hot_encoded_vec : one_hot_encoded_col.encoded_buffers) {
438  col_ptrs_.emplace_back(reinterpret_cast<int8_t*>(one_hot_encoded_vec.data()));
439  }
440  }
441  const int64_t num_numeric_features = numeric_features.numCols();
442  for (int64_t numeric_feature_idx = 0; numeric_feature_idx < num_numeric_features;
443  ++numeric_feature_idx) {
444  col_ptrs_.emplace_back(numeric_features.ptrs_[numeric_feature_idx]);
445  }
446  }
DEVICE int64_t numCols() const
DEVICE int64_t numCols() const
int8_t ** ptrs_
std::vector< int8_t * > col_ptrs_
std::vector< TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol< T > > one_hot_encoded_cols_
std::vector< std::vector< std::string > > cat_feature_keys_
DEVICE int64_t size() const

+ Here is the call graph for this function:

template<typename T>
CategoricalFeaturesBuilder< T >::CategoricalFeaturesBuilder ( const ColumnList< TextEncodingDict > &  cat_features,
const std::vector< std::vector< std::string >> &  cat_feature_keys 
)
inline

Definition at line 448 of file MLTableFunctions.hpp.

References ColumnList< TextEncodingDict >::numCols().

451  : num_rows_(cat_features.size()), cat_feature_keys_(cat_feature_keys) {
452  const size_t num_cat_features = static_cast<size_t>(cat_features.numCols());
453  if (num_cat_features != cat_feature_keys_.size()) {
454  throw std::runtime_error(
455  "Number of provided categorical features does not match number of categorical "
456  "features in the model.");
457  }
458  std::vector<TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo>
459  one_hot_encoding_infos;
460  for (size_t cat_idx = 0; cat_idx < num_cat_features; ++cat_idx) {
461  one_hot_encoding_infos.emplace_back(cat_feature_keys_[cat_idx]);
462  }
463  one_hot_encoded_cols_ =
464  TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode<T>(
465  cat_features, one_hot_encoding_infos);
466  for (auto& one_hot_encoded_col : one_hot_encoded_cols_) {
467  for (auto& one_hot_encoded_vec : one_hot_encoded_col.encoded_buffers) {
468  col_ptrs_.emplace_back(reinterpret_cast<int8_t*>(one_hot_encoded_vec.data()));
469  }
470  }
471  }
DEVICE int64_t size() const
DEVICE int64_t numCols() const
std::vector< int8_t * > col_ptrs_
std::vector< TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol< T > > one_hot_encoded_cols_
std::vector< std::vector< std::string > > cat_feature_keys_

+ Here is the call graph for this function:

Member Function Documentation

template<typename T>
const std::vector<std::vector<std::string> >& CategoricalFeaturesBuilder< T >::getCatFeatureKeys ( ) const
inline

Definition at line 478 of file MLTableFunctions.hpp.

Referenced by decision_tree_reg_fit__cpu_template(), gbt_reg_fit__cpu_template(), linear_reg_fit__cpu_template(), pca_fit__cpu_1(), pca_fit__cpu_template(), and random_forest_reg_fit__cpu_template().

478  {
479  return cat_feature_keys_;
480  }
std::vector< std::vector< std::string > > cat_feature_keys_

+ Here is the caller graph for this function:

template<typename T>
ColumnList<T> CategoricalFeaturesBuilder< T >::getFeatures ( )
inline

Definition at line 473 of file MLTableFunctions.hpp.

Referenced by decision_tree_reg_fit__cpu_template(), gbt_reg_fit__cpu_template(), linear_reg_fit__cpu_template(), pca_fit__cpu_1(), pca_fit__cpu_template(), and random_forest_reg_fit__cpu_template().

473  {
474  return ColumnList<T>(
475  col_ptrs_.data(), static_cast<int64_t>(col_ptrs_.size()), num_rows_);
476  }
std::vector< int8_t * > col_ptrs_

+ Here is the caller graph for this function:

Member Data Documentation

template<typename T>
std::vector<std::vector<std::string> > CategoricalFeaturesBuilder< T >::cat_feature_keys_
private

Definition at line 486 of file MLTableFunctions.hpp.

template<typename T>
std::vector<int8_t*> CategoricalFeaturesBuilder< T >::col_ptrs_
private

Definition at line 487 of file MLTableFunctions.hpp.

template<typename T>
int64_t CategoricalFeaturesBuilder< T >::num_rows_
private

Definition at line 483 of file MLTableFunctions.hpp.

template<typename T>
std::vector<TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol<T> > CategoricalFeaturesBuilder< T >::one_hot_encoded_cols_
private

Definition at line 485 of file MLTableFunctions.hpp.


The documentation for this struct was generated from the following file: