# Source code for fsfc.base

from abc import abstractmethod

from sklearn.base import BaseEstimator
from sklearn.feature_selection.base import SelectorMixin
from sklearn.base import ClusterMixin

from fsfc.mixins import KBestSelectorMixin


class BaseFeatureSelector(BaseEstimator, SelectorMixin):
    """
    Common ancestor of every feature selection algorithm.

    The class is SKLearn-compliant, which makes it usable inside pipelines.
    Subclasses are expected to override :meth:`fit` and
    :meth:`_get_support_mask`.
    """

    @abstractmethod
    def fit(self, x, *rest):
        """
        Fit the selector to a dataset.

        Parameters
        ----------
        x: ndarray
            The input samples - array of shape [n_samples, n_features].
        rest: list
            Miscellaneous extra positional arguments; this base
            implementation does not use them.

        Returns
        -------
        selector: BaseFeatureSelector
            The selector itself, so that calls can be chained.
        """
        # The base implementation learns nothing; it only provides the
        # chaining contract that overriding implementations follow.
        return self
class KBestFeatureSelector(KBestSelectorMixin, BaseFeatureSelector):
    """
    Base class for selectors that keep the K best features by score.

    Subclasses supply the scoring logic by overriding :meth:`_calc_scores`;
    :meth:`fit` stores the result in the ``scores`` attribute.

    Parameters
    ----------
    k: int
        Number of features to select
    """

    def __init__(self, k):
        self.k = k
        # Remains None until fit() has been called.
        self.scores = None

    def fit(self, x, *rest):
        """
        Compute feature scores for the dataset and remember them.

        Parameters
        ----------
        x: ndarray
            The input samples - array of shape [n_samples, n_features].
        rest: list
            Extra positional arguments; accepted but not used here.

        Returns
        -------
        selector: KBestFeatureSelector
            The selector itself, so that calls can be chained.
        """
        feature_scores = self._calc_scores(x)
        self.scores = feature_scores
        return self

    @abstractmethod
    def _calc_scores(self, x):
        """
        Calculate scores for the features of a dataset.

        Parameters
        ----------
        x: ndarray
            The input samples - array of shape [n_samples, n_features].

        Returns
        -------
        scores: ndarray
            Array of shape [n_features]. i-th element is a score of i-th feature.
        """

    def _get_scores(self):
        """Return the scores stored by :meth:`fit` (``None`` before fitting)."""
        return self.scores

    def _get_k(self):
        """Return ``k``, the number of features to select."""
        return self.k
class ClusteringFeatureSelector(KBestSelectorMixin, BaseFeatureSelector, ClusterMixin):
    """
    Selector that clusters samples while finding the relevant features.

    Allows to transform a dataset and select the K best features according
    to feature scores. Subclasses implement :meth:`_calc_scores_and_labels`;
    :meth:`fit` stores its results in ``scores`` and ``labels_``.

    Parameters
    ----------
    k: int
        Number of features to select
    """

    def __init__(self, k):
        self.k = k
        # Both remain None until fit() has been called.
        self.scores = None
        self.labels_ = None

    def fit(self, x, *rest):
        """
        Compute feature scores and cluster labels and remember both.

        Parameters
        ----------
        x: ndarray
            The input samples - array of shape [n_samples, n_features].
        rest: list
            Extra positional arguments; accepted but not used here.

        Returns
        -------
        selector: ClusteringFeatureSelector
            The selector itself, so that calls can be chained.
        """
        # Unpacking finishes before either attribute is assigned, so a
        # malformed result leaves the selector's state untouched.
        self.scores, self.labels_ = self._calc_scores_and_labels(x)
        return self

    @abstractmethod
    def _calc_scores_and_labels(self, x):
        """
        Calculate feature scores and cluster labels for samples.

        Parameters
        ----------
        x: ndarray
            The input samples - array of shape [n_samples, n_features].

        Returns
        -------
        scores_and_labels: tuple
            Tuple where first element is scores of features
            and second element are cluster labels for samples
        """

    def _get_k(self):
        """Return ``k``, the number of features to select."""
        return self.k

    def _get_scores(self):
        """Return the scores stored by :meth:`fit` (``None`` before fitting)."""
        return self.scores