Backport polynomial library fixes to trapping_extended

dynamicslab · May 30, 2024 · f463b18 · f463b18
1 parent afa25db
commit f463b18
Showing 1 changed file with 120 additions and 138 deletions.
diff --git a/pysindy/feature_library/polynomial_library.py b/pysindy/feature_library/polynomial_library.py
@@ -1,12 +1,12 @@
 from itertools import chain
-from itertools import combinations
-from itertools import combinations_with_replacement as combinations_w_r
+from math import comb
+from typing import Iterator
+from typing import Tuple
 
 import numpy as np
+from numpy.typing import NDArray
 from scipy import sparse
-from sklearn import __version__
 from sklearn.preprocessing import PolynomialFeatures
-from sklearn.preprocessing._csr_polynomial_expansion import _csr_polynomial_expansion
 from sklearn.utils.validation import check_is_fitted
 
 from ..utils import AxesArray
@@ -45,22 +45,13 @@ class PolynomialLibrary(PolynomialFeatures, BaseFeatureLibrary):
         Order of output array in the dense case. 'F' order is faster to
         compute, but may slow down subsequent estimators.
 
-    library_ensemble : boolean, optional (default False)
-        Whether or not to use library bagging (regress on subset of the
-        candidate terms in the library)
-
-    ensemble_indices : integer array, optional (default [0])
-        The indices to use for ensembling the library.
-
     Attributes
     ----------
     powers_ : array, shape (n_output_features, n_input_features)
         powers_[i, j] is the exponent of the jth input in the ith output.
 
-    n_input_features_ : int
+    n_features_in_ : int
         The total number of input features.
-        WARNING: This is deprecated in scikit-learn version 1.0 and higher so
-        we check the sklearn.__version__ and switch to n_features_in if needed.
 
     n_output_features_ : int
         The total number of output features. This number is computed by
@@ -74,68 +65,68 @@ def __init__(
         interaction_only=False,
         include_bias=True,
         order="C",
-        library_ensemble=False,
-        ensemble_indices=[0],
     ):
-        super(PolynomialLibrary, self).__init__(
+        super().__init__(
             degree=degree,
             interaction_only=interaction_only,
             include_bias=include_bias,
             order=order,
         )
-        BaseFeatureLibrary.__init__(
-            self, library_ensemble=library_ensemble, ensemble_indices=ensemble_indices
-        )
-        if degree < 0 or not isinstance(degree, int):
-            raise ValueError("degree must be a nonnegative integer")
-        if (not include_interaction) and interaction_only:
-            raise ValueError(
-                "Can't have include_interaction be False and interaction_only"
-                " be True"
-            )
         self.include_interaction = include_interaction
 
     @staticmethod
     def _combinations(
-        n_features, degree, include_interaction, interaction_only, include_bias
-    ):
-        comb = combinations if interaction_only else combinations_w_r
-        start = int(not include_bias)
+        n_features: int,
+        degree: int,
+        include_interaction: bool,
+        interaction_only: bool,
+        include_bias: bool,
+    ) -> Iterator[Tuple[int, ...]]:
+        """
+        Create selection tuples of input indexes for each polynomail term
+
+        Selection tuple iterates the input indexes present in a single term.
+        For example, (x+y+1)^2 would be in iterator of the tuples:
+        (), (0,), (1,), (0, 0), (0, 1), (1, 1)
+        1    x     y      x^2     x*y     y^2
+
+        Order of terms is preserved regardless of include_interation.
+        """
         if not include_interaction:
-            if include_bias:
-                return chain(
-                    [()],
-                    chain.from_iterable(
-                        combinations_w_r([j], i)
-                        for i in range(1, degree + 1)
-                        for j in range(n_features)
-                    ),
-                )
-            else:
-                return chain.from_iterable(
-                    combinations_w_r([j], i)
-                    for i in range(1, degree + 1)
-                    for j in range(n_features)
-                )
-        return chain.from_iterable(
-            comb(range(n_features), i) for i in range(start, degree + 1)
+            return chain(
+                [()] if include_bias else [],
+                (
+                    exponent * (feat_idx,)
+                    for exponent in range(1, degree + 1)
+                    for feat_idx in range(n_features)
+                ),
+            )
+        return PolynomialFeatures._combinations(
+            n_features=n_features,
+            min_degree=int(not include_bias),
+            max_degree=degree,
+            interaction_only=interaction_only,
+            include_bias=include_bias,
         )
 
     @property
-    def powers_(self):
+    def powers_(self) -> NDArray[np.int_]:
+        """
+        The exponents of the polynomial as an array of shape
+        (n_features_out, n_features_in), where each item is the exponent of the
+        jth input variable in the ith polynomial term.
+        """
         check_is_fitted(self)
-        if float(__version__[:3]) >= 1.0:
-            n_features = self.n_features_in_
-        else:
-            n_features = self.n_input_features_
         combinations = self._combinations(
-            n_features,
-            self.degree,
-            self.include_interaction,
-            self.interaction_only,
-            self.include_bias,
+            n_features=self.n_features_in_,
+            degree=self.degree,
+            include_interaction=self.include_interaction,
+            interaction_only=self.interaction_only,
+            include_bias=self.include_bias,
+        )
+        return np.vstack(
+            [np.bincount(c, minlength=self.n_features_in_) for c in combinations]
         )
-        return np.vstack([np.bincount(c, minlength=n_features) for c in combinations])
 
     def get_feature_names(self, input_features=None):
         """Return feature names for output features.
@@ -182,6 +173,13 @@ def fit(self, x_full, y=None):
         -------
         self : instance
         """
+        if self.degree < 0 or not isinstance(self.degree, int):
+            raise ValueError("degree must be a nonnegative integer")
+        if (not self.include_interaction) and self.interaction_only:
+            raise ValueError(
+                "Can't have include_interaction be False and interaction_only"
+                " be True"
+            )
         n_features = x_full[0].shape[x_full[0].ax_coord]
         combinations = self._combinations(
             n_features,
@@ -190,10 +188,7 @@ def fit(self, x_full, y=None):
             self.interaction_only,
             self.include_bias,
         )
-        if float(__version__[:3]) >= 1.0:
-            self.n_features_in_ = n_features
-        else:
-            self.n_input_features_ = n_features
+        self.n_features_in_ = n_features
         self.n_output_features_ = sum(1 for _ in combinations)
         return self
 
@@ -203,19 +198,8 @@ def transform(self, x_full):
 
         Parameters
         ----------
-        x : array-like or CSR/CSC sparse matrix, shape (n_samples, n_features)
+        x_full : {array-like, sparse matrix} of shape (n_samples, n_features)
             The data to transform, row by row.
-            Prefer CSR over CSC for sparse input (for speed), but CSC is
-            required if the degree is 4 or higher. If the degree is less than
-            4 and the input format is CSC, it will be converted to CSR, have
-            its polynomial features generated, then converted back to CSC.
-            If the degree is 2 or 3, the method described in "Leveraging
-            Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices
-            Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is
-            used, which is much faster than the method used on CSC input. For
-            this reason, a CSC input will be converted to CSR, and the output
-            will be converted back to CSC prior to being returned, hence the
-            preference of CSR.
 
         Returns
         -------
@@ -231,72 +215,70 @@ def transform(self, x_full):
             if sparse.issparse(x) and x.format not in ["csr", "csc"]:
                 # create new with correct sparse
                 axes = comprehend_axes(x)
-                x = x.asformat("csr")
+                x = x.asformat("csc")
                 wrap_axes(axes, x)
-
-            n_samples = x.shape[x.ax_time]
             n_features = x.shape[x.ax_coord]
-            if float(__version__[:3]) >= 1.0:
-                if n_features != self.n_features_in_:
-                    raise ValueError("x shape does not match training shape")
-            else:
-                if n_features != self.n_input_features_:
-                    raise ValueError("x shape does not match training shape")
-
-            if sparse.isspmatrix_csr(x):
-                if self.degree > 3:
-                    return sparse.csr_matrix(self.transform(x.tocsc()))
-                to_stack = []
-                if self.include_bias:
-                    to_stack.append(np.ones(shape=(n_samples, 1), dtype=x.dtype))
-                to_stack.append(x)
-                for deg in range(2, self.degree + 1):
-                    xp_next = _csr_polynomial_expansion(
-                        x.data,
-                        x.indices,
-                        x.indptr,
-                        x.shape[1],
-                        self.interaction_only,
-                        deg,
-                    )
-                    if xp_next is None:
-                        break
-                    to_stack.append(xp_next)
-                xp = sparse.hstack(to_stack, format="csr")
-            elif sparse.isspmatrix_csc(x) and self.degree < 4:
-                return sparse.csc_matrix(self.transform(x.tocsr()))
+            if n_features != self.n_features_in_:
+                raise ValueError("x shape does not match training shape")
+
+            combinations = self._combinations(
+                n_features,
+                self.degree,
+                self.include_interaction,
+                self.interaction_only,
+                self.include_bias,
+            )
+            if sparse.isspmatrix(x):
+                columns = []
+                for combo in combinations:
+                    if combo:
+                        out_col = 1
+                        for col_idx in combo:
+                            out_col = x[..., col_idx].multiply(out_col)
+                        columns.append(out_col)
+                    else:
+                        bias = sparse.csc_matrix(np.ones((x.shape[0], 1)))
+                        columns.append(bias)
+                xp = sparse.hstack(columns, dtype=x.dtype).tocsc()
             else:
-                combinations = self._combinations(
-                    n_features,
-                    self.degree,
-                    self.include_interaction,
-                    self.interaction_only,
-                    self.include_bias,
+                xp = AxesArray(
+                    np.empty(
+                        (*x.shape[:-1], self.n_output_features_),
+                        dtype=x.dtype,
+                        order=self.order,
+                    ),
+                    x.axes,
                 )
-                if sparse.isspmatrix(x):
-                    columns = []
-                    for comb in combinations:
-                        if comb:
-                            out_col = 1
-                            for col_idx in comb:
-                                out_col = x[..., col_idx].multiply(out_col)
-                            columns.append(out_col)
-                        else:
-                            bias = sparse.csc_matrix(np.ones((x.shape[0], 1)))
-                            columns.append(bias)
-                    xp = sparse.hstack(columns, dtype=x.dtype).tocsc()
-                else:
-                    xp = AxesArray(
-                        np.empty(
-                            (*x.shape[:-1], self.n_output_features_),
-                            dtype=x.dtype,
-                            order=self.order,
-                        ),
-                        x.__dict__,
-                    )
-                    for i, comb in enumerate(combinations):
-                        xp[..., i] = x[..., comb].prod(-1)
+                for i, combo in enumerate(combinations):
+                    xp[..., i] = x[..., combo].prod(-1)
             xp_full = xp_full + [xp]
-        if self.library_ensemble:
-            xp_full = self._ensemble(xp_full)
         return xp_full
+
+
+def n_poly_features(
+    n_in_feat: int,
+    degree: int,
+    include_bias: bool = False,
+    include_interation: bool = True,
+    interaction_only: bool = False,
+) -> int:
+    """Calculate number of polynomial features
+
+    Args:
+        n_in_feat: number of input features, e.g. 3 for x, y, z
+        degree: polynomial degree, e.g. 2 for quadratic
+        include_bias: whether to include a constant term
+        include_interaction: whether to include terms mixing multiple inputs
+        interaction_only: whether to omit terms of x_m * x_n^p for p > 1
+    """
+    if not include_interation and interaction_only:
+        raise ValueError("Cannot set interaction only if include_interaction is False")
+    n_feat = include_bias
+    if not include_interation:
+        return n_feat + n_in_feat * degree
+    for deg in range(1, degree + 1):
+        if interaction_only:
+            n_feat += comb(n_in_feat, deg)
+        else:
+            n_feat += comb(n_in_feat + deg - 1, deg)
+    return n_feat