Skip to content

Commit

Permalink
Backport polynomial library fixes to trapping_extended
Browse files Browse the repository at this point in the history
  • Loading branch information
Jacob-Stevens-Haas committed May 30, 2024
1 parent afa25db commit f463b18
Showing 1 changed file with 120 additions and 138 deletions.
258 changes: 120 additions & 138 deletions pysindy/feature_library/polynomial_library.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from itertools import chain
from itertools import combinations
from itertools import combinations_with_replacement as combinations_w_r
from math import comb
from typing import Iterator
from typing import Tuple

import numpy as np
from numpy.typing import NDArray
from scipy import sparse
from sklearn import __version__
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing._csr_polynomial_expansion import _csr_polynomial_expansion
from sklearn.utils.validation import check_is_fitted

from ..utils import AxesArray
Expand Down Expand Up @@ -45,22 +45,13 @@ class PolynomialLibrary(PolynomialFeatures, BaseFeatureLibrary):
Order of output array in the dense case. 'F' order is faster to
compute, but may slow down subsequent estimators.
library_ensemble : boolean, optional (default False)
Whether or not to use library bagging (regress on subset of the
candidate terms in the library)
ensemble_indices : integer array, optional (default [0])
The indices to use for ensembling the library.
Attributes
----------
powers_ : array, shape (n_output_features, n_input_features)
powers_[i, j] is the exponent of the jth input in the ith output.
n_input_features_ : int
n_features_in_ : int
The total number of input features.
WARNING: This is deprecated in scikit-learn version 1.0 and higher so
we check the sklearn.__version__ and switch to n_features_in if needed.
n_output_features_ : int
The total number of output features. This number is computed by
Expand All @@ -74,68 +65,68 @@ def __init__(
interaction_only=False,
include_bias=True,
order="C",
library_ensemble=False,
ensemble_indices=[0],
):
super(PolynomialLibrary, self).__init__(
super().__init__(
degree=degree,
interaction_only=interaction_only,
include_bias=include_bias,
order=order,
)
BaseFeatureLibrary.__init__(
self, library_ensemble=library_ensemble, ensemble_indices=ensemble_indices
)
if degree < 0 or not isinstance(degree, int):
raise ValueError("degree must be a nonnegative integer")
if (not include_interaction) and interaction_only:
raise ValueError(
"Can't have include_interaction be False and interaction_only"
" be True"
)
self.include_interaction = include_interaction

@staticmethod
def _combinations(
n_features, degree, include_interaction, interaction_only, include_bias
):
comb = combinations if interaction_only else combinations_w_r
start = int(not include_bias)
n_features: int,
degree: int,
include_interaction: bool,
interaction_only: bool,
include_bias: bool,
) -> Iterator[Tuple[int, ...]]:
"""
Create selection tuples of input indexes for each polynomail term
Selection tuple iterates the input indexes present in a single term.
For example, (x+y+1)^2 would be in iterator of the tuples:
(), (0,), (1,), (0, 0), (0, 1), (1, 1)
1 x y x^2 x*y y^2
Order of terms is preserved regardless of include_interation.
"""
if not include_interaction:
if include_bias:
return chain(
[()],
chain.from_iterable(
combinations_w_r([j], i)
for i in range(1, degree + 1)
for j in range(n_features)
),
)
else:
return chain.from_iterable(
combinations_w_r([j], i)
for i in range(1, degree + 1)
for j in range(n_features)
)
return chain.from_iterable(
comb(range(n_features), i) for i in range(start, degree + 1)
return chain(
[()] if include_bias else [],
(
exponent * (feat_idx,)
for exponent in range(1, degree + 1)
for feat_idx in range(n_features)
),
)
return PolynomialFeatures._combinations(
n_features=n_features,
min_degree=int(not include_bias),
max_degree=degree,
interaction_only=interaction_only,
include_bias=include_bias,
)

@property
def powers_(self):
def powers_(self) -> NDArray[np.int_]:
"""
The exponents of the polynomial as an array of shape
(n_features_out, n_features_in), where each item is the exponent of the
jth input variable in the ith polynomial term.
"""
check_is_fitted(self)
if float(__version__[:3]) >= 1.0:
n_features = self.n_features_in_
else:
n_features = self.n_input_features_
combinations = self._combinations(
n_features,
self.degree,
self.include_interaction,
self.interaction_only,
self.include_bias,
n_features=self.n_features_in_,
degree=self.degree,
include_interaction=self.include_interaction,
interaction_only=self.interaction_only,
include_bias=self.include_bias,
)
return np.vstack(
[np.bincount(c, minlength=self.n_features_in_) for c in combinations]
)
return np.vstack([np.bincount(c, minlength=n_features) for c in combinations])

def get_feature_names(self, input_features=None):
"""Return feature names for output features.
Expand Down Expand Up @@ -182,6 +173,13 @@ def fit(self, x_full, y=None):
-------
self : instance
"""
if self.degree < 0 or not isinstance(self.degree, int):
raise ValueError("degree must be a nonnegative integer")
if (not self.include_interaction) and self.interaction_only:
raise ValueError(
"Can't have include_interaction be False and interaction_only"
" be True"
)
n_features = x_full[0].shape[x_full[0].ax_coord]
combinations = self._combinations(
n_features,
Expand All @@ -190,10 +188,7 @@ def fit(self, x_full, y=None):
self.interaction_only,
self.include_bias,
)
if float(__version__[:3]) >= 1.0:
self.n_features_in_ = n_features
else:
self.n_input_features_ = n_features
self.n_features_in_ = n_features
self.n_output_features_ = sum(1 for _ in combinations)
return self

Expand All @@ -203,19 +198,8 @@ def transform(self, x_full):
Parameters
----------
x : array-like or CSR/CSC sparse matrix, shape (n_samples, n_features)
x_full : {array-like, sparse matrix} of shape (n_samples, n_features)
The data to transform, row by row.
Prefer CSR over CSC for sparse input (for speed), but CSC is
required if the degree is 4 or higher. If the degree is less than
4 and the input format is CSC, it will be converted to CSR, have
its polynomial features generated, then converted back to CSC.
If the degree is 2 or 3, the method described in "Leveraging
Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices
Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is
used, which is much faster than the method used on CSC input. For
this reason, a CSC input will be converted to CSR, and the output
will be converted back to CSC prior to being returned, hence the
preference of CSR.
Returns
-------
Expand All @@ -231,72 +215,70 @@ def transform(self, x_full):
if sparse.issparse(x) and x.format not in ["csr", "csc"]:
# create new with correct sparse
axes = comprehend_axes(x)
x = x.asformat("csr")
x = x.asformat("csc")
wrap_axes(axes, x)

n_samples = x.shape[x.ax_time]
n_features = x.shape[x.ax_coord]
if float(__version__[:3]) >= 1.0:
if n_features != self.n_features_in_:
raise ValueError("x shape does not match training shape")
else:
if n_features != self.n_input_features_:
raise ValueError("x shape does not match training shape")

if sparse.isspmatrix_csr(x):
if self.degree > 3:
return sparse.csr_matrix(self.transform(x.tocsc()))
to_stack = []
if self.include_bias:
to_stack.append(np.ones(shape=(n_samples, 1), dtype=x.dtype))
to_stack.append(x)
for deg in range(2, self.degree + 1):
xp_next = _csr_polynomial_expansion(
x.data,
x.indices,
x.indptr,
x.shape[1],
self.interaction_only,
deg,
)
if xp_next is None:
break
to_stack.append(xp_next)
xp = sparse.hstack(to_stack, format="csr")
elif sparse.isspmatrix_csc(x) and self.degree < 4:
return sparse.csc_matrix(self.transform(x.tocsr()))
if n_features != self.n_features_in_:
raise ValueError("x shape does not match training shape")

combinations = self._combinations(
n_features,
self.degree,
self.include_interaction,
self.interaction_only,
self.include_bias,
)
if sparse.isspmatrix(x):
columns = []
for combo in combinations:
if combo:
out_col = 1
for col_idx in combo:
out_col = x[..., col_idx].multiply(out_col)
columns.append(out_col)
else:
bias = sparse.csc_matrix(np.ones((x.shape[0], 1)))
columns.append(bias)
xp = sparse.hstack(columns, dtype=x.dtype).tocsc()
else:
combinations = self._combinations(
n_features,
self.degree,
self.include_interaction,
self.interaction_only,
self.include_bias,
xp = AxesArray(
np.empty(
(*x.shape[:-1], self.n_output_features_),
dtype=x.dtype,
order=self.order,
),
x.axes,
)
if sparse.isspmatrix(x):
columns = []
for comb in combinations:
if comb:
out_col = 1
for col_idx in comb:
out_col = x[..., col_idx].multiply(out_col)
columns.append(out_col)
else:
bias = sparse.csc_matrix(np.ones((x.shape[0], 1)))
columns.append(bias)
xp = sparse.hstack(columns, dtype=x.dtype).tocsc()
else:
xp = AxesArray(
np.empty(
(*x.shape[:-1], self.n_output_features_),
dtype=x.dtype,
order=self.order,
),
x.__dict__,
)
for i, comb in enumerate(combinations):
xp[..., i] = x[..., comb].prod(-1)
for i, combo in enumerate(combinations):
xp[..., i] = x[..., combo].prod(-1)
xp_full = xp_full + [xp]
if self.library_ensemble:
xp_full = self._ensemble(xp_full)
return xp_full


def n_poly_features(
n_in_feat: int,
degree: int,
include_bias: bool = False,
include_interation: bool = True,
interaction_only: bool = False,
) -> int:
"""Calculate number of polynomial features
Args:
n_in_feat: number of input features, e.g. 3 for x, y, z
degree: polynomial degree, e.g. 2 for quadratic
include_bias: whether to include a constant term
include_interaction: whether to include terms mixing multiple inputs
interaction_only: whether to omit terms of x_m * x_n^p for p > 1
"""
if not include_interation and interaction_only:
raise ValueError("Cannot set interaction only if include_interaction is False")
n_feat = include_bias
if not include_interation:
return n_feat + n_in_feat * degree
for deg in range(1, degree + 1):
if interaction_only:
n_feat += comb(n_in_feat, deg)
else:
n_feat += comb(n_in_feat + deg - 1, deg)
return n_feat

0 comments on commit f463b18

Please sign in to comment.