Skip to content

Commit

Permalink
Replacing the boolean `iterative` parameter with an integer `recompute_every` parameter to allow for a version of C…
Browse files Browse the repository at this point in the history
…UR where the pi score is recomputed every n iterations; if n=0, pi is never recomputed, and the default is n=1
  • Loading branch information
rosecers committed May 20, 2022
1 parent 18bccdf commit 945a266
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 41 deletions.
8 changes: 4 additions & 4 deletions docs/source/selection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ They are instantiated using
# int, number of eigenvectors to use in computing pi
k = 1,
# boolean, whether to orthogonalize after each selection, defaults to true
iterative = True,
# int, number of steps after which to recompute pi, defaults to 1; if 0, pi is never recomputed
recompute_every = 1,
# float, threshold below which scores will be considered 0, defaults to 1E-12
tolerance=1E-12,
Expand Down Expand Up @@ -149,8 +149,8 @@ and are instantiated using
# int, number of eigenvectors to use in computing pi
k = 1,
# boolean, whether to orthogonalize after each selection, defaults to true
iterative = True,
# int, number of steps after which to recompute pi, defaults to 1; if 0, pi is never recomputed
recompute_every = 1,
# float, threshold below which scores will be considered 0, defaults to 1E-12
tolerance=1E-12,
Expand Down
2 changes: 1 addition & 1 deletion examples/FeatureSelection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@
"\n",
"\n",
"idx = PCovCUR(mixing=m, n_to_select=n).fit(X, y).selected_idx_\n",
"idx_non_it = PCovCUR(mixing=m, iterative=False, n_to_select=n).fit(X, y).selected_idx_\n",
"idx_non_it = PCovCUR(mixing=m, recompute_every=0, n_to_select=n).fit(X, y).selected_idx_\n",
"\n",
"plt.loglog(\n",
" range(1, n + 1),\n",
Expand Down
34 changes: 20 additions & 14 deletions skcosmo/_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,8 +422,9 @@ class _CUR(GreedySelector):
Parameters
----------
iterative : bool
whether to orthogonalize after each selection, defaults to `true`
recompute_every : int
number of steps after which to recompute the pi score;
defaults to 1. If 0, the pi score is never recomputed.
k : int
number of eigenvectors to compute the importance score with, defaults to 1
Expand All @@ -443,7 +444,7 @@ class _CUR(GreedySelector):
def __init__(
self,
selection_type,
iterative=True,
recompute_every=1,
k=1,
tolerance=1e-12,
n_to_select=None,
Expand All @@ -454,8 +455,8 @@ def __init__(
):

self.k = k
self.iterative = iterative
self.tolerance = tolerance
self.recompute_every = recompute_every

super().__init__(
selection_type=selection_type,
Expand Down Expand Up @@ -510,7 +511,7 @@ def _continue_greedy_search(self, X, y, n_to_select):

for c in self.selected_idx_:

if (
if self.recompute_every != 0 and (
np.linalg.norm(np.take(self.X_current_, [c], axis=self._axis))
> self.tolerance
):
Expand Down Expand Up @@ -570,13 +571,15 @@ def _compute_pi(self, X, y=None):
def _update_post_selection(self, X, y, last_selected):
"""
Saves the most recently selected feature, increments the feature counter,
and, if the CUR is iterative, orthogonalizes the remaining features by
and, if the CUR is iterative (recompute_every != 0), orthogonalizes the remaining features by
the most recently selected.
"""
super()._update_post_selection(X, y, last_selected)

if self.iterative:
if self.recompute_every != 0:
self._orthogonalize(last_selected)

if len(self.selected_idx_) % self.recompute_every == 0:
self.pi_ = self._compute_pi(self.X_current_)

self.pi_[last_selected] = 0.0
Expand Down Expand Up @@ -605,8 +608,9 @@ class _PCovCUR(GreedySelector):
Parameters
----------
iterative : bool
whether to orthogonalize after each selection, defaults to `true`
recompute_every : int
number of steps after which to recompute the pi score;
defaults to 1. If 0, the pi score is never recomputed.
k : int
number of eigenvectors to compute the importance score with, defaults to 1
Expand Down Expand Up @@ -634,7 +638,7 @@ def __init__(
self,
selection_type,
mixing=0.5,
iterative=True,
recompute_every=1,
k=1,
tolerance=1e-12,
n_to_select=None,
Expand All @@ -646,7 +650,7 @@ def __init__(
self.mixing = mixing

self.k = k
self.iterative = iterative
self.recompute_every = recompute_every
self.tolerance = tolerance

super().__init__(
Expand Down Expand Up @@ -707,7 +711,7 @@ def _continue_greedy_search(self, X, y, n_to_select):

for c in self.selected_idx_:

if (
if self.recompute_every != 0 and (
np.linalg.norm(np.take(self.X_current_, [c], axis=self._axis))
> self.tolerance
):
Expand All @@ -720,13 +724,15 @@ def _continue_greedy_search(self, X, y, n_to_select):
def _update_post_selection(self, X, y, last_selected):
"""
Saves the most recently selected feature, increments the feature counter,
and, if the CUR is iterative, orthogonalizes the remaining features by
and, if the CUR is iterative (recompute_every != 0), orthogonalizes the remaining features by
the most recently selected.
"""
super()._update_post_selection(X, y, last_selected)

if self.iterative:
if self.recompute_every != 0:
self._orthogonalize(last_selected)

if len(self.selected_idx_) % self.recompute_every == 0:
self.pi_ = self._compute_pi(self.X_current_, self.y_current_)

self.pi_[last_selected] = 0.0
Expand Down
18 changes: 10 additions & 8 deletions skcosmo/feature_selection/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,9 @@ class CUR(_CUR):
Parameters
----------
iterative : bool
whether to orthogonalize after each selection, defaults to `true`
recompute_every : int
number of steps after which to recompute the pi score;
defaults to 1. If 0, the pi score is never recomputed.
k : int
number of eigenvectors to compute the importance score with, defaults to 1
Expand Down Expand Up @@ -193,7 +194,7 @@ class CUR(_CUR):

def __init__(
self,
iterative=True,
recompute_every=1,
k=1,
tolerance=1e-12,
n_to_select=None,
Expand All @@ -204,7 +205,7 @@ def __init__(
):
super().__init__(
selection_type="feature",
iterative=iterative,
recompute_every=recompute_every,
k=k,
tolerance=tolerance,
n_to_select=n_to_select,
Expand All @@ -223,8 +224,9 @@ class PCovCUR(_PCovCUR):
Parameters
----------
iterative : bool
whether to orthogonalize after each selection, defaults to `true`
recompute_every : int
number of steps after which to recompute the pi score;
defaults to 1. If 0, the pi score is never recomputed.
k : int
number of eigenvectors to compute the importance score with, defaults to 1
Expand Down Expand Up @@ -277,7 +279,7 @@ class PCovCUR(_PCovCUR):
def __init__(
self,
mixing=0.5,
iterative=True,
recompute_every=1,
k=1,
tolerance=1e-12,
n_to_select=None,
Expand All @@ -289,7 +291,7 @@ def __init__(
super().__init__(
selection_type="feature",
mixing=mixing,
iterative=iterative,
recompute_every=recompute_every,
k=k,
tolerance=tolerance,
n_to_select=n_to_select,
Expand Down
19 changes: 11 additions & 8 deletions skcosmo/sample_selection/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,9 @@ class CUR(_CUR):
Parameters
----------
iterative : bool
whether to orthogonalize after each selection, defaults to `true`
recompute_every : int
number of steps after which to recompute the pi score;
defaults to 1. If 0, the pi score is never recomputed.
k : int
number of eigenvectors to compute the importance score with, defaults to 1
Expand Down Expand Up @@ -199,7 +200,7 @@ class CUR(_CUR):

def __init__(
self,
iterative=True,
recompute_every=1,
k=1,
tolerance=1e-12,
n_to_select=None,
Expand All @@ -210,7 +211,7 @@ def __init__(
):
super().__init__(
selection_type="sample",
iterative=iterative,
recompute_every=recompute_every,
k=k,
tolerance=tolerance,
n_to_select=n_to_select,
Expand All @@ -234,8 +235,10 @@ class PCovCUR(_PCovCUR):
The PCovR mixing parameter, as described in PCovR as
:math:`{\\alpha}`. Stored in :py:attr:`self.mixing`.
iterative : bool
whether to orthogonalize after each selection, defaults to `true`
recompute_every : int
number of steps after which to recompute the pi score;
defaults to 1. If 0, the pi score is never recomputed.
k : int
number of eigenvectors to compute the importance score with, defaults to 1
Expand Down Expand Up @@ -286,7 +289,7 @@ class PCovCUR(_PCovCUR):
def __init__(
self,
mixing=0.5,
iterative=True,
recompute_every=1,
k=1,
tolerance=1e-12,
n_to_select=None,
Expand All @@ -298,7 +301,7 @@ def __init__(
super().__init__(
selection_type="sample",
mixing=mixing,
iterative=iterative,
recompute_every=recompute_every,
k=k,
tolerance=tolerance,
n_to_select=n_to_select,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_feature_pcov_cur.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_non_it(self):
This test checks that the model can be run non-iteratively
"""
self.idx = [2, 8, 3, 6, 7, 9, 1, 0, 5]
selector = PCovCUR(n_to_select=9, iterative=False)
selector = PCovCUR(n_to_select=9, recompute_every=0)
selector.fit(self.X, self.y)

self.assertTrue(np.allclose(selector.selected_idx_, self.idx))
Expand Down
2 changes: 1 addition & 1 deletion tests/test_feature_simple_cur.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def test_non_it(self):
_, UC = np.linalg.eigh(C)
ref_idx = np.argsort(-(UC[:, -1] ** 2.0))[:-1]

selector = CUR(n_to_select=self.X.shape[-1] - 1, iterative=False)
selector = CUR(n_to_select=self.X.shape[-1] - 1, recompute_every=0)
selector.fit(self.X)

self.assertTrue(np.allclose(selector.selected_idx_, ref_idx))
Expand Down
5 changes: 2 additions & 3 deletions tests/test_sample_pcov_cur.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_known(self):
This test checks that the model returns a known set of indices
"""

selector = PCovCUR(n_to_select=10, mixing=0.5, iterative=True)
selector = PCovCUR(n_to_select=10, mixing=0.5)
selector.fit(self.X, self.y)

self.assertTrue(np.allclose(selector.selected_idx_, self.idx))
Expand Down Expand Up @@ -50,9 +50,8 @@ def test_non_it(self):
"""
This test checks that the model can be run non-iteratively
"""
selector = PCovCUR(n_to_select=10, iterative=False)
self.idx = [256, 32, 138, 290, 362, 141, 359, 254, 428, 9]
selector = PCovCUR(n_to_select=10, iterative=False)
selector = PCovCUR(n_to_select=10, recompute_every=0)
selector.fit(self.X, self.y)

self.assertTrue(np.allclose(selector.selected_idx_, self.idx))
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sample_simple_cur.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_non_it(self):
_, UK = np.linalg.eigh(K)
ref_idx = np.argsort(-(UK[:, -1] ** 2.0))[: self.n_select]

selector = CUR(n_to_select=len(ref_idx), iterative=False)
selector = CUR(n_to_select=len(ref_idx), recompute_every=0)
selector.fit(self.X)

self.assertTrue(np.allclose(selector.selected_idx_, ref_idx))
Expand Down

0 comments on commit 945a266

Please sign in to comment.