Skip to content

Commit

Permalink
small cv fold fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
armaan-abraham committed Feb 6, 2024
1 parent af52e91 commit 9b8e988
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 12 deletions.
2 changes: 1 addition & 1 deletion ddmc/figures/figureM4.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def do_phenotype_regression(n_runs=3, n_components=35, n_cv_folds=3):
hot_cold = cptac.get_hot_cold_labels()

lr = LogisticRegressionCV(
cv=3,
cv=n_cv_folds,
solver="saga",
max_iter=10000,
n_jobs=-1,
Expand Down
17 changes: 6 additions & 11 deletions ddmc/figures/figureM7.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,40 +15,35 @@
plot_roc,
plot_cluster_regression_coefficients,
normalize_cluster_centers,
get_highest_weighted_clusters,
)


def makeFigure():
"""Get a list of the axis objects and create a figure"""
# Get list of axis objects
axes, f = getSetup((11, 7), (2, 3), multz={0: 1, 4: 1})
cptac = CPTAC()
is_hot = cptac.get_hot_cold_labels()
p_signal = cptac.get_p_signal()
model = DDMC(n_components=30, seq_weight=100, random_state=5).fit(p_signal)
assert (
not model.has_empty_clusters()
), "This plot assumes that every cluster will have at least one peptide. Please rerun with fewer components or more peptides."

centers = model.transform(as_df=True).loc[is_hot.index]
centers.iloc[:, :] = normalize_cluster_centers(centers.values)

plot_p_signal_across_clusters_and_binary_feature(is_hot, centers, "is_hot", axes[0])

centers.iloc[:, :] = normalize_cluster_centers(centers.values)
n_cv = 15
lr = LogisticRegressionCV(
cv=3, solver="saga", n_jobs=1, penalty="l1", max_iter=10000
cv=n_cv, solver="saga", n_jobs=1, penalty="l1", max_iter=10000
)
plot_roc(
lr, centers.values, is_hot.values, cv_folds=3, title="ROC TI", return_mAUC=True
lr, centers.values, is_hot.values, cv_folds=n_cv, title="ROC TI", return_mAUC=True
)
plot_cluster_regression_coefficients(axes[1], lr, title="")

top_clusters = np.argsort(np.abs(lr.coef_.squeeze()))[-3:]
top_clusters = get_highest_weighted_clusters(model, lr.coef_)

# plot predicted kinases for most weighted clusters
distances = model.predict_upstream_kinases()[top_clusters]

# plot upstream Kinases
plot_cluster_kinase_distances(
distances, model.get_pssms(clusters=top_clusters), axes[2], num_hits=2
)
Expand Down

0 comments on commit 9b8e988

Please sign in to comment.