Skip to content

toncus/Project-5-MatplotlibPymaceuticals

Repository files navigation

Pymaceuticals Inc


Analysis

  • Overall, it is clear that Capomulin outperforms all other treatment options in the screen.
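  • Among the four treatments plotted (Capomulin, Infubinol, Ketapril, and Placebo), Capomulin was the only one to reduce tumor volume. It achieved a 19% reduction in tumor volume over the course of the trial, whereas the other three were associated with an increase in tumor volume of roughly 46-57%. (Ramicane, which is not plotted, showed a comparable 22% reduction.)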
  • Capomulin greatly limited the spread of the tumor compared to other treatment options. By study end, the average mouse on Capomulin had only 1 new metastatic site, as opposed to the average 2-3 found in mice of other treatment options.
  • Lastly, mice on the Capomulin treatment had the highest survival rate of any treatment in the screen. Over 90% of mice treated by Capomulin survived the full duration of the trial, compared to only 35-45% of mice on other treatment options.
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Incorporate Seaborn if preferred
# import seaborn as sns

# Input CSV locations (adjust these paths to match your checkout)
mouse_drug_data_to_load = "raw_data/mouse_drug_data.csv"
clinical_trial_data_to_load = "raw_data/clinicaltrial_data.csv"

# Read the Mouse and Drug Data and the Clinical Trial Data
mouse_drug_data = pd.read_csv(mouse_drug_data_to_load)
clinical_data = pd.read_csv(clinical_trial_data_to_load)

# Combine the data into a single dataset: a left join keeps every clinical
# measurement and attaches the drug for its mouse. The join key is passed
# once; listing "Mouse ID" twice was redundant.
# NOTE(review): the survival table reports 26 mice at timepoint 0 for Propriva
# and Stelasyn versus 25 elsewhere, which may mean a Mouse ID is assigned to
# more than one drug and gets duplicated by this join -- verify the raw data.
clinical_data_complete = pd.merge(clinical_data, mouse_drug_data, how="left", on="Mouse ID")

# Display the data table for preview
clinical_data_complete.head()
Mouse ID Timepoint Tumor Volume (mm3) Metastatic Sites Drug
0 b128 0 45.0 0 Capomulin
1 f932 0 45.0 0 Ketapril
2 g107 0 45.0 0 Ketapril
3 a457 0 45.0 0 Ketapril
4 c819 0 45.0 0 Ketapril

Tumor Response to Treatment

# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint.
# The column is selected BEFORE aggregating so that only tumor volume is
# averaged; aggregating the whole frame first also tries to average the
# non-numeric "Mouse ID" column, which raises TypeError on pandas >= 2.0.
tumor_vols_mean = clinical_data_complete.groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"].mean()

# Convert to DataFrame (index: Drug/Timepoint, single column: Tumor Volume (mm3))
tumor_vols_mean = pd.DataFrame(tumor_vols_mean)

# Preview DataFrame
tumor_vols_mean
Tumor Volume (mm3)
Drug Timepoint
Capomulin 0 45.000000
5 44.266086
10 43.084291
15 42.064317
20 40.716325
25 39.939528
30 38.769339
35 37.816839
40 36.958001
45 36.236114
Ceftamin 0 45.000000
5 46.503051
10 48.285125
15 50.094055
20 52.157049
25 54.287674
30 56.769517
35 58.827548
40 61.467895
45 64.132421
Infubinol 0 45.000000
5 47.062001
10 49.403909
15 51.296397
20 53.197691
25 55.715252
30 58.299397
35 60.742461
40 63.162824
45 65.755562
... ... ...
Ramicane 0 45.000000
5 43.944859
10 42.531957
15 41.495061
20 40.238325
25 38.974300
30 38.703137
35 37.451996
40 36.574081
45 34.955595
Stelasyn 0 45.000000
5 47.527452
10 49.463844
15 51.529409
20 54.067395
25 56.166123
30 59.826738
35 62.440699
40 65.356386
45 68.438310
Zoniferol 0 45.000000
5 46.851818
10 48.689881
15 50.779059
20 53.170334
25 55.432935
30 57.713531
35 60.089372
40 62.916692
45 65.960888

100 rows × 1 columns

# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint.
# The column is selected BEFORE aggregating so the standard error is computed
# only for tumor volume; aggregating the whole frame first also hits the
# non-numeric "Mouse ID" column, which fails on pandas >= 2.0.
tumor_vols_sem = clinical_data_complete.groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"].sem()

# Convert to DataFrame (index: Drug/Timepoint, single column: Tumor Volume (mm3))
tumor_vols_sem = pd.DataFrame(tumor_vols_sem)

# Preview DataFrame
tumor_vols_sem.head()
Tumor Volume (mm3)
Drug Timepoint
Capomulin 0 0.000000
5 0.448593
10 0.702684
15 0.838617
20 0.909731
# Reshape the long (Drug, Timepoint) tables into wide form:
# one row per Timepoint, one column per Drug.

# Flatten the Drug/Timepoint index back into ordinary columns first
tumor_vols_mean = tumor_vols_mean.reset_index()
tumor_vols_sem = tumor_vols_sem.reset_index()

# Pivot on Drug, keeping only the tumor volume values
tumor_vols_pivot_mean = tumor_vols_mean.pivot(
    index="Timepoint", columns="Drug", values="Tumor Volume (mm3)"
)
tumor_vols_pivot_sem = tumor_vols_sem.pivot(
    index="Timepoint", columns="Drug", values="Tumor Volume (mm3)"
)

# Preview the reshaped means
tumor_vols_pivot_mean.head()
Drug Capomulin Ceftamin Infubinol Ketapril Naftisol Placebo Propriva Ramicane Stelasyn Zoniferol
Timepoint
0 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000
5 44.266086 46.503051 47.062001 47.389175 46.796098 47.125589 47.248967 43.944859 47.527452 46.851818
10 43.084291 48.285125 49.403909 49.582269 48.694210 49.423329 49.101541 42.531957 49.463844 48.689881
15 42.064317 50.094055 51.296397 52.399974 50.933018 51.359742 51.067318 41.495061 51.529409 50.779059
20 40.716325 52.157049 53.197691 54.920935 53.644087 54.364417 53.346737 40.238325 54.067395 53.170334
# Generate the Plot (with Error Bars)
# One (drug, color, marker) entry per plotted series. The label is passed
# explicitly so the legend is populated regardless of whether the installed
# matplotlib derives labels from the pandas Series name.
tumor_plot_styles = [
    ("Capomulin", "r", "o"),
    ("Infubinol", "b", "^"),
    ("Ketapril", "g", "s"),
    ("Placebo", "k", "d"),
]
for drug, color, marker in tumor_plot_styles:
    plt.errorbar(
        tumor_vols_pivot_mean.index,
        tumor_vols_pivot_mean[drug],
        yerr=tumor_vols_pivot_sem[drug],
        color=color,
        marker=marker,
        markersize=5,
        linestyle="dashed",
        linewidth=0.50,
        label=drug,
    )

plt.title("Tumor Response to Treatment")
plt.ylabel("Tumor Volume (mm3)")
plt.xlabel("Time (Days)")
plt.grid(True)
plt.legend(loc="best", fontsize="small", fancybox=True)

# Save the Figure
plt.savefig("analysis/Fig1.png")

# Show the Figure
plt.show()

png

Metastatic Response to Treatment

# Store the Mean Met. Site Data Grouped by Drug and Timepoint.
# The column is selected BEFORE aggregating so that only the metastatic-site
# counts are averaged; aggregating the whole frame first also tries to average
# the non-numeric "Mouse ID" column, which raises TypeError on pandas >= 2.0.
met_sites_mean = clinical_data_complete.groupby(["Drug", "Timepoint"])["Metastatic Sites"].mean()

# Convert to DataFrame (index: Drug/Timepoint, single column: Metastatic Sites)
met_sites_mean = pd.DataFrame(met_sites_mean)

# Preview DataFrame
met_sites_mean.head()
Metastatic Sites
Drug Timepoint
Capomulin 0 0.000000
5 0.160000
10 0.320000
15 0.375000
20 0.652174
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint.
# The column is selected BEFORE aggregating so the standard error is computed
# only for the metastatic-site counts; aggregating the whole frame first also
# hits the non-numeric "Mouse ID" column, which fails on pandas >= 2.0.
met_sites_sem = clinical_data_complete.groupby(["Drug", "Timepoint"])["Metastatic Sites"].sem()

# Convert to DataFrame (index: Drug/Timepoint, single column: Metastatic Sites)
met_sites_sem = pd.DataFrame(met_sites_sem)

# Preview DataFrame
met_sites_sem.head()
Metastatic Sites
Drug Timepoint
Capomulin 0 0.000000
5 0.074833
10 0.125433
15 0.132048
20 0.161621
# Minor Data Munging to Re-Format the Data Frames:
# one row per Timepoint, one column per Drug.
met_sites_mean = met_sites_mean.reset_index()
met_sites_pivot_mean = met_sites_mean.pivot(index="Timepoint", columns="Drug")["Metastatic Sites"]

met_sites_sem = met_sites_sem.reset_index()
met_sites_pivot_sem = met_sites_sem.pivot(index="Timepoint", columns="Drug")["Metastatic Sites"]

# Preview that Reformatting worked.
# NOTE: this cell previously previewed tumor_vols_pivot_mean by mistake; it now
# shows the metastatic-site pivot built above. Re-run the cell to refresh any
# captured output.
met_sites_pivot_mean.head()
Drug Capomulin Ceftamin Infubinol Ketapril Naftisol Placebo Propriva Ramicane Stelasyn Zoniferol
Timepoint
0 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000 45.000000
5 44.266086 46.503051 47.062001 47.389175 46.796098 47.125589 47.248967 43.944859 47.527452 46.851818
10 43.084291 48.285125 49.403909 49.582269 48.694210 49.423329 49.101541 42.531957 49.463844 48.689881
15 42.064317 50.094055 51.296397 52.399974 50.933018 51.359742 51.067318 41.495061 51.529409 50.779059
20 40.716325 52.157049 53.197691 54.920935 53.644087 54.364417 53.346737 40.238325 54.067395 53.170334
# Generate the Plot (with Error Bars)
# One (drug, color, marker) entry per plotted series. The label is passed
# explicitly so the legend is populated regardless of whether the installed
# matplotlib derives labels from the pandas Series name.
met_plot_styles = [
    ("Capomulin", "r", "o"),
    ("Infubinol", "b", "^"),
    ("Ketapril", "g", "s"),
    ("Placebo", "k", "d"),
]
for drug, color, marker in met_plot_styles:
    plt.errorbar(
        met_sites_pivot_mean.index,
        met_sites_pivot_mean[drug],
        yerr=met_sites_pivot_sem[drug],
        color=color,
        marker=marker,
        markersize=5,
        linestyle="dashed",
        linewidth=0.50,
        label=drug,
    )

plt.title("Metastatic Spread During Treatment")
plt.ylabel("Met. Sites")
plt.xlabel("Treatment Duration (Days)")
plt.grid(True)
plt.legend(loc="best", fontsize="small", fancybox=True)

# Save the Figure
plt.savefig("analysis/Fig2.png")

# Show the Figure
plt.show()

png

Survival Rates

# Count the measurements recorded for each (Drug, Timepoint) pair.
# (We can count any column; tumor volume is used here.)
mice_per_group = clinical_data_complete.groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"].count()

# Wrap the counts in a DataFrame under a descriptive column name
survival_count = pd.DataFrame({"Mouse Count": mice_per_group})

# Preview DataFrame
survival_count.head()
Mouse Count
Drug Timepoint
Capomulin 0 25
5 25
10 25
15 24
20 23
# Reshape the counts into wide form: one row per Timepoint, one column per Drug.
# The Drug/Timepoint index is flattened into ordinary columns before pivoting.
survival_count = survival_count.reset_index()
survival_count_pivot = survival_count.pivot(
    index="Timepoint", columns="Drug", values="Mouse Count"
)

# Preview the Data Frame
survival_count_pivot.head()
Drug Capomulin Ceftamin Infubinol Ketapril Naftisol Placebo Propriva Ramicane Stelasyn Zoniferol
Timepoint
0 25 25 25 25 25 25 26 25 26 25
5 25 21 25 23 23 24 25 25 25 24
10 25 20 21 22 21 24 23 24 23 22
15 24 19 21 19 21 20 17 24 23 21
20 23 18 20 19 20 19 17 23 21 17
# Generate the Plot (Accounting for percentages)
# Each series is normalised by that drug's own count at the first timepoint
# rather than a hard-coded 25, because starting cohort sizes differ between
# drugs (some columns of survival_count_pivot start at 26). Labels are passed
# explicitly so the legend has entries, and all markers share one size.
survival_plot_styles = [
    ("Capomulin", "ro"),
    ("Infubinol", "b^"),
    ("Ketapril", "gs"),
    ("Placebo", "kd"),
]
for drug, fmt in survival_plot_styles:
    counts = survival_count_pivot[drug]
    plt.plot(
        100 * counts / counts.iloc[0],
        fmt,
        linestyle="dashed",
        markersize=5,
        linewidth=0.50,
        label=drug,
    )
plt.title("Survival During Treatment")
plt.ylabel("Survival Rate (%)")
plt.xlabel("Time (Days)")
plt.grid(True)
plt.legend(loc="best", fontsize="small", fancybox=True)

# Save the Figure
plt.savefig("analysis/Fig3.png")

# Show the Figure
plt.show()

png

Summary Bar Graph

# Calculate the percent changes for each drug: change in mean tumor volume
# between the first and last timepoints, relative to the first.
tumor_pct_change =  100 * (tumor_vols_pivot_mean.iloc[-1] - tumor_vols_pivot_mean.iloc[0]) / tumor_vols_pivot_mean.iloc[0]

# Uncertainty of that percent change. The previous formula took the percent
# change of the SEM itself and divided by the first-timepoint SEM, which is 0
# in the SEM preview, so it produced inf/NaN. Instead, express the
# final-timepoint SEM as a percentage of the baseline mean.
# NOTE(review): this treats the baseline as exact (zero SEM) -- confirm that
# holds for every drug, not only the rows shown in the preview.
tumor_pct_change_sem =  100 * tumor_vols_pivot_sem.iloc[-1] / tumor_vols_pivot_mean.iloc[0]

# Display the data to confirm
tumor_pct_change
Drug
Capomulin   -19.475303
Ceftamin     42.516492
Infubinol    46.123472
Ketapril     57.028795
Naftisol     53.923347
Placebo      51.297960
Propriva     47.241175
Ramicane    -22.320900
Stelasyn     52.085134
Zoniferol    46.579751
dtype: float64
# Gather the percent change of each drug shown in the summary chart, in bar order
pct_changes = tuple(
    tumor_pct_change[drug_name]
    for drug_name in ("Capomulin", "Infubinol", "Ketapril", "Placebo")
)

# Draw the first drug as a green "pass" bar and the rest as red "fail" bars
fig, ax = plt.subplots()
ind = np.arange(len(pct_changes))
width = 1
rectsPass = ax.bar(ind[0], pct_changes[0], width, color='green')
rectsFail = ax.bar(ind[1:], pct_changes[1:], width, color='red')

# Titles and axis labels
ax.set_title('Tumor Change Over 45 Day Treatment')
ax.set_ylabel('% Tumor Volume Change')

# Tick positions and their drug-name labels
ax.set_xticks(ind + 0.5)
ax.set_xticklabels(['Capomulin', 'Infubinol', 'Ketapril', 'Placebo'])

# Fixed y-range and grid
ax.set_autoscaley_on(False)
ax.set_ylim(-30, 70)
ax.grid(True)

# Use functions to label the percentages of changes
def autolabelFail(rects):
    """Label each bar in ``rects`` with its height as a whole-number percent.

    The text is drawn in white on the module-level ``ax``, horizontally
    centred on the bar and anchored at y = 3.
    """
    for bar in rects:
        caption = '%d%%' % int(bar.get_height())
        x_mid = bar.get_x() + bar.get_width()/2.
        ax.text(x_mid, 3, caption, ha='center', va='bottom', color="white")

def autolabelPass(rects):
    """Label each bar in ``rects`` with its (negative) percent change.

    The text is drawn in white on the module-level ``ax``, horizontally
    centred on the bar and anchored at y = -8.

    The label always carries a single leading minus sign. The magnitude of the
    bar height is used because, depending on the matplotlib version, a
    downward bar may report either a negative or a positive height; formatting
    the raw value after a hard-coded '-' would render '--19%' in the former
    case.
    """
    for rect in rects:
        magnitude = abs(int(rect.get_height()))
        ax.text(rect.get_x() + rect.get_width()/2., -8,
                '-%d%% ' % magnitude,
                ha='center', va='bottom', color="white")

# Annotate the bars with their percent changes
autolabelPass(rectsPass)
autolabelFail(rectsFail)

# Save the Figure
fig.savefig("analysis/Fig4.png")

# Show the Figure.
# plt.show() is used (as for the other figures) instead of fig.show(), which
# only emits a "non-GUI backend ... cannot show the figure" UserWarning under
# the inline backend.
plt.show()
C:\Users\Ahmed\Anaconda3\envs\PythonData\lib\site-packages\matplotlib\figure.py:397: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure
  "matplotlib is currently using a non-GUI backend, "

png

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published