-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
90 lines (86 loc) · 3.25 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
""" Main script
"""
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from pathlib import Path
from processing import process, aligned_representation
from correlation import correlation_analysis
from classification import classification_analysis
# Folder that holds this script; every other path below is derived from it
CWD = os.path.split(os.path.abspath(__file__))[0]
# Root folder of the dhg dataset: "data/DHG" one level above this script
DHG_PATH = os.path.join(CWD, "..", "data", "DHG")
# Dataset sub-folders, in order: raw data, aligned data, processed data
DHG_RAW_DATA, DHG_ALIGNED_DATA, DHG_PROCESSED_DATA = (
    os.path.join(DHG_PATH, sub_folder)
    for sub_folder in ("raw", "aligned", "processed")
)
# CSV file that contains the dhg metadata
METADATA_PATH = os.path.join(DHG_PATH, "metadata.csv")
# Folder where plots and results are saved
FIGURES_PATH = os.path.join(CWD, "figures_no_shift_seed_364")
# Mass range: start value and end value
MZ_START, MZ_END = 50, 1200
# Mass resolution of the data
MASS_RESOLUTION = 0.025
# Lock mass reference peak
LOCK_MASS_PEAK = 885.5498
# Lock mass tolerance
# NOTE(review): "MASK" looks like a typo for "MASS"; the name is kept as-is
# because the rest of the script refers to it.
LOCK_MASK_TOL = 0.3
# Representative peaks
REPRESENTATIVE_PEAKS = [794.5, 834.5, 886.6]
# One seed shared by every random number generator the script seeds
SEED = 364
# Seed, in order: Python's random module, NumPy's global generator and
# TensorFlow's global seed
for _seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_rng(SEED)
# Do not leave the loop helper behind as a module attribute
del _seed_rng
def _align_and_process(metadata_df):
    """Align every raw MSI file, then process every ROI in the metadata.

    Args:
        metadata_df: Metadata table. The columns read here are ``file_name``,
            ``sample_file_name``, ``x_min``, ``x_max``, ``y_min`` and
            ``y_max``.
    """
    # Create the aligned-data folder up front, mirroring what is done for the
    # processed-data folders below (harmless if it already exists)
    Path(DHG_ALIGNED_DATA).mkdir(parents=True, exist_ok=True)
    # Loop over each unique msi imzML file
    for file_name in metadata_df.file_name.unique():
        # Path to the raw msi imzML file
        msi_path = os.path.join(DHG_RAW_DATA, f"{file_name}.imzML")
        # Path to the new msi imzML file after alignment
        output_path = os.path.join(DHG_ALIGNED_DATA, f"{file_name}.imzML")
        # Align MSI
        aligned_representation(
            msi_path, output_path, LOCK_MASS_PEAK, LOCK_MASK_TOL
        )
    # Loop over each ROI in the data frame
    for _, roi in metadata_df.iterrows():
        # Path to the aligned msi imzML file
        msi_path = os.path.join(DHG_ALIGNED_DATA, f"{roi.file_name}.imzML")
        # Folder for the processed data of this ROI
        output_path = os.path.join(
            DHG_PROCESSED_DATA, f"{roi.sample_file_name}"
        )
        # Create output folder if it doesn't exist
        Path(output_path).mkdir(parents=True, exist_ok=True)
        # Process msi
        process(
            msi_path, output_path, roi.x_min, roi.x_max, roi.y_min, roi.y_max,
            MZ_START, MZ_END, MASS_RESOLUTION, REPRESENTATIVE_PEAKS
        )


def main(*, run_processing=False, run_correlation=False,
         run_classification=True):
    """Run the enabled stages of the analysis pipeline.

    The defaults reproduce what the script did when the stages were switched
    off by hand: only the classification analysis runs, and both output
    folders (correlations and classification) are still created.

    Args:
        run_processing: Align the raw files and process every ROI first.
        run_correlation: Run the correlation analysis on the processed data.
        run_classification: Run the classification analysis on the processed
            data.
    """
    # Read metadata csv
    metadata_df = pd.read_csv(METADATA_PATH)

    # Processing
    if run_processing:
        _align_and_process(metadata_df)

    # Correlation analysis
    correlation_path = os.path.join(FIGURES_PATH, "correlations")
    # Create output folder if it doesn't exist
    Path(correlation_path).mkdir(parents=True, exist_ok=True)
    if run_correlation:
        correlation_analysis(DHG_PROCESSED_DATA, correlation_path)

    # Classification analysis
    classification_path = os.path.join(FIGURES_PATH, "classification")
    # Create output folder if it doesn't exist
    Path(classification_path).mkdir(parents=True, exist_ok=True)
    if run_classification:
        # Binary classification label: 1 when who_grade is above 2, else 0
        metadata_df["label"] = (metadata_df["who_grade"] > 2).astype(int)
        classification_analysis(
            DHG_PROCESSED_DATA, classification_path, metadata_df
        )


if __name__ == "__main__":
    main()