-
Notifications
You must be signed in to change notification settings - Fork 0
/
runner.py
96 lines (79 loc) · 3.88 KB
/
runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# runner.py
# run model code from the command line, separate from the Flask application
# save the output files, or as currently written, just save the HTML file that puts together the results
# TODO : incorporate this into the flask command line using click per Flask docs
# eg. flask run; flask test; flask run_model <params>
# import model code from the same module used by the flask application
# from app import models #.models import intial_ID_convert, run_model, make_results_html, alter_validation_df, make_validation_df
# from app.models import data_path, max_num_genes
import argparse
import os, sys
from mljob import geneplexus
def read_input_gene_file(filename):
    """Read a gene file and return its gene ids as a list of strings.

    The file is read as one gene id per line. Single quotes are removed,
    surrounding whitespace (spaces, tabs) is stripped from every id, and
    blank or whitespace-only lines are skipped so they never end up as
    empty gene ids in the result.

    copied directly from app/views.py

    Args:
        filename: path of the text file to read.

    Returns:
        List of gene id strings in file order; empty if the file has no ids.
    """
    # TODO move this into the job or geneplexus module
    with open(filename) as genefile:
        contents = genefile.read()
    # remove any single quotes if they exist
    no_quotes = contents.replace("'", "")
    # one id per line; strip all surrounding whitespace, not just spaces
    stripped_lines = (line.strip() for line in no_quotes.splitlines())
    # drop blank lines (e.g. a trailing newline or spacer lines in the file)
    return [gene for gene in stripped_lines if gene]
if __name__ == "__main__":
    # Command-line entry point: parse the options, read the input gene file,
    # run the geneplexus model and print the rendered HTML to stdout.
    # Progress and log messages go to stderr so stdout holds only the HTML.
    # TODO send these args in a single package e.g. JSON
    ### gather args
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--output_path',
                        default='.',
                        type=str,
                        help='folder to save the output data files')
    parser.add_argument('-n', '--net_type',
                        default='BioGRID',
                        type=str,
                        help='options are BioGRID, STRING-EXP, STRING, GIANT-TN')
    parser.add_argument('-f', '--features',
                        default='Embedding',
                        type=str,
                        help='options are Embedding, Adjacency, Influence')
    parser.add_argument('-g', '--GSC',
                        default='GO',
                        type=str,
                        help='options are GO, DisGeNet')
    parser.add_argument('-j', '--jobname',
                        default='jobrunner',
                        type=str,
                        help='jobname sets the name of the folder to read/write to')
    parser.add_argument('-d', '--data_path',
                        default='./',
                        type=str,
                        help='path with the backend data files are located')
    # cross validation - allow for two ways to indicate if we are doing CV
    parser.add_argument('-cv', '--cross_validation',
                        dest='CV', action='store_true')
    parser.add_argument('--no-cv', '--no_cross_validation',
                        dest='CV', action='store_false')
    parser.set_defaults(CV=True)
    # NOTE(review): args.CV is parsed here but never used below -- confirm
    # whether geneplexus.run_and_render is supposed to receive it.
    # the final arg is just the filename of the genefile to read in
    parser.add_argument('gene_file')
    args = parser.parse_args()

    # setup logger
    import logging
    # '%(message)s' is the fully formatted message; the previous '%(msg)s'
    # printed the raw template without interpolating any logging arguments.
    logging.basicConfig(format='%(message)s', stream=sys.stderr, level=logging.INFO)

    #### run it
    # TODO save args in a log file, perhaps from stderr
    input_genes = read_input_gene_file(filename=args.gene_file)
    print(f"processing {len(input_genes)} input genes", file=sys.stderr)

    # set additional config (if needed) available for geneplexus ML
    from dotenv import load_dotenv
    load_dotenv()

    # data_path is a module-level var in geneplexus that must be set prior to running
    geneplexus.data_path = args.data_path

    # run and save the html. this command has by-product of also saving data files
    html = geneplexus.run_and_render(input_genes, net_type=args.net_type,
                                     features=args.features, GSC=args.GSC,
                                     jobname=args.jobname,
                                     output_path=args.output_path)
    print(html)