Skip to content

Commit

Permalink
Merge pull request #308 from jodyphelan/dev
Browse files (browse the repository at this point in the history)
Dev
  • Loading branch information
jodyphelan committed Oct 18, 2023
2 parents afc89fc + 21043a3 commit 8020c5c
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 42 deletions.
33 changes: 18 additions & 15 deletions tb-profiler
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ def main_profile(args):
if pp.nofolder(d:=args.dir+"/results"):
os.mkdir(d)

if args.bam_for_depth:
args.bam = args.bam_for_depth

if args.snp_dist:
tbp.run_snp_dists(args,results)

Expand Down Expand Up @@ -309,7 +312,7 @@ def int_2_or_more(arg):

parser = argparse.ArgumentParser(description='tb-profiler: a tool to predict drug resistance and infer lineages',formatter_class=ArgumentDefaultsRichHelpFormatter,add_help=False)
parser.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
subparsers = parser.add_subparsers(help="Task to perform")

# Profile #
Expand Down Expand Up @@ -366,8 +369,9 @@ algorithm.add_argument('--threads','-t',default=1,help='Threads to use',type=int
algorithm.add_argument('--ram',default=2,help='Maximum memory to use',type=int)

other=parser_sub.add_argument_group("Other options")
other.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
other.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
other.add_argument('--delly_vcf',help=argparse.SUPPRESS)
other.add_argument('--bam_for_depth',help=argparse.SUPPRESS)
other.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
other.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
other.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
Expand All @@ -393,7 +397,7 @@ parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser_sub.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.set_defaults(func=main_lineage)

parser_sub = subparsers.add_parser('spoligotype', help='Profile spoligotype (experimental feature)', formatter_class=ArgumentDefaultsRichHelpFormatter)
Expand All @@ -417,7 +421,7 @@ parser_sub.add_argument('--threads','-t',default=1,help='Threads to use',type=in
parser_sub.add_argument('--ram',default=2,type=int_2_or_more,help='Maximum memory to use in Gb')
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser_sub.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.set_defaults(func=main_spoligotype)

parser_sub = subparsers.add_parser('collate', help='Collate results form multiple samples together', formatter_class=ArgumentDefaultsRichHelpFormatter)
Expand All @@ -426,14 +430,13 @@ parser_sub.add_argument('--samples',help='File with samples (one per line)')
parser_sub.add_argument('--full',action="store_true",help='Output mutations in main result file')
parser_sub.add_argument('--all_variants',action="store_true",help='Output all variants in variant matrix')
parser_sub.add_argument('--mark_missing',action="store_true",help='An asteriks will be use to mark predictions which are affected by missing data at a drug resistance position')
parser_sub.add_argument('--reporting_af',default=0.1,type=float,help='Minimum allele frequency to call variants')
parser_sub.add_argument('--db',default='tbdb',help='Full path to mutation database json file to use')
parser_sub.add_argument('--external_db',type=str,help='Path to db files prefix (overrides "--db" parameter)')
parser_sub.add_argument('--dir','-d',nargs="+",default=["results"],help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser_sub.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.set_defaults(func=main_collate)


Expand All @@ -451,7 +454,7 @@ parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS
parser_sub.add_argument('--suspect',action="store_true",help=argparse.SUPPRESS)
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser_sub.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.set_defaults(func=main_reformat)

parser_sub = subparsers.add_parser('create_db', help='Generate the files required to run tb-profiler', formatter_class=ArgumentDefaultsRichHelpFormatter)
Expand All @@ -478,7 +481,7 @@ parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser_sub.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.set_defaults(func=main_create_db)

parser_sub = subparsers.add_parser('load_library', help='Load new library', formatter_class=ArgumentDefaultsRichHelpFormatter)
Expand All @@ -487,7 +490,7 @@ parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser_sub.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.set_defaults(func=main_load_library)

parser_sub = subparsers.add_parser('update_tbdb', help='Pull the latest tbdb library and load', formatter_class=ArgumentDefaultsRichHelpFormatter)
Expand All @@ -500,7 +503,7 @@ parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser_sub.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.set_defaults(func=main_update_tbdb)

parser_sub = subparsers.add_parser('batch', help='Run tb-profiler for several samples', formatter_class=ArgumentDefaultsRichHelpFormatter)
Expand All @@ -512,31 +515,31 @@ parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser_sub.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.set_defaults(func=main_batch)

parser_sub = subparsers.add_parser('list_db', help='List loaded databases', formatter_class=ArgumentDefaultsRichHelpFormatter)
parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser_sub.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.set_defaults(func=main_list_db)

parser_sub = subparsers.add_parser('version', help='Output program version and exit', formatter_class=ArgumentDefaultsRichHelpFormatter)
parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
parser_sub.add_argument('--logging',default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
parser_sub.set_defaults(func=main_version)


args = parser.parse_args()

logging.basicConfig(
level=args.logging, format="%(message)s", datefmt="[%X]", handlers=[RichHandler()]
)
level=args.logging, format="%(message)s", datefmt="[%X]", handlers=[RichHandler()]
)

if hasattr(args, 'func'):
args.software_name = __softwarename__
Expand Down
2 changes: 1 addition & 1 deletion tbprofiler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
from .phylo import *
from .rules import *

__version__ = "5.0.0"
__version__ = "5.0.1"
18 changes: 9 additions & 9 deletions tbprofiler/collate.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,21 @@ def get_field_values(rows,cols):
def collate_results(prefix,conf,result_dirs=["./results"],sample_file=None,full_results=True,full_variant_results=True,mark_missing=False,sep="\t"):
for d in result_dirs:
if not os.path.isdir(d):
errlog("\nERROR: Can't find directory %s\n" % d )
logging.error("\nERROR: Can't find directory %s\n" % d )
exit()
set_all_drugs = set()
for l in open(conf["bed"]):
arr = l.rstrip().split()
for d in arr[5].split(","):
set_all_drugs.add(d)
tmp_drugs = ["rifampicin", "isoniazid", "pyrazinamide", "ethambutol", "streptomycin", "fluoroquinolones", "moxifloxacin", "ofloxacin", "levofloxacin", "ciprofloxacin", "aminoglycosides", "amikacin", "kanamycin", "capreomycin", "ethionamide", "para-aminosalicylic_acid", "cycloserine", "linezolid"]
drug_list = []
for d in tmp_drugs:
if d in set_all_drugs:
drug_list.append(d)
for d in sorted(list(set_all_drugs)):
if d not in drug_list:
drug_list.append(d)
# tmp_drugs = ["rifampicin", "isoniazid", "pyrazinamide", "ethambutol", "streptomycin", "fluoroquinolones", "moxifloxacin", "ofloxacin", "levofloxacin", "ciprofloxacin", "aminoglycosides", "amikacin", "kanamycin", "capreomycin", "ethionamide", "para-aminosalicylic_acid", "cycloserine", "linezolid"]
drug_list = conf['drugs']
# for d in tmp_drugs:
# if d in set_all_drugs:
# drug_list.append(d)
# for d in sorted(list(set_all_drugs)):
# if d not in drug_list:
# drug_list.append(d)


samples = {}
Expand Down
2 changes: 1 addition & 1 deletion tbprofiler/docx.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def sanitize(d):
return d


def write_docx(json_results,conf,outfile,template_file = None,reporting_af = 0.0):
def write_docx(json_results,conf,outfile,template_file = None):
if template_file is None:
template_file = sys.prefix+"/share/tbprofiler/default_template.docx"
data = json_results
Expand Down
9 changes: 4 additions & 5 deletions tbprofiler/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ def write_outputs(args,results,template_file = None):
csv_output = args.dir+"/results/"+args.prefix+".results.csv"
docx_output = args.dir+"/results/"+args.prefix+".results.docx"
# tree_output = args.dir+"/results/"+args.prefix+".results.nwk"
if "reporting_af" not in vars(args):
args.reporting_af = 0.1

if "add_columns" not in vars(args):
args.add_columns = None
extra_columns = [x.lower() for x in args.add_columns.split(",")] if args.add_columns else []
Expand All @@ -26,10 +25,10 @@ def write_outputs(args,results,template_file = None):

if args.txt:
logging.info(f"Writing text file: {text_output}")
write_text(results,args.conf,text_output,extra_columns,reporting_af=args.reporting_af,sep="\t",template_file=template_file)
write_text(results,args.conf,text_output,extra_columns,sep="\t",template_file=template_file)
if args.csv:
logging.info(f"Writing csv file: {csv_output}")
write_text(results,args.conf,csv_output,extra_columns,reporting_af=args.reporting_af,sep=",",template_file = template_file)
write_text(results,args.conf,csv_output,extra_columns,sep=",",template_file = template_file)
if args.docx:
logging.info(f"Writing docx file: {docx_output}")
write_docx(results,args.conf,docx_output,reporting_af=args.reporting_af,template_file = args.docx_template)
write_docx(results,args.conf,docx_output,template_file = args.docx_template)
11 changes: 9 additions & 2 deletions tbprofiler/phylo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
def usher_add_sample(args):
logging.info(f"Adding sample to phylogeny")


if args.vcf:
args.wg_vcf = args.vcf
else:
Expand All @@ -21,7 +22,13 @@ def usher_add_sample(args):
args.input_phylo = f"{args.dir}/results/phylo.pb"
args.tmp_output_phylo = f"{args.files_prefix}.pb"
args.output_nwk = f"{args.files_prefix}.nwk"
lock = filelock.FileLock(args.input_phylo + ".lock")

if not os.path.isfile(args.input_phylo):
logging.error("Phylogeny doesn't exist. Please create one first with `tb-profiler-tools`")
quit("Exiting!")


lock = filelock.SoftFileLock(args.input_phylo + ".lock")

cwd = os.getcwd()
with lock:
Expand Down Expand Up @@ -61,7 +68,7 @@ def prepare_usher(treefile,vcf_file):
def prepare_sample_consensus(sample,input_vcf,args):
s = sample
tmp_vcf = f"{args.files_prefix}.{s}.vcf.gz"
run_cmd(f"bcftools norm -m - {input_vcf} | bcftools view -T ^{args.conf['bedmask']} | bcftools filter --SnpGap 50 | annotate_maaf.py | bcftools filter -S . -e 'MAAF<0.7' |bcftools filter -S . -e 'FMT/DP<20' | bcftools view -v snps -Oz -o {tmp_vcf}")
run_cmd(f"bcftools norm -m - {input_vcf} | bcftools view -T ^{args.conf['bedmask']} | bcftools filter --SnpGap 50 | annotate_maaf.py | bcftools filter -S . -e 'MAAF<0.7' |bcftools filter -S . -e 'FMT/DP<20' | rename_vcf_sample.py --sample-name {s} | bcftools view -v snps -Oz -o {tmp_vcf}")
run_cmd(f"bcftools index {tmp_vcf}")

mask_bed = f"{args.files_prefix}.{s}.mask.bed"
Expand Down
16 changes: 9 additions & 7 deletions tbprofiler/snp_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def search(self,json_results,vcf_file, cutoff = 20):

def read_json(filename):
logging.debug("Reading %s" % filename)
lock = filelock.FileLock(filename + ".lock")
lock = filelock.SoftFileLock(filename + ".lock")
with lock:
data = json.load(open(filename))
logging.debug("Finished reading %s" % filename)
Expand All @@ -116,11 +116,13 @@ def run_snp_dists(args,results):
dbname = args.snp_diff_db
else:
dbname = f'{args.dir}/results/snp_diffs.db'
db = DB(dbname)
results["close_samples"] = db.search(results,input_vcf,args.snp_dist)
if not args.snp_diff_no_store:
db.store(results,input_vcf)
results["close_samples"] = [d for d in results["close_samples"] if d["sample"]!=results["id"]]
lock = f"{dbname}.lock"
with filelock.SoftFileLock(lock):
db = DB(dbname)
results["close_samples"] = db.search(results,input_vcf,args.snp_dist)
if not args.snp_diff_no_store:
db.store(results,input_vcf)
results["close_samples"] = [d for d in results["close_samples"] if d["sample"]!=results["id"]]



Expand All @@ -131,7 +133,7 @@ def update_neighbour_snp_dist_output(args,results):
if not os.path.exists(f):
continue
if not sample_in_json(args.prefix,f):
lock = filelock.FileLock(f + ".lock")
lock = filelock.SoftFileLock(f + ".lock")
with lock:
logging.debug("Acquiring lock for %s" % f)
data = json.load(open(f))
Expand Down
4 changes: 2 additions & 2 deletions tbprofiler/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def stringify_annotations(annotation):
annotations.append("|".join([f'{key}={val}' for key,val in ann.items()]))
return ";".join(annotations)

def write_text(json_results,conf,outfile,columns = None,reporting_af = 0.0,sep="\t",add_annotations=True,template_file = None):
def write_text(json_results,conf,outfile,columns = None,sep="\t",add_annotations=True,template_file = None):
json_results = copy(json_results)
if columns==None:
columns = []
Expand All @@ -140,7 +140,7 @@ def write_text(json_results,conf,outfile,columns = None,reporting_af = 0.0,sep="
else:
var["annotation_str"] = ""

json_results = get_summary(json_results,conf,columns = columns,reporting_af=reporting_af)
json_results = get_summary(json_results,conf,columns = columns)
drug_list = get_drug_list(conf["bed"])

json_results["drug_table"] = [[y for y in json_results["drug_table"] if y["Drug"].upper()==d.upper()][0] for d in conf["drugs"] if d in drug_list]
Expand Down

0 comments on commit 8020c5c

Please sign in to comment.