Skip to content

Commit

Permalink
distributed workload for hlatyping
Browse files Browse the repository at this point in the history
  • Loading branch information
riasc committed Jul 5, 2024
1 parent 0d34988 commit b75b146
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 242 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Prioritization of neoantigens is now done separately for each variant type (speeds up the process)
- NMD information (e.g., escape rule,...) is now also calculated for all variants

## [0.2.9] - 2024-07-04

### Fix

- Split rules in HLA typing to ensure better distribution of the workload
- Changed order in HLA typing rules (BAM files are now part of single-end)
- samtools fastq is only called for BAM files
- Filtering now takes its input directly from the preprocessed/raw reads

## [0.2.8] - 2024-06-26

### Fix
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/altsplicing.smk
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ rule spladder:
confidence=f"""{config["altsplicing"]["confidence"]}""",
iteration=f"""{config["altsplicing"]["iterations"]}""",
edgelimit=f"""{config["altsplicing"]["edgelimit"]}"""
threads: config['threads']
threads: 20
shell:
"""
bash workflow/scripts/run_spladder.sh \
Expand Down
156 changes: 47 additions & 109 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -161,123 +161,53 @@ def get_preproc_input(wildcards):


########### HLA GENOTYPING ##########
def get_input_hlatyping_SE(wildcards):
# special case: filetype is BAM (single-end) - just return (raw) BAM file
def get_input_reads_hlatyping_BAM(wildcards):
seqtype = "dnaseq" if wildcards.nartype == "DNA" else "rnaseq"
if config["data"][f"{seqtype}_filetype"] == ".bam":
return config["data"][seqtype][wildcards.group]
return config["data"][seqtype][wildcards.group]

if config["preproc"]["activate"]:
return expand("results/{sample}/{seqtype}/reads/{group}_preproc.fq.gz",
sample = wildcards.sample,
seqtype = "dnaseq" if wildcards.nartype == "DNA" else "rnaseq",
group = wildcards.group)
def get_input_filtering_hlatyping_SE(wildcards):
seqtype = "dnaseq" if wildcards.nartype == "DNA" else "rnaseq"
if config["data"][f"{seqtype}_filetype"] == ".bam":
return expand("results/{sample}/{seqtype}/reads/{group}_flt_BAM.fq",
sample=wildcards.sample,
seqtype=seqtype,
group=wildcards.group)
else:
return config["data"][f"{seqtype}"][wildcards.group]
if config["preproc"]["activate"]:
return expand("results/{sample}/{seqtype}/reads/{group}_preproc.fq.gz",
sample=wildcards.sample,
seqtype=seqtype,
group=wildcards.group)
else:
return config["data"][seqtype][wildcards.group]

def get_input_hlatyping_PE(wildcards):
def get_input_filtering_hlatyping_PE(wildcards):
if config["preproc"]["activate"]:
return dict(
zip(
["fwd", "rev"],
expand("results/{sample}/{seqtype}/reads/{group}_{pair}_preproc.fq.gz",
sample=wildcards.sample,
seqtype = "dnaseq" if wildcards.nartype == "DNA" else "rnaseq",
group=wildcards.group,
pair=["R1","R2"])
)
)
return expand("results/{sample}/{seqtype}/reads/{group}_{readpair}_preproc.fq.gz",
sample=wildcards.sample,
seqtype="dnaseq" if wildcards.nartype == "DNA" else "rnaseq",
group=wildcards.group,
nartype=wildcards.nartype,
readpair=wildcards.readpair)
else:
return dict(
zip(
["fwd", "rev"],
config["data"][f"{wildcards.seqtype}"][wildcards.group]
)
)


def get_filtered_reads_hlatyping_SE(wildcards):
bam = []
idx = []

if wildcards.nartype == "DNA":
if len(config["data"]["dnaseq"]) != 0:
for key in config["data"]["dnaseq"].keys():
bam += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_SE.bam",
sample=wildcards.sample,
group=key)
idx += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_SE.bam.bai",
sample=wildcards.sample,
group=key)


if wildcards.nartype == "RNA":
if len(config["data"]["rnaseq"]) != 0:
for key in config["data"]["rnaseq"].keys():
bam += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_SE.bam",
sample=wildcards.sample,
group=key)
idx += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_SE.bam.bai",
sample=wildcards.sample,
group=key)

return dict(
zip(
["bam", "idx"],
[bam, idx]
)
)


def get_filtered_reads_hlatyping_PE(wildcards):
bam = []
idx = []

if wildcards.nartype == "DNA":
if len(config["data"]["dnaseq"]) != 0:
for key in config["data"]["dnaseq"].keys():
bam += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_PE_{readpair}.bam",
sample=wildcards.sample,
group=key,
readpair=wildcards.readpair)
idx += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_PE_{readpair}.bam.bai",
sample=wildcards.sample,
group=key,
readpair=wildcards.readpair)

if wildcards.nartype == "RNA":
if len(config["data"]["rnaseq"]) != 0:
for key in config["data"]["rnaseq"].keys():
bam += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_PE_{readpair}.bam",
sample=wildcards.sample,
group=key,
readpair=wildcards.readpair)

idx += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_PE_{readpair}.bam.bai",
sample=wildcards.sample,
group=key,
readpair=wildcards.readpair)

return dict(
zip(
["bam", "idx"],
[bam, idx]
)
)
seqtype = "dnaseq" if wildcards.nartype == "DNA" else "rnaseq"
return config["data"][f"{wildcards.seqtype}"][wildcards.group]


def aggregate_mhcI_SE(wildcards):
checkpoint_output = checkpoints.split_reads_mhcI_SE.get(**wildcards).output[0]
return expand("results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_SE/{no}_result.tsv",
return expand("results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_SE/{no}_result.tsv",
sample=wildcards.sample,
group=wildcards.group,
nartype=wildcards.nartype,
no=glob_wildcards(os.path.join(checkpoint_output, "R_{no}.bam")).no)


def aggregate_mhcI_PE(wildcards):
checkpoint_output = checkpoints.split_reads_mhcI_PE.get(**wildcards).output[0]
return expand("results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_PE/{no}_result.tsv",
return expand("results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_PE/{no}_result.tsv",
sample=wildcards.sample,
group=wildcards.group,
nartype=wildcards.nartype,
no=glob_wildcards(os.path.join(checkpoint_output, "R1_{no}.bam")).no)

Expand All @@ -287,21 +217,29 @@ def get_all_mhcI_alleles(wildcards):

if "DNA" in config["hlatyping"]["MHC-I_mode"]:
if len(config["data"]["dnaseq"]) != 0:
if config["data"]["dnaseq_readtype"] == "SE":
values += expand("results/{sample}/hla/mhc-I/genotyping/DNA_flt_merged_SE.tsv",
sample=wildcards.sample)
if config["data"]["dnaseq_readtype"] == "SE" or config["data"]["dnaseq_filetype"] == ".bam":
for key in config["data"]["dnaseq"].keys():
values += expand("results/{sample}/hla/mhc-I/genotyping/{group}_DNA_flt_SE.tsv",
sample=wildcards.sample,
group=key)
elif config["data"]["dnaseq_readtype"] == "PE":
values += expand("results/{sample}/hla/mhc-I/genotyping/DNA_flt_merged_PE.tsv",
sample=wildcards.sample)
for key in config["data"]["dnaseq"].keys():
values += expand("results/{sample}/hla/mhc-I/genotyping/{group}_DNA_flt_PE.tsv",
sample=wildcards.sample,
group=key)

if "RNA" in config["hlatyping"]["MHC-I_mode"]:
if len(config["data"]["rnaseq"]) != 0:
if config["data"]["rnaseq_readtype"] == "SE":
values += expand("results/{sample}/hla/mhc-I/genotyping/RNA_flt_merged_SE.tsv",
sample=wildcards.sample)
if config["data"]["rnaseq_readtype"] == "SE" or config["data"]["rnaseq_filetype"] == ".bam":
for key in config["data"]["rnaseq"].keys():
values += expand("results/{sample}/hla/mhc-I/genotyping/{group}_RNA_flt_SE.tsv",
sample=wildcards.sample,
group=key)
elif config["data"]["rnaseq_readtype"] == "PE":
values += expand("results/{sample}/hla/mhc-I/genotyping/RNA_flt_merged_PE.tsv",
sample=wildcards.sample)
for key in config["data"]["rnaseq"].keys():
values += expand("results/{sample}/hla/mhc-I/genotyping/{group}_RNA_flt_PE.tsv",
sample=wildcards.sample,
group=key)

if "custom" in config["hlatyping"]["MHC-I_mode"]:
values += [config["data"]["custom"]["hlatyping"]["MHC-I"]]
Expand Down
Loading

0 comments on commit b75b146

Please sign in to comment.