Skip to content

Commit

Permalink
Adding use_allele_specific_annotation arg and fixing task with empty …
Browse files Browse the repository at this point in the history
…input in JointVcfFiltering WDL (#8027)

* Small changes to JointVCFFiltering WDL

* making default for use_allele_specific_annotations

* addressing comments
  • Loading branch information
meganshand authored and rsasch committed Oct 17, 2022
1 parent 78fdd8d commit c95945c
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,10 @@
"JointVcfFiltering.basename": "test_10_samples",
"JointVcfFiltering.snp_annotations": "-A ReadPosRankSum -A FS -A SOR -A QD -A AVERAGE_TREE_SCORE -A AVERAGE_ASSEMBLED_HAPS -A AVERAGE_FILTERED_HAPS",
"JointVcfFiltering.indel_annotations": "-A MQRankSum -A ReadPosRankSum -A FS -A SOR -A QD -A AVERAGE_TREE_SCORE",
<<<<<<< HEAD
"JointVcfFiltering.model_backend": "PYTHON_IFOREST"
=======
"JointVcfFiltering.model_backend": "PYTHON_IFOREST",
"JointVcfFiltering.use_allele_specific_annotations": false
>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027))
}
76 changes: 76 additions & 0 deletions scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ workflow JointVcfFiltering {
String indel_annotations
File? gatk_override

<<<<<<< HEAD
=======
Boolean use_allele_specific_annotations
>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027))
String snp_resource_args = "--resource:hapmap,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/hapmap_3.3.hg38.vcf.gz --resource:omni,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/1000G_omni2.5.hg38.vcf.gz --resource:1000G,training=true,calibration=false gs://gcp-public-data--broad-references/hg38/v0/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
String indel_resource_args = "--resource:mills,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
}
Expand All @@ -46,6 +51,10 @@ workflow JointVcfFiltering {
resource_args = snp_resource_args,
basename = basename,
interval_list = extract_interval_list,
<<<<<<< HEAD
=======
use_allele_specific_annotations = use_allele_specific_annotations,
>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027))
gatk_override = gatk_override,
gatk_docker = gatk_docker
}
Expand All @@ -59,6 +68,10 @@ workflow JointVcfFiltering {
resource_args = indel_resource_args,
basename = basename,
interval_list = extract_interval_list,
<<<<<<< HEAD
=======
use_allele_specific_annotations = use_allele_specific_annotations,
>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027))
gatk_override = gatk_override,
gatk_docker = gatk_docker
}
Expand Down Expand Up @@ -102,6 +115,10 @@ workflow JointVcfFiltering {
interval_list = score_interval_list,
model_files = TrainVariantAnnotationModelSNPs.outputs,
resource_args = snp_resource_args,
<<<<<<< HEAD
=======
use_allele_specific_annotations = use_allele_specific_annotations,
>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027))
gatk_override = gatk_override,
gatk_docker = gatk_docker
}
Expand All @@ -120,6 +137,7 @@ workflow JointVcfFiltering {
interval_list = score_interval_list,
model_files = TrainVariantAnnotationModelINDELs.outputs,
resource_args = indel_resource_args,
<<<<<<< HEAD
gatk_override = gatk_override,
gatk_docker = gatk_docker
}
Expand All @@ -128,6 +146,18 @@ workflow JointVcfFiltering {
output {
Array[File] variant_filtered_vcf = ScoreVariantAnnotationsINDELs.output_vcf
Array[File] variant_filtered_vcf_index = ScoreVariantAnnotationsINDELs.output_vcf_index
=======
use_allele_specific_annotations = use_allele_specific_annotations,
gatk_override = gatk_override,
gatk_docker = gatk_docker
}
}
output {
Array[File] variant_scored_vcf = ScoreVariantAnnotationsINDELs.output_vcf
Array[File] variant_scored_vcf_index = ScoreVariantAnnotationsINDELs.output_vcf_index
>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027))
}
}
Expand All @@ -143,6 +173,10 @@ task ExtractVariantAnnotations {
String annotations
String resource_args
File? interval_list
<<<<<<< HEAD
=======
Boolean use_allele_specific_annotations
>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027))
Int memory_mb = 14000
Int command_mem = memory_mb - 1000
Expand All @@ -157,6 +191,10 @@ task ExtractVariantAnnotations {
-V ~{input_vcf} \
-O ~{basename}.~{mode} \
~{annotations} \
<<<<<<< HEAD
=======
~{if use_allele_specific_annotations then "--use-allele-specific-annotations" else ""} \
>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027))
~{"-L " + interval_list} \
--mode ~{mode} \
~{resource_args}
Expand Down Expand Up @@ -232,13 +270,18 @@ task ScoreVariantAnnotations {
File extracted_training_vcf_index
File? interval_list
Array[File] model_files
<<<<<<< HEAD
=======
Boolean use_allele_specific_annotations
>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027))
Int memory_mb = 16000
Int command_mem = memory_mb - 1000
}
Int disk_size = ceil(size(vcf, "GB") *2 + 50)
command {
<<<<<<< HEAD
set -e
ln -s ~{sep=" . && ln -s " model_files} .
Expand All @@ -261,6 +304,39 @@ task ScoreVariantAnnotations {
output {
File scores = "~{basename}.~{mode}.scores.hdf5"
File annots = "~{basename}.~{mode}.annot.hdf5"
=======
zgrep -v '#' ~{vcf} > empty.txt
set -e
if [ -s empty.txt ]; then
ln -s ~{sep=" . && ln -s " model_files} .
export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override}
gatk --java-options "-Xmx~{command_mem}m" \
ScoreVariantAnnotations \
~{"-L " + interval_list} \
-V ~{vcf} \
-O ~{basename}.~{mode} \
--model-backend ~{model_backend} \
~{"--python-script " + python_script} \
--model-prefix ~{basename} \
~{annotations} \
~{if use_allele_specific_annotations then "--use-allele-specific-annotations" else ""} \
-mode ~{mode} \
--resource:extracted,extracted=true ~{extracted_training_vcf} \
~{resource_args}
else
echo "Input VCF was empty so we'll return the same VCF that was input."
echo "Scores and annot hdf5 files will not be produced since the input was empty."
ln -s ~{vcf} ~{basename}.~{mode}.vcf.gz
ln -s ~{vcf_index} ~{basename}.~{mode}.vcf.gz.tbi
fi
}
output {
File? scores = "~{basename}.~{mode}.scores.hdf5"
File? annots = "~{basename}.~{mode}.annot.hdf5"
>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027))
File output_vcf = "~{basename}.~{mode}.vcf.gz"
File output_vcf_index = "~{basename}.~{mode}.vcf.gz.tbi"
}
Expand Down

0 comments on commit c95945c

Please sign in to comment.