From c95945ced3b39f561a8c5256af578cf77a9a3c64 Mon Sep 17 00:00:00 2001 From: meganshand Date: Thu, 22 Sep 2022 10:30:31 -0400 Subject: [PATCH] Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027) * Small changes to JointVCFFiltering WDL * making default for use_allele_specific_annotations * addressing comments --- .../vcf_site_level_filtering_travis.json | 5 ++ .../JointVcfFiltering.wdl | 76 +++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/scripts/vcf_site_level_filtering_cromwell_tests/vcf_site_level_filtering_travis.json b/scripts/vcf_site_level_filtering_cromwell_tests/vcf_site_level_filtering_travis.json index 2823cdc865c..250d8392aea 100644 --- a/scripts/vcf_site_level_filtering_cromwell_tests/vcf_site_level_filtering_travis.json +++ b/scripts/vcf_site_level_filtering_cromwell_tests/vcf_site_level_filtering_travis.json @@ -9,5 +9,10 @@ "JointVcfFiltering.basename": "test_10_samples", "JointVcfFiltering.snp_annotations": "-A ReadPosRankSum -A FS -A SOR -A QD -A AVERAGE_TREE_SCORE -A AVERAGE_ASSEMBLED_HAPS -A AVERAGE_FILTERED_HAPS", "JointVcfFiltering.indel_annotations": "-A MQRankSum -A ReadPosRankSum -A FS -A SOR -A QD -A AVERAGE_TREE_SCORE", +<<<<<<< HEAD "JointVcfFiltering.model_backend": "PYTHON_IFOREST" +======= + "JointVcfFiltering.model_backend": "PYTHON_IFOREST", + "JointVcfFiltering.use_allele_specific_annotations": false +>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027)) } \ No newline at end of file diff --git a/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl b/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl index 87b520aca0d..193356a393b 100644 --- a/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl +++ b/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl @@ -27,6 +27,11 @@ workflow JointVcfFiltering { String indel_annotations File? gatk_override +<<<<<<< HEAD +======= + Boolean use_allele_specific_annotations + +>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027)) String snp_resource_args = "--resource:hapmap,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/hapmap_3.3.hg38.vcf.gz --resource:omni,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/1000G_omni2.5.hg38.vcf.gz --resource:1000G,training=true,calibration=false gs://gcp-public-data--broad-references/hg38/v0/1000G_phase1.snps.high_confidence.hg38.vcf.gz" String indel_resource_args = "--resource:mills,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" } @@ -46,6 +51,10 @@ workflow JointVcfFiltering { resource_args = snp_resource_args, basename = basename, interval_list = extract_interval_list, +<<<<<<< HEAD +======= + use_allele_specific_annotations = use_allele_specific_annotations, +>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027)) gatk_override = gatk_override, gatk_docker = gatk_docker } @@ -59,6 +68,10 @@ workflow JointVcfFiltering { resource_args = indel_resource_args, basename = basename, interval_list = extract_interval_list, +<<<<<<< HEAD +======= + use_allele_specific_annotations = use_allele_specific_annotations, +>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027)) gatk_override = gatk_override, gatk_docker = gatk_docker } @@ -102,6 +115,10 @@ workflow JointVcfFiltering { interval_list = score_interval_list, model_files = TrainVariantAnnotationModelSNPs.outputs, resource_args = snp_resource_args, +<<<<<<< HEAD +======= + use_allele_specific_annotations = use_allele_specific_annotations, +>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027)) gatk_override = gatk_override, gatk_docker = gatk_docker } @@ -120,6 +137,7 @@ workflow JointVcfFiltering { interval_list = score_interval_list, model_files = TrainVariantAnnotationModelINDELs.outputs, resource_args = indel_resource_args, +<<<<<<< HEAD gatk_override = gatk_override, gatk_docker = gatk_docker } @@ -128,6 +146,18 @@ workflow JointVcfFiltering { output { Array[File] variant_filtered_vcf = ScoreVariantAnnotationsINDELs.output_vcf Array[File] variant_filtered_vcf_index = ScoreVariantAnnotationsINDELs.output_vcf_index +======= + use_allele_specific_annotations = use_allele_specific_annotations, + gatk_override = gatk_override, + gatk_docker = gatk_docker + } + + } + + output { + Array[File] variant_scored_vcf = ScoreVariantAnnotationsINDELs.output_vcf + Array[File] variant_scored_vcf_index = ScoreVariantAnnotationsINDELs.output_vcf_index +>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027)) } } @@ -143,6 +173,10 @@ task ExtractVariantAnnotations { String annotations String resource_args File? interval_list +<<<<<<< HEAD +======= + Boolean use_allele_specific_annotations +>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027)) Int memory_mb = 14000 Int command_mem = memory_mb - 1000 @@ -157,6 +191,10 @@ task ExtractVariantAnnotations { -V ~{input_vcf} \ -O ~{basename}.~{mode} \ ~{annotations} \ +<<<<<<< HEAD +======= + ~{if use_allele_specific_annotations then "--use-allele-specific-annotations" else ""} \ +>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027)) ~{"-L " + interval_list} \ --mode ~{mode} \ ~{resource_args} @@ -232,6 +270,10 @@ task ScoreVariantAnnotations { File extracted_training_vcf_index File? interval_list Array[File] model_files +<<<<<<< HEAD +======= + Boolean use_allele_specific_annotations +>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027)) Int memory_mb = 16000 Int command_mem = memory_mb - 1000 @@ -239,6 +281,7 @@ task ScoreVariantAnnotations { Int disk_size = ceil(size(vcf, "GB") *2 + 50) command { +<<<<<<< HEAD set -e ln -s ~{sep=" . && ln -s " model_files} . @@ -261,6 +304,39 @@ task ScoreVariantAnnotations { output { File scores = "~{basename}.~{mode}.scores.hdf5" File annots = "~{basename}.~{mode}.annot.hdf5" +======= + zgrep -v '#' ~{vcf} > empty.txt + set -e + + if [ -s empty.txt ]; then + ln -s ~{sep=" . && ln -s " model_files} . + + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override} + + gatk --java-options "-Xmx~{command_mem}m" \ + ScoreVariantAnnotations \ + ~{"-L " + interval_list} \ + -V ~{vcf} \ + -O ~{basename}.~{mode} \ + --model-backend ~{model_backend} \ + ~{"--python-script " + python_script} \ + --model-prefix ~{basename} \ + ~{annotations} \ + ~{if use_allele_specific_annotations then "--use-allele-specific-annotations" else ""} \ + -mode ~{mode} \ + --resource:extracted,extracted=true ~{extracted_training_vcf} \ + ~{resource_args} + else + echo "Input VCF was empty so we'll return the same VCF that was input." + echo "Scores and annot hdf5 files will not be produced since the input was empty." + ln -s ~{vcf} ~{basename}.~{mode}.vcf.gz + ln -s ~{vcf_index} ~{basename}.~{mode}.vcf.gz.tbi + fi + } + output { + File? scores = "~{basename}.~{mode}.scores.hdf5" + File? annots = "~{basename}.~{mode}.annot.hdf5" +>>>>>>> 99a985c7b (Adding use_allele_specific_annotation arg and fixing task with empty input in JointVcfFiltering WDL (#8027)) File output_vcf = "~{basename}.~{mode}.vcf.gz" File output_vcf_index = "~{basename}.~{mode}.vcf.gz.tbi" }