From 2889769ab3fb0cba9ea1f8ff2f8f083816f2ddbe Mon Sep 17 00:00:00 2001
From: rickymagner <81349869+rickymagner@users.noreply.github.com>
Date: Tue, 19 Mar 2024 11:23:57 -0400
Subject: [PATCH 1/5] Update README to include list of popular software
included in docker image
---
README.md | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/README.md b/README.md
index 1dc2bb366b8..184c6e7c5ac 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,7 @@ releases of the toolkit.
* [Requirements](#requirements)
* [Quick Start Guide](#quickstart)
* [Downloading GATK4](#downloading)
+ * [Tools Included in Docker Image](#dockerSoftware)
* [Building GATK4](#building)
* [Running GATK4](#running)
* [Passing JVM options to gatk](#jvmoptions)
@@ -115,6 +116,34 @@ You can download and run pre-built versions of GATK4 from the following places:
* You can download a GATK4 docker image from [our dockerhub repository](https://hub.docker.com/r/broadinstitute/gatk/). We also host unstable nightly development builds on [this dockerhub repository](https://hub.docker.com/r/broadinstitute/gatk-nightly/).
* Within the docker image, run gatk commands as usual from the default startup directory (/gatk).
+### Tools Included in Docker Image
+
+Our docker image contains the following bioinformatics tools, which can be run by invoking the tool name from the command line:
+* bedtools
+* samtools
+* bcftools
+* tabix
+
+We also include an installation of Python3 (3.6.10) with the following popular packages included:
+* numpy
+* scipy
+* tensorflow
+* pymc3
+* keras
+* scikit-learn
+* matplotlib
+* pandas
+* biopython
+* pyvcf
+* pysam
+
+We also include an installation of R (3.6.2) with the following popular packages included:
+* data.table
+* dplyr
+* ggplot2
+
+For more details on system packages, see the GATK [Base Dockerfile](scripts/docker/gatkbase/Dockerfile) and for more details on the Python3/R packages, see the [Conda environment setup file](scripts/gatkcondaenv.yml.template).
+
## Building GATK4
* **To do a full build of GATK4, first clone the GATK repository using "git clone", then run:**
From ba040fa478d760170c3850e3ce22fb0a69620bf8 Mon Sep 17 00:00:00 2001
From: rickymagner <81349869+rickymagner@users.noreply.github.com>
Date: Tue, 19 Mar 2024 13:29:04 -0400
Subject: [PATCH 2/5] Add versions and Dockerfile reminder
---
README.md | 8 ++++----
scripts/docker/gatkbase/Dockerfile | 1 +
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index 184c6e7c5ac..94555c9f831 100644
--- a/README.md
+++ b/README.md
@@ -119,10 +119,10 @@ You can download and run pre-built versions of GATK4 from the following places:
### Tools Included in Docker Image
Our docker image contains the following bioinformatics tools, which can be run by invoking the tool name from the command line:
-* bedtools
-* samtools
-* bcftools
-* tabix
+* bedtools (v2.30.0)
+* samtools (1.13)
+* bcftools (1.13)
+* tabix (1.13+ds)
We also include an installation of Python3 (3.6.10) with the following popular packages included:
* numpy
diff --git a/scripts/docker/gatkbase/Dockerfile b/scripts/docker/gatkbase/Dockerfile
index 7b758d19829..209d1ef31db 100644
--- a/scripts/docker/gatkbase/Dockerfile
+++ b/scripts/docker/gatkbase/Dockerfile
@@ -1,5 +1,6 @@
# Using OpenJDK 17
# This Dockerfile does not require any files that are in the GATK4 repo.
+# NOTE: If you updated the ubtuntu version make sure to update the samtools/bcftools/bedtools versions in the README
FROM ubuntu:22.04
# Avoid interactive prompts during apt installs/upgrades
From 80a7e94c36d8c66f9a312547fd01bf75e0588e64 Mon Sep 17 00:00:00 2001
From: rickymagner <81349869+rickymagner@users.noreply.github.com>
Date: Tue, 19 Mar 2024 13:29:40 -0400
Subject: [PATCH 3/5] Add comment on finding package versions
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 94555c9f831..26e731c26db 100644
--- a/README.md
+++ b/README.md
@@ -142,7 +142,7 @@ We also include an installation of R (3.6.2) with the following popular packages
* dplyr
* ggplot2
-For more details on system packages, see the GATK [Base Dockerfile](scripts/docker/gatkbase/Dockerfile) and for more details on the Python3/R packages, see the [Conda environment setup file](scripts/gatkcondaenv.yml.template).
+For more details on system packages, see the GATK [Base Dockerfile](scripts/docker/gatkbase/Dockerfile). For more details on the Python3/R packages, including their versions, see the [Conda environment setup file](scripts/gatkcondaenv.yml.template).
## Building GATK4
From cafdecacc153d9fda598a8b24851d234e63fef98 Mon Sep 17 00:00:00 2001
From: rickymagner <81349869+rickymagner@users.noreply.github.com>
Date: Tue, 19 Mar 2024 13:48:00 -0400
Subject: [PATCH 4/5] Fix typo and add extra conda comment
---
scripts/docker/gatkbase/Dockerfile | 2 +-
scripts/gatkcondaenv.yml.template | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/scripts/docker/gatkbase/Dockerfile b/scripts/docker/gatkbase/Dockerfile
index 209d1ef31db..0114ecd3519 100644
--- a/scripts/docker/gatkbase/Dockerfile
+++ b/scripts/docker/gatkbase/Dockerfile
@@ -1,6 +1,6 @@
# Using OpenJDK 17
# This Dockerfile does not require any files that are in the GATK4 repo.
-# NOTE: If you updated the ubtuntu version make sure to update the samtools/bcftools/bedtools versions in the README
+# NOTE: If you updated the ubuntu version make sure to update the samtools/bcftools/bedtools versions in the README
FROM ubuntu:22.04
# Avoid interactive prompts during apt installs/upgrades
diff --git a/scripts/gatkcondaenv.yml.template b/scripts/gatkcondaenv.yml.template
index a87b2acdda3..75284b829e8 100644
--- a/scripts/gatkcondaenv.yml.template
+++ b/scripts/gatkcondaenv.yml.template
@@ -10,6 +10,8 @@
# used by the testGATKPythonEnvironmentPackagePresent test in PythonEnvironmentIntegrationTest needs to be updated
# to reflect the changes.
#
+# NOTE: If you update any of the packages below, please make sure the main README is up to date with the latest package install information.
+#
name: $condaEnvName
channels:
# if channels other than conda-forge are added and the channel order is changed (note that conda channel_priority is currently set to flexible),
From 954d16f28e39af93960027b6946810264e69d520 Mon Sep 17 00:00:00 2001
From: rickymagner <81349869+rickymagner@users.noreply.github.com>
Date: Tue, 19 Mar 2024 13:52:53 -0400
Subject: [PATCH 5/5] updated -> update in comment
---
scripts/docker/gatkbase/Dockerfile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/docker/gatkbase/Dockerfile b/scripts/docker/gatkbase/Dockerfile
index 0114ecd3519..201ff3c6237 100644
--- a/scripts/docker/gatkbase/Dockerfile
+++ b/scripts/docker/gatkbase/Dockerfile
@@ -1,6 +1,6 @@
# Using OpenJDK 17
# This Dockerfile does not require any files that are in the GATK4 repo.
-# NOTE: If you updated the ubuntu version make sure to update the samtools/bcftools/bedtools versions in the README
+# NOTE: If you update the Ubuntu version, make sure to update the samtools/bcftools/bedtools/tabix versions listed in the README
FROM ubuntu:22.04
# Avoid interactive prompts during apt installs/upgrades