Skip to content

Commit

Permalink
ul97 and hard fix some dependencies (#23)
Browse files Browse the repository at this point in the history
* ul97 and hard fix some dependencies

* integrate pfam script

* disorder back
  • Loading branch information
ojcharles committed Jul 21, 2024
1 parent 336c0ca commit c3d9483
Show file tree
Hide file tree
Showing 10 changed files with 10,891 additions and 19,224 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ t/
query/
lib/
test/
help.sh
79 changes: 40 additions & 39 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,64 +2,65 @@ FROM docker.io/ubuntu:22.04


# setup
# Timezone: TZ has to be the ENV key itself. The earlier `ENV ENV TZ=...` form
# defined a variable literally named ENV (whose value was the rest of the line),
# so TZ stayed unset and any shell commands appended after `;` were stored as
# text instead of being executed.
ENV TZ=Europe/London
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
# NOTE(review): the `hwclock --hctosys` that had been appended to the ENV line
# never ran; it is deliberately not carried over as a RUN step - confirm it is
# not needed.
# ARG (not ENV) keeps the noninteractive frontend out of the runtime environment;
# it is declared before the first apt-get call so that call sees it too.
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update


### general dependencies
# apt-get is used rather than apt: apt warns that its CLI is not stable for
# scripted use.
RUN apt-get install -y default-jdk
RUN apt-get install -y wget
RUN apt-get install -y git
RUN apt-get install -y cmake
RUN apt-get install -y mrc
RUN apt-get install -y libboost-all-dev
RUN apt-get install -y libpcre2-8-0 libpcre2-dev liblzma-dev libbz2-dev
RUN apt-get install -y python3.10
RUN apt-get install -y python3-pip
RUN apt-get install -y python3.10-venv
RUN apt-get install -y --no-install-recommends r-base-core
RUN apt-get install -y mafft
RUN apt-get install -y python2


### data generating tools / dependencies
# psiblast
RUN apt-get install -y ncbi-blast+
# req of p2rank
RUN apt-get install -y python3-pymol
# netsurfp3 - ref: https://dtu.biolib.com/NetSurfP-3/
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3 -m pip install --no-cache-dir -U pybiolib
# The two consolidated lines below re-list most of the packages installed one by
# one above and add nano, curl, libcurl4-openssl-dev and libeigen3-dev.
RUN apt-get install -y default-jdk wget git cmake mrc libboost-all-dev libpcre2-dev liblzma-dev libbz2-dev nano curl libcurl4-openssl-dev libeigen3-dev
RUN apt-get install -y python3.10 python3-pip python3.10-venv python2 python3-pymol


### scientific informatics
RUN apt-get install -y ncbi-blast+ mafft hmmer


# R packages
# Prebuilt r-cran-* packages come from the distribution archive via apt-get.
RUN apt-get install -y r-cran-stringr r-cran-reshape2 r-cran-ggpubr r-cran-tidyr \
r-cran-readr r-cran-ape r-cran-devtools r-cran-biocmanager
RUN R CMD javareconf
RUN apt-get install -y curl libcurl4-openssl-dev libeigen3-dev
# `&&` instead of `;`: with `;` the exit status of the RUN step was that of
# `R CMD javareconf` alone, so a failed package install could go unnoticed.
RUN apt-get install -y --no-install-recommends r-base-core r-cran-stringr r-cran-reshape2 r-cran-ggpubr r-cran-tidyr \
r-cran-readr r-cran-ape r-cran-devtools r-cran-biocmanager && R CMD javareconf
# Project scripts and libraries are copied in before the install steps that use them.
COPY ./scripts/ /scripts/
COPY ./lib/ /mflibs/
RUN Rscript /scripts/install_r_packages.R
# NOTE(review): this is invoked directly (not via `bash`), so it relies on the
# script being executable and having a shebang - confirm.
RUN /scripts/install_stuff.sh

# libcifpp, pinned to a fixed commit, configured/built/installed with CMake.
# The clone now happens inside /tools, matching the other source builds in this
# file; previously /tools was created but never entered, so the checkout landed
# in the build's current directory instead.
RUN mkdir -p /tools && \
cd /tools && \
git clone https://github.com/PDB-REDO/libcifpp && \
cd libcifpp && \
git checkout 288b2bb72093054f9b66604fd7dca4a3a6ea0a27 && \
mkdir build && \
cd build && \
cmake .. && \
cmake --build . --config Release && \
cmake --install .
# libmcfp at a fixed commit: out-of-tree CMake configure, build and install
# using ./build as the build directory.
RUN cd /tools && \
    git clone https://github.com/mhekkel/libmcfp.git && \
    cd libmcfp && \
    git reset --hard 4aa95505ded43e663fd9dae61c49b08fdc6cce0c && \
    cmake -S . -B build && \
    cmake --build build && \
    cmake --install build

# DSSP (PDB-REDO) at a fixed commit: configure into ./build, build, install.
RUN cd /tools \
 && git clone https://github.com/PDB-REDO/dssp.git \
 && cd dssp \
 && git checkout db629fb2282a5d9c58048d6ca833027e5a214cf3 \
 && cmake -S . -B build \
 && cmake --build build \
 && cmake --install build


# Run the copied helper scripts with `-s` at image build time.
# NOTE(review): `-s` presumably selects each script's setup/install mode - confirm
# against the scripts themselves.
RUN bash /scripts/pdb2ProtLigSite.sh -s
RUN bash /scripts/msa2coupling.sh -s
RUN bash /scripts/Seq2Disorder.sh -s
# Disabled steps, kept for reference (Seq2Disorder also appears enabled above):
#RUN bash /scripts/Seq2ProtLangRep.sh -s
#RUN bash /scripts/Seq2Disorder.sh -s
#RUN bash /scripts/Seq2SecStruc.sh -s

# Python packages installed with pip; --no-cache-dir keeps pip's download cache
# out of the image layer.
RUN python3 -m pip install --no-cache-dir biopython numpy pandas pybiolib
# apt-get rather than apt: apt's CLI is not stable for scripted use.
RUN apt-get install -y nano
RUN apt-get install -y hmmer

# dev
# Trailing slash makes it explicit that /scripts/ is a directory destination.
COPY Seq2PfamResidues.sh /scripts/
RUN bash /scripts/Seq2PfamResidues.sh -s


COPY mf.R /scripts/

# NOTE(review): on PyPI, `Bio` appears to be a different project from
# `biopython` (which is installed above) - confirm which one is intended.
RUN python3 -m pip install --no-cache-dir numpy pandas Bio


# Default command is a bash shell (exec form). No ENTRYPOINT is active, so any
# command given to `docker run <image> ...` replaces this default.
#ENTRYPOINT ["/bin/bash", "-c"]
CMD ["/bin/bash"]
Expand Down
8 changes: 6 additions & 2 deletions mf.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# -------------------- Setup
### runtime vars
# Only the user-supplied command-line arguments are kept (trailingOnly=TRUE);
# they are read positionally below. The trailing comments show example values.
args = commandArgs(trailingOnly=TRUE)
# arg 1: path to the input FASTA file
infasta = as.character(args[1]) #"/query/HCMV_UL54.fasta"
infasta = as.character(args[1]) #"/query/HCMV_UL97.fasta"
# arg 2: name of the BLAST database
blast_db_name = as.character(args[2]) # "uniref50.fasta"
# arg 3: thread count, converted to numeric
threads = as.numeric(args[3]) # 32
# arg 4: e-value, kept as a character string
v_eval = as.character(args[4]) # 1e-7 # psiblast e value
Expand Down Expand Up @@ -70,6 +70,10 @@ for(i in 1:nrow(t)){
}
writeLines(out_fasta, temp_blast)

# deduplicate fasta
# awk prints a record only the first time the first field of its ">" header
# line is seen; the result goes to a scratch file and is then copied back
# over temp_blast.
dedup_cmd = paste0("awk '/^>/{f=!d[$1];d[$1]=1}f' ", temp_blast, " > /tmp/temp.fa")
system(dedup_cmd)
copy_back_cmd = paste0("cp /tmp/temp.fa ", temp_blast)
system(copy_back_cmd)

# align outputted fasta
# mafft is run with temp_blast passed to --add and infasta as the input file,
# using --keeplength; stdout is written to temp_blast_msa and stderr to
# err.txt inside tdir.
command = paste0(
  "mafft --add ", temp_blast,
  " --keeplength --thread ", threads,
  " ", infasta,
  " > ", temp_blast_msa,
  " 2>", tdir, "/err.txt"
)
system(command)
Expand Down Expand Up @@ -399,7 +403,7 @@ df = cbind(df,physdat)


# ------------------------------------------------------------ Structural features
if(1 == 2){
if(1 == 1){
struc = list()
# -------------------- from sequence
### disorder
Expand Down
2 changes: 0 additions & 2 deletions query/HCMV_UL54.fasta

This file was deleted.

Loading

0 comments on commit c3d9483

Please sign in to comment.