This repository has been archived by the owner on Jul 4, 2021. It is now read-only.
forked from blab/zika-seq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
smk5
executable file
·86 lines (74 loc) · 3.1 KB
/
smk5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import time
import subprocess
from cfg import config
import os
# Directory settings read from the project config when the workflow file is parsed.
DEMUX_DIR = config['demux_dir']  # output of rule C_redemultiplex, input of rule D_assembly
BASECALLED_READS = config['basecalled_reads']  # guppy_basecaller --save_path
RAW_READS = config['raw_reads']  # guppy_basecaller --input_path; demux_guppy/ is written under it
BUILD_DIR = config['build_dir']  # final target requested by rule all
def get_minion_analysis():
    """Return the path of one ``*.fast5`` file, relative to the raw-reads directory.

    Runs ``find <raw_reads> -name "*.fast5" | head -n 1`` through the shell
    and strips the ``config['raw_reads']`` prefix, plus the path separator
    that follows it, from the first hit.

    Returns:
        str: Relative path of the first match, or ``""`` when ``find`` prints
        nothing.

    Raises:
        subprocess.CalledProcessError: If the shell pipeline exits non-zero.
    """
    root = config['raw_reads']
    call = "find %s -name \"*.fast5\" | head -n 1" % (root)
    fname = subprocess.check_output(call, shell=True)
    if not isinstance(fname, str):
        # check_output hands back bytes on Python 3. Decode them rather than
        # slicing the ``b'...\n'`` repr, which mangles quotes, backslashes
        # and non-ASCII characters in file names.
        fname = fname.decode()
    fname = fname.rstrip("\n")
    # Remove only the leading root. A blanket replace() would also delete
    # later occurrences of the root text inside the path, and a fixed [1:]
    # eats a real character when the configured root already ends in "/".
    if fname.startswith(root):
        fname = fname[len(root):]
    return fname.lstrip("/")
# Evaluated once, when the workflow file is parsed (this runs the `find` shell command).
# NOTE(review): no rule in the visible part of this file references this value -- confirm it is still needed.
GET_MINION_ANALYSIS = get_minion_analysis()
# Default target: request the build directory, which rule D_assembly declares
# as its output.
rule all:
    input:
        str(BUILD_DIR)
    params:
        build = BUILD_DIR
def _get_basecall_config(wildcards):
    """Look up the ``basecall_config`` entry of the workflow config.

    Args:
        wildcards: Unused; present because this function is handed to a rule
            as a ``params`` callable.

    Returns:
        The value stored under ``config["basecall_config"]``.
    """
    basecall_cfg = config["basecall_config"]
    return basecall_cfg
# Step A: run guppy_basecaller (with --resume) on the raw-reads directory and
# save into BASECALLED_READS; the declared output is its "pass" subdirectory.
# The guppy config name is supplied per job through params.cfg.
rule A_resume_basecall:
    input:
        raw = str(RAW_READS)
    output:
        directory("{}/pass".format(BASECALLED_READS))
    params:
        cfg = _get_basecall_config
    shell:
        (
            "guppy_basecaller --resume --device auto"
            " --gpu_runners_per_device 64 --num_callers 26"
            " --verbose_logs --qscore_filtering --records_per_fastq 0"
            " --input_path %s --save_path %s --recursive"
            " --config {params.cfg}"
        ) % (RAW_READS, BASECALLED_READS)
def get_fastq_file():
    """Return the path of one ``*.fast5`` file, relative to ``<basecalled_reads>/pass``.

    Runs ``find <basecalled_reads>/pass -name "*.fast5" | head -n 1`` through
    the shell and strips the search-root prefix, plus the path separator that
    follows it, from the first hit.

    NOTE(review): despite the function name, the pattern searched for is
    ``*.fast5`` rather than ``*.fastq``. Confirm which is intended before
    changing it: the result is spliced into the guppy_barcoder
    ``--input_path`` in rule B_demultiplex.

    Returns:
        str: Relative path of the first match, or ``""`` when ``find`` prints
        nothing.

    Raises:
        subprocess.CalledProcessError: If the shell pipeline exits non-zero.
    """
    root = config['basecalled_reads'] + "/pass"
    call = "find %s -name \"*.fast5\" | head -n 1" % (root)
    fname = subprocess.check_output(call, shell=True)
    if not isinstance(fname, str):
        # check_output hands back bytes on Python 3. Decode them rather than
        # slicing the ``b'...\n'`` repr, which mangles quotes, backslashes
        # and non-ASCII characters in file names.
        fname = fname.decode()
    fname = fname.rstrip("\n")
    # Remove only the leading root. A blanket replace() would also delete
    # later occurrences of the root text inside the path.
    if fname.startswith(root):
        fname = fname[len(root):]
    return fname.lstrip("/")
# Resolved once, when the workflow file is parsed (not when a rule runs);
# interpolated into the guppy_barcoder --input_path of rule B_demultiplex.
FASTQ = get_fastq_file()
# Step B: demultiplex the basecalled "pass" reads with guppy_barcoder into
# RAW_READS/demux_guppy, then tar+gzip the "unclassified" folder and delete
# the uncompressed copy.
# The FASTQ component of the input path is a module-level value fixed when the
# workflow file is parsed.
rule B_demultiplex:
    input:
        basecall = "{}/pass".format(BASECALLED_READS)
    output:
        directory("{}/demux_guppy".format(RAW_READS))
    shell:
        (
            "guppy_barcoder --worker_threads 20 --input_path %s/pass/%s"
            " --save_path %s/demux_guppy --recursive --verbose_logs"
            " --records_per_fastq 100 --require_barcodes_both_ends"
            " && tar -czvf %s/demux_guppy/unclassified.tar.gz %s/demux_guppy/unclassified"
            " && rm -rf %s/demux_guppy/unclassified"
        ) % (BASECALLED_READS, FASTQ, RAW_READS, RAW_READS, RAW_READS, RAW_READS)
# Step C: run porechop over the guppy_barcoder output (RAW_READS/demux_guppy),
# writing per-barcode results into DEMUX_DIR.
rule C_redemultiplex:
    input:
        pre_demultiplex = "{}/demux_guppy".format(RAW_READS)
    output:
        directory(str(DEMUX_DIR))
    shell:
        (
            "porechop --input %s/demux_guppy --threads 20"
            " --barcode_dir %s --require_two_barcodes --discard_middle"
        ) % (RAW_READS, DEMUX_DIR)
def _get_samples(wildcards):
    """Join every configured sample into one space-separated string.

    The result is passed to ``pipeline.py`` as its ``--samples`` argument.

    Args:
        wildcards: Unused; present because this function is handed to a rule
            as a ``params`` callable.

    Returns:
        str: The entries of ``config['samples']`` separated by single spaces.
    """
    return " ".join(config['samples'])
# Step D: run pipeline/scripts/pipeline.py on the demultiplexed reads; the
# declared output is the build directory.
# `mkdir -p` is used so that an already-existing build/ directory no longer
# makes mkdir fail and, through `&&`, skip pipeline.py entirely.
# NOTE(review): the directory created here is the literal "build/", while
# pipeline.py is given --build_dir {params.build} (BUILD_DIR from the config).
# Confirm whether these are meant to be the same path.
rule D_assembly:
    params:
        dimension=config['dimension'],
        samples=_get_samples,
        raw=config['raw_reads'],
        build=BUILD_DIR,
        basecalled_reads=config['basecalled_reads'],
        reference_genome=config['reference_genome'],
        primer_scheme=config['primer_scheme']
    input:
        demultiplex = "%s" % (DEMUX_DIR)
    output:
        directory("%s" % (BUILD_DIR))
    shell:
        "mkdir -p build/ && python pipeline/scripts/pipeline.py"
        " --samples {params.samples} --dimension {params.dimension}"
        " --raw_reads {params.raw} --build_dir {params.build}"
        " --basecalled_reads {params.basecalled_reads}"
        " --reference_genome {params.reference_genome}"
        " --primer_scheme {params.primer_scheme}"