forked from cnobles/iGUIDE
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrim.rules
75 lines (72 loc) · 2.71 KB
/
trim.rules
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- mode: Snakemake -*-
# Sequence Trimming Rules
# Trim one binned R1 read file with tools/rscripts/trim.R.
#
# Reads the binned R1 fastq.gz for a {sample}/{bin} pair and writes a trimmed
# fastq.gz plus a stat file; both outputs are marked temp(). The per-sample
# sequences handed to -l / -r come from the R1_LEAD and R1_OVER lookups, and
# the mismatch / length limits come from the workflow config.
rule seq_trim_R1:
    input:
        RUN_DIR + "/process_data/binned/{sample}.R1.{bin}.fastq.gz"
    output:
        trim=temp(RUN_DIR + "/process_data/trimmed/{sample}.R1.{bin}.trim.fastq.gz"),
        stat=temp(RUN_DIR + "/process_data/stats/{sample}.R1.{bin}.trim.stat")
    params:
        tool=ROOT_DIR + "/tools/rscripts/trim.R",
        lead=lambda wildcards: R1_LEAD[wildcards.sample],
        over=lambda wildcards: R1_OVER[wildcards.sample],
        leadMis=config["R1leadMismatch"],
        overMis=config["R1overMismatch"],
        overLen=config["R1overMaxLength"]
    log:
        RUN_DIR + "/logs/{sample}.R1.{bin}.trim.log"
    resources:
        # Requested memory grows linearly with the attempt number.
        mem_mb=lambda wildcards, attempt: attempt * config["trimMB"]
    shell:
        # Path placeholders use the ':q' format spec so that a RUN_DIR or
        # ROOT_DIR containing spaces or shell metacharacters still reaches
        # Rscript (and the log redirection) as a single argument.
        # stdout and stderr are both captured in the log file.
        """
        Rscript {params.tool:q} {input:q} -o {output.trim:q} \
            -l {params.lead} --leadMismatch {params.leadMis} \
            -r {params.over} --overMismatch {params.overMis} \
            --overMaxLength {params.overLen} --stat {output.stat:q} \
            --compress > {log:q} 2>&1
        """
# First R2 trimming pass, run with tools/rscripts/trim.R.
#
# Reads the binned R2 fastq.gz for a {sample}/{bin} pair and writes the
# result under trimmed/primer/ together with a stat file; both outputs are
# marked temp(). The per-sample sequences handed to -l / -r come from the
# R2_LEAD and R2_OVER lookups, and the mismatch / length limits come from
# the workflow config.
rule seq_trim_R2_primer:
    input:
        RUN_DIR + "/process_data/binned/{sample}.R2.{bin}.fastq.gz"
    output:
        trim=temp(RUN_DIR + "/process_data/trimmed/primer/{sample}.R2.{bin}.primer.trim.fastq.gz"),
        stat=temp(RUN_DIR + "/process_data/stats/{sample}.R2.{bin}.primer.trim.stat")
    params:
        tool=ROOT_DIR + "/tools/rscripts/trim.R",
        lead=lambda wildcards: R2_LEAD[wildcards.sample],
        over=lambda wildcards: R2_OVER[wildcards.sample],
        leadMis=config["R2leadMismatch"],
        overMis=config["R2overMismatch"],
        overLen=config["R2overMaxLength"]
    log:
        RUN_DIR + "/logs/{sample}.R2.{bin}.primer.trim.log"
    resources:
        # Requested memory grows linearly with the attempt number.
        mem_mb=lambda wildcards, attempt: attempt * config["trimMB"]
    shell:
        # Path placeholders use the ':q' format spec so that a RUN_DIR or
        # ROOT_DIR containing spaces or shell metacharacters still reaches
        # Rscript (and the log redirection) as a single argument.
        # stdout and stderr are both captured in the log file.
        """
        Rscript {params.tool:q} {input:q} -o {output.trim:q} \
            -l {params.lead} --leadMismatch {params.leadMis} \
            -r {params.over} --overMismatch {params.overMis} \
            --overMaxLength {params.overLen} --stat {output.stat:q} \
            --compress > {log:q} 2>&1
        """
# Second R2 trimming pass, run with tools/rscripts/trim.R.
#
# Consumes the trimmed/primer/ R2 file (the path pattern produced by
# seq_trim_R2_primer) and writes the final trimmed R2 fastq.gz plus a stat
# file; both outputs are marked temp(). Only a leading sequence (from the
# R2_LEAD_ODN lookup) and its mismatch limit are passed, and trim.R is
# invoked with --noQualTrimming.
rule seq_trim_R2_odn:
    input:
        RUN_DIR + "/process_data/trimmed/primer/{sample}.R2.{bin}.primer.trim.fastq.gz"
    output:
        trim=temp(RUN_DIR + "/process_data/trimmed/{sample}.R2.{bin}.trim.fastq.gz"),
        stat=temp(RUN_DIR + "/process_data/stats/{sample}.R2.{bin}.trim.stat")
    params:
        tool=ROOT_DIR + "/tools/rscripts/trim.R",
        lead=lambda wildcards: R2_LEAD_ODN[wildcards.sample],
        leadMis=config["R2odnMismatch"]
    log:
        RUN_DIR + "/logs/{sample}.R2.{bin}.odn.trim.log"
    resources:
        # Requested memory grows linearly with the attempt number.
        mem_mb=lambda wildcards, attempt: attempt * config["trimMB"]
    shell:
        # Path placeholders use the ':q' format spec so that a RUN_DIR or
        # ROOT_DIR containing spaces or shell metacharacters still reaches
        # Rscript (and the log redirection) as a single argument.
        # stdout and stderr are both captured in the log file.
        """
        Rscript {params.tool:q} {input:q} -o {output.trim:q} \
            -l {params.lead} --leadMismatch {params.leadMis} \
            --noQualTrimming --stat {output.stat:q} --compress > {log:q} 2>&1
        """