Skip to content

Commit

Permalink
New environment for additional benchmarks.
Browse files (browse the repository at this point in the history)
  • Loading branch information
vsbuffalo committed May 18, 2024
1 parent 7448e82 commit b732b84
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 4 deletions.
7 changes: 7 additions & 0 deletions additional_benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Additional Benchmarks

## Environment

    mamba create -n granges_benchmark -c conda-forge -c bioconda --file requirements.txt --yes
    pip install snakemake-executor-plugin-slurm

29 changes: 25 additions & 4 deletions additional_benchmarks/Snakefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import numpy as np

RSCRIPT = "~/.conda/envs/granges_benchmark/bin/R"
GRANGES = "../target/release/granges"
SEQLENS = "../tests_data/hg38_seqlens.tsv"
NREPS = 1
NREPS = 50

localrules: granges

rule granges:
output: GRANGES
Expand All @@ -14,6 +17,10 @@ rule granges:
rule random_bed:
input: seqlens=SEQLENS, granges=GRANGES
output: "random_bed/{size}__{rep}.bed.gz"
resources:
runtime=30,
mem_mb_per_cpu=1800,
cpus_per_task=28
shell:
"""
{input.granges} random-bed --sort --num {wildcards.size} {input.seqlens} | gzip > {output}
Expand All @@ -24,6 +31,10 @@ rule granges_filter:
genome=SEQLENS, granges=GRANGES
output: "results/granges_filter__size_{size}.bed"
benchmark: repeat("benchmarks/granges_filter__size_{size}.tsv", NREPS)
resources:
runtime=30,
mem_mb_per_cpu=1800,
cpus_per_task=28
shell:
"""
{input.granges} filter --left {input.a} --right {input.b} --genome {input.genome} > {output}
Expand All @@ -34,6 +45,10 @@ rule bedtools_intersect:
input: a="random_bed/{size}__A.bed.gz", b="random_bed/{size}__B.bed.gz"
output: "results/bedtools_intersect__size_{size}.bed"
benchmark: repeat("benchmarks/bedtools_intersect__size_{size}.tsv", NREPS)
resources:
runtime=30,
mem_mb_per_cpu=1800,
cpus_per_task=28
shell:
"""
bedtools intersect -a {input.a} -b {input.b} > {output}
Expand All @@ -42,14 +57,20 @@ rule bedtools_intersect:
rule plyranges_join_overlap_inner:
input: a="random_bed/{size}__A.bed.gz", b="random_bed/{size}__B.bed.gz"
output: "results/plyranges_join_overlap_inner__size_{size}.bed"
params: rscript = RSCRIPT
benchmark: repeat("benchmarks/plyranges_join_overlap_inner__size_{size}.tsv", NREPS)
resources:
runtime=300,
mem_mb_per_cpu=1800,
cpus_per_task=28
shell:
"""
Rscript Rscripts/plyranges_join_overlap_inner.r {input.a} {input.b} > {output}
{params.rscript} Rscripts/plyranges_join_overlap_inner.r {input.a} {input.b} > {output}
"""

tools = ["bedtools_intersect", "granges_filter", "plyranges_join_overlap_inner"]
sizes = np.logspace(3, 9, 10)
#tools = ["bedtools_intersect", "granges_filter", "plyranges_join_overlap_inner"]
tools = ["bedtools_intersect", "granges_filter"]
sizes = np.logspace(3, 9, 10).astype('int')

all_benches = expand("results/{tool}__size_{size}.bed", tool=tools, size=sizes)

Expand Down
5 changes: 5 additions & 0 deletions additional_benchmarks/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
snakemake
numpy
scipy
matplotlib
R

0 comments on commit b732b84

Please sign in to comment.