From ea29c166abdc7c88fc985774055cd2be4a4cc186 Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Tue, 2 May 2017 13:48:25 +0200 Subject: [PATCH] Fix the variant avoidance code. Both merged and unmerged contexts are now working properly --- extract.c | 4 +- tests/cg_with_variants.bam | Bin 506 -> 505 bytes tests/cg_with_variants.bam.bai | Bin 96 -> 96 bytes tests/test.py | 95 ++++++++++++++++++--------------- 4 files changed, 54 insertions(+), 45 deletions(-) diff --git a/extract.c b/extract.c index 7eb5225..4132637 100644 --- a/extract.c +++ b/extract.c @@ -236,9 +236,9 @@ void extractCalls(Config *config) { lastCHG->nmethyl = 0; lastCHG->nunmethyl = 0; } + ltid = tid; + lpos = pos; } - ltid = tid; - lpos = pos; continue; } diff --git a/tests/cg_with_variants.bam b/tests/cg_with_variants.bam index f0bda4fd5106238526d002c1e57882852d110c44..77996ad86cd2233edbe219111b188d5bf0dd8de4 100644 GIT binary patch delta 69 zcmV-L0J{JB1Nj565&;3)lM?}xGpr<}L?WPB5F@2yOg}6h27bUE55^EbkPHUK57^^j bX!!wqJeUkAKVXlC!R!YB-#TtAliC3wX%-og delta 55 zcmV-70LcIO1NsB75&;3*lM?}x9PAVw@S&600U$-06&L^j diff --git a/tests/cg_with_variants.bam.bai b/tests/cg_with_variants.bam.bai index aae5c0cd9c81c393955163cf5f765151710c7b5a..bdddf6ae697d18bf71750ad1310fce08a453935a 100644 GIT binary patch delta 14 UcmYdDm>|dabE3RBkTBN-04E^@8vp|b^Z=$?7kTBN-042f% 1 -rm('cg_aln_CpG.bedGraph') +rm('test2_CpG.bedGraph') # should be none with q > 10 -check_call('../MethylDackel extract cg100.fa cg_aln.bam -q 10', shell=True) -assert op.exists('cg_aln_CpG.bedGraph') -lines = sum(1 for _ in open('cg_aln_CpG.bedGraph')) +rm('test3_CpG.bedGraph') +check_call('../MethylDackel extract cg100.fa cg_aln.bam -q 10 -o test3', shell=True) +assert op.exists('test3_CpG.bedGraph') +lines = sum(1 for _ in open('test3_CpG.bedGraph')) assert lines == 1 -rm('cg_aln_CpG.bedGraph') +rm('test3_CpG.bedGraph') # Test the new methylKit option -check_call('../MethylDackel extract --methylKit --CHH --CHG cg100.fa cg_aln.bam -q 2', shell=True) -assert op.exists('cg_aln_CpG.methylKit') -lines = sum(1 for _ in open('cg_aln_CpG.methylKit')) +rm('test4_CpG.methylKit') +rm('test4_CHG.methylKit') +rm('test4_CHH.methylKit') +check_call('../MethylDackel extract --methylKit --CHH --CHG cg100.fa cg_aln.bam -q 2 -o test4', shell=True) +assert op.exists('test4_CpG.methylKit') +lines = sum(1 for _ in open('test4_CpG.methylKit')) assert lines > 1 -rm('cg_aln_CpG.methylKit') -lines = sum(1 for _ in open('cg_aln_CHG.methylKit')) +rm('test4_CpG.methylKit') +lines = sum(1 for _ in open('test4_CHG.methylKit')) assert lines == 1 -rm('cg_aln_CHG.methylKit') -assert op.exists('cg_aln_CHH.methylKit') -lines = sum(1 for _ in open('cg_aln_CHH.methylKit')) +rm('test4_CHG.methylKit') +assert op.exists('test4_CHH.methylKit') +lines = sum(1 for _ in open('test4_CHH.methylKit')) assert lines == 1 -rm('cg_aln_CHH.methylKit') +rm('test4_CHH.methylKit') # Check that --minDepth is working, which means there should be no called sites -check_call('../MethylDackel extract --minDepth 2 cg100.fa cg_aln.bam -q 2', shell=True) -assert op.exists('cg_aln_CpG.bedGraph') -lines = sum(1 for _ in open('cg_aln_CpG.bedGraph')) +rm('test5_CpG.bedGraph') +check_call('../MethylDackel extract --minDepth 2 cg100.fa cg_aln.bam -q 2 -o test5', shell=True) +assert op.exists('test5_CpG.bedGraph') +lines = sum(1 for _ in open('test5_CpG.bedGraph')) assert lines == 1 -rm('cg_aln_CpG.bedGraph') +rm('test5_CpG.bedGraph') # Check that --ignoreFlags is working, which means that there are now called sites -check_call('../MethylDackel extract --ignoreFlags 0xD00 cg100.fa cg_aln.bam -q 2', shell=True) -assert op.exists('cg_aln_CpG.bedGraph') -lines = sum(1 for _ in open('cg_aln_CpG.bedGraph')) +rm('test6_CpG.bedGraph') +check_call('../MethylDackel extract --ignoreFlags 0xD00 cg100.fa cg_aln.bam -q 2 -o test6', shell=True) +assert op.exists('test6_CpG.bedGraph') +lines = sum(1 for _ in open('test6_CpG.bedGraph')) assert lines == 49 -rm('cg_aln_CpG.bedGraph') +rm('test6_CpG.bedGraph') # Check that --requireFlags is working -check_call('../MethylDackel extract --requireFlags 0xD00 cg100.fa cg_aln.bam -q 2', shell=True) -assert op.exists('cg_aln_CpG.bedGraph') -lines = sum(1 for _ in open('cg_aln_CpG.bedGraph')) +rm('test7_CpG.bedGraph') +check_call('../MethylDackel extract --requireFlags 0xD00 cg100.fa cg_aln.bam -q 2 -o test7', shell=True) +assert op.exists('test7_CpG.bedGraph') +lines = sum(1 for _ in open('test7_CpG.bedGraph')) assert lines == 49 -rm('cg_aln_CpG.bedGraph') +rm('test7_CpG.bedGraph') # Check absolute trimming bounds -check_call('../MethylDackel extract --nOT 50,50,40,40 cg100.fa cg_aln.bam -q 2', shell=True) -assert op.exists('cg_aln_CpG.bedGraph') -lines = sum(1 for _ in open('cg_aln_CpG.bedGraph')) +rm('test8_CpG.bedGraph') +check_call('../MethylDackel extract --nOT 50,50,40,40 cg100.fa cg_aln.bam -q 2 -o test8', shell=True) +assert op.exists('test8_CpG.bedGraph') +lines = sum(1 for _ in open('test8_CpG.bedGraph')) assert lines == 12 -rm('cg_aln_CpG.bedGraph') +rm('test8_CpG.bedGraph') # Check variant filtering (there are 49 lines otherwise) -check_call('../MethylDackel extract -p 1 -q 0 --minOppositeDepth 3 --maxVariantFrac 0.25 cg100.fa cg_with_variants.bam', shell=True) -assert op.exists('cg_with_variants_CpG.bedGraph') -lines = sum(1 for _ in open('cg_with_variants_CpG.bedGraph')) +rm('test9_CpG.bedGraph') +check_call('../MethylDackel extract -p 1 -q 0 -o test9 --minOppositeDepth 3 --maxVariantFrac 0.25 cg100.fa cg_with_variants.bam', shell=True) +assert op.exists('test9_CpG.bedGraph') +lines = sum(1 for _ in open('test9_CpG.bedGraph')) assert lines == 48 -rm('cg_with_variants_CpG.bedGraph') +rm('test9_CpG.bedGraph')