From d6b7a1f774a65c37b7b98a8b6c69b7397b059241 Mon Sep 17 00:00:00 2001 From: "Brian P. Walenz" Date: Fri, 18 Sep 2020 12:42:39 -0400 Subject: [PATCH] Add -minlength option to discard short overlaps. --- src/mhap/mhapConvert.C | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/mhap/mhapConvert.C b/src/mhap/mhapConvert.C index 87da59f0c..ee1db19d8 100644 --- a/src/mhap/mhapConvert.C +++ b/src/mhap/mhapConvert.C @@ -28,6 +28,7 @@ int main(int argc, char **argv) { char *outName = NULL; char *seqName = NULL; + int32 minLength = 0; vector files; @@ -41,6 +42,9 @@ main(int argc, char **argv) { } else if (strcmp(argv[arg], "-S") == 0) { seqName = argv[++arg]; + } else if (strcmp(argv[arg], "-minlength") == 0) { + minLength = strtoint32(argv[++arg]); + } else if (fileExists(argv[arg])) { files.push_back(argv[arg]); @@ -55,6 +59,7 @@ main(int argc, char **argv) { if ((err) || (seqName == NULL) || (outName == NULL) || (files.size() == 0)) { fprintf(stderr, "usage: %s -S seqStore -o output.ovb input.mhap[.gz]\n", argv[0]); fprintf(stderr, " Converts mhap native output to ovb\n"); + fprintf(stderr, " -minlength X discards overlaps below X bp long.\n"); if (seqName == NULL) fprintf(stderr, "ERROR: no seqStore (-S) supplied\n"); @@ -147,9 +152,16 @@ main(int argc, char **argv) { ov.dat.ovl.bhg5, ov.dat.ovl.bhg3, (ov.dat.ovl.flipped) ? " flipped" : ""), exit(1); - // Overlap looks good, write it! + // Overlap looks good, write it if it's long enough. Bogart is + // computing overlap length as the max number of bases covered on + // either read. + + int32 oalen = alen - ov.dat.ovl.ahg5 - ov.dat.ovl.ahg3; + int32 oblen = blen - ov.dat.ovl.bhg5 - ov.dat.ovl.bhg3; - of->writeOverlap(&ov); + if ((minLength <= oalen) || + (minLength <= oblen)) + of->writeOverlap(&ov); } delete in;