Skip to content

Commit

Permalink
Merge pull request #16 from harazono/master
Browse files Browse the repository at this point in the history
ADD: NEW script: split_paf
  • Loading branch information
mkasa authored Nov 7, 2022
2 parents 0bd41f6 + f910c82 commit 43d194c
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 128 deletions.
42 changes: 20 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,28 +37,26 @@ You may need to be root for installing the Perl libraries.
List
----

<table style="border=1 solid">
<tr><th>name</th><th>type</th><th>description</th><th>note</th></tr>
<tr><td>fatt</td><td>bio</td><td>FASTA/FASTQ manipulation tool.</td><td>see also doc/fatt.md</td></tr>
<tr><td>sieve</td><td>general</td><td>Random sampling of text files.</td><td>maybe useful for cross-validation</td></tr>
<tr><td>convertsequence</td><td>bio</td><td>Format conversion of sequence files.</td><td>see also perldoc</td></tr>
<tr><td>fixshebang</td><td>general</td><td>Fix shebang lines.</td><td>see also perldoc</td></tr>
<tr><td>gcc-color</td><td>general</td><td>Color the output of gcc/g++.</td><td>not extensively used.</td></tr>
<tr><td>icc-color</td><td>general</td><td>Color the output of Intel C++.</td><td></td></tr>
<tr><td>mydaemon</td><td>general</td><td>Automatically sets up crontab to ensure your daemon is running. (Works without root)</td><td></td></tr>
<tr><td>rep</td><td>general</td><td>Simple wrapper for different VCS such as svn/git/hg.</td><td>see also perldoc</td></tr>
<tr><td>sq</td><td>general</td><td>Execute SQL queries over CSV files.</td><td>Obsolete. Use q instead.</td></tr>
<tr><td>mddoc</td><td>general</td><td>Simple wrapper to view formatted Markdown (and restructured) texts via text browser</td><td>Obsolete. Use glow.</td></tr>
<tr><td>gmddoc</td><td>general</td><td>Simple wrapper to view formatted GitHub-flavored Markdown via (graphic) web browser</td><td>Obsolete. Standalone grip can do this now.</td></tr>
<tr><td>sha_scan</td><td>general</td><td>Find duplicated files by collecting SHA1 hashes of files in a given directory.</td><td></td></tr>
<tr><td>rep</td><td>general</td><td>Repository utility (one command, manage svn/cvs/git/hg at once!)</td><td></td></tr>
<tr><td>taw</td><td>general</td><td>Tiny Amazon EC2 Wrapper.</td><td>Moved to an independent repository</td></tr>
<tr><td>gfwhere</td><td>general</td><td>Find inconsistently replicated files in GlusterFS</td><td></td></tr>
<tr><td>json2csv</td><td>general</td><td>Convert JSON into CSV</td><td></td></tr>
<tr><td>csv2html</td><td>general</td><td>Convert CSV into HTML</td><td></td></tr>
<tr><td>csv2md</td><td>general</td><td>Convert CSV into a table in Markdown extra</td><td></td></tr>
<tr><td></td><td></td><td></td><td></td></tr>
</table>
|name|type|description|note|
|---|---|---|---|
|fatt|bio|FASTA/FASTQ manipulation tool.|see also doc/fatt.md|
|sieve|general|Random sampling of text files.|maybe useful for cross-validation|
|convertsequence|bio|Format conversion of sequence files.|see also perldoc|
|fixshebang|general|Fix shebang lines.|see also perldoc|
|gcc-color|general|Color the output of gcc/g++.|not extensively used.|
|icc-color|general|Color the output of Intel C++.|
|mydaemon|general|Automatically sets up crontab to ensure your daemon is running. (Works without root)|
|rep|general|Simple wrapper for different VCS such as svn/git/hg.|see also perldoc|
|sq|general|Execute SQL queries over CSV files.|see also perldoc. requires a bunch of Perl modules. DBD::CSV is required.|
|mddoc|general|Simple wrapper to view formatted Markdown (and restructured) texts via text browser|Requires Markdown.pl or pandoc.|
|gmddoc|general|Simple wrapper to view formatted GitHub-flavored Markdown via (graphic) web browser|Requires grip (python module).|
|sha_scan|general|Find duplicated files by collecting SHA1 hashes of files in a given directory.|
|rep|general|Repository utility (one command, manage svn/cvs/git/hg at once!)|
|gfwhere|general|Find inconsistently replicated files in GlusterFS|
|json2csv|general|Convert JSON into CSV|
|csv2html|general|Convert CSV into HTML|
|csv2md|general|Convert CSV into a table in Markdown extra|
|split_paf|bio|Split alignments (PAF format) by looking at the CIGAR string|

Licenses
--------
Expand Down
10 changes: 5 additions & 5 deletions script/csv2html
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def main():
args = parser.parse_args()

if args.version:
print "csv2html version %s" % version_string
print("csv2html version %s" % version_string)
sys.exit(0)
(column_names, header_attrs, data_attrs) = (None, None, None)
# Get keys in the first line (using jq)
Expand All @@ -184,15 +184,15 @@ def main():
is_first_line = False
try:
(column_names, header_attrs, data_attrs) = parse_attributes(args.colfmt, len(line))
except Exception, e:
print >> sys.argvstderr, "ERROR: attribute parsing failed: ", e
except Exception as e:
print("ERROR: attribute parsing failed: ", e, file=sys.argvstderr)
sys.exit(2)
if not args.noheader:
writer.writeheadrow(line, header_attrs, column_names)
continue
writer.writerow(line, data_attrs)
except Exception, e:
print >> sys.stderr, "ERROR: ", e, " at line %d" % line_count
except Exception as e:
print("ERROR: ", e, " at line %d" % line_count, file=sys.stderr)
sys.exit(3)
if not args.notabletag: w("</table>\n")

Expand Down
10 changes: 5 additions & 5 deletions script/csv2md
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def main():
args = parser.parse_args()

if args.version:
print "csv2md version %s" % version_string
print("csv2md version %s" % version_string)
sys.exit(0)
(column_names, header_attrs, data_attrs) = (None, None, None)
# Get keys in the first line (using jq)
Expand All @@ -177,15 +177,15 @@ def main():
is_first_line = False
try:
(column_names, header_attrs, data_attrs) = parse_attributes(args.colfmt, len(line))
except Exception, e:
print >> sys.argvstderr, "ERROR: attribute parsing failed: ", e
except Exception as e:
print("ERROR: attribute parsing failed: ", e, file=sys.argvstderr)
sys.exit(2)
if not args.noheader:
writer.writeheadrow(line, data_attrs, column_names)
continue
writer.writerow(line, data_attrs)
except Exception, e:
print >> sys.stderr, "ERROR: ", e, " at line %d" % line_count
except Exception as e:
print("ERROR: ", e, " at line %d" % line_count, file=sys.stderr)
sys.exit(3)

if __name__ == '__main__':
Expand Down
42 changes: 21 additions & 21 deletions script/json2csv
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import os,sys,subprocess,csv,json,argparse
class TSVWriter:
def writerow(self, cols):
cols = [str(x) for x in cols]
print "\t".join(cols);
print("\t".join(cols));

class HTMLWriter:
def writeheadrow(self, cols):
Expand All @@ -61,17 +61,17 @@ def convert_json_to_anotherformat(jsonobj, array_of_keys, writer, line_no):
try:
for key in array_of_keys:
if not key in jsonobj:
print >>sys.stderr, "ERROR: does not contain a key '%s' at line %d" % (key, line_no)
print("ERROR: does not contain a key '%s' at line %d" % (key, line_no), file=sys.stderr)
sys.exit(2)
value = jsonobj[key]
csv_array.append(value)
except Exception, e:
print >>sys.stderr, "ERROR: ", e, "at line %d" % line_no
except Exception as e:
print("ERROR: ", e, "at line %d" % line_no, file=sys.stderr)
sys.exit(2)
try:
writer.writerow(csv_array)
except Exception, e:
print >>sys.stderr, "ERROR: Output error at line %d" % line_no, ": ", e
except Exception as e:
print("ERROR: Output error at line %d" % line_no, ": ", e, file=sys.stderr)
sys.exit(2)

def main():
Expand All @@ -88,45 +88,45 @@ def main():
args = parser.parse_args()

if args.version:
print "json2csv version %s" % version_string
print("json2csv version %s" % version_string)
sys.exit(0)

# Get keys in the first line (using jq)
first_line = sys.stdin.readline().strip()
if len(first_line) < 2:
if args.notfound:
print args.notfound
print(args.notfound)
sys.exit(0)
if args.allowempty and len(first_line) <= 0:
sys.exit(0)
print >>sys.stderr, "ERROR: Input too short. Did put a JSON recond in a line? (if not, pass data through 'jq -c .')"
print("ERROR: Input too short. Did put a JSON recond in a line? (if not, pass data through 'jq -c .')", file=sys.stderr)
sys.exit(1)
if first_line[0] != '{' or first_line[-1] != '}':
print >>sys.stderr, "ERROR: The first line of the input does not look like a JSON (hash) object."
print("ERROR: The first line of the input does not look like a JSON (hash) object.", file=sys.stderr)
sys.exit(1)
if args.columns != None:
array_of_keys = args.columns.split(",")
else:
try:
p = subprocess.Popen(["jq", "-c", "keys"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=True)
except:
print >>sys.stderr, "ERROR: jq did not run. Probably you do not have jq installed properly?"
print >>sys.stderr, " Please type jq [enter] to check if jq can be executed from command line."
print >>sys.stderr, " If it does not help, please ask your system administrator."
print("ERROR: jq did not run. Probably you do not have jq installed properly?", file=sys.stderr)
print(" Please type jq [enter] to check if jq can be executed from command line.", file=sys.stderr)
print(" If it does not help, please ask your system administrator.", file=sys.stderr)
sys.exit(1)
print >>p.stdin, first_line
print(first_line, file=p.stdin)
p.stdin.close()
processed = p.stdout.readline().strip()
if len(processed) < 2:
print >>sys.stderr, "ERROR: jq error. The output of jq contains less than two characters."
print("ERROR: jq error. The output of jq contains less than two characters.", file=sys.stderr)
sys.exit(1)
if processed[0] != '[' or processed[-1] != ']':
print >>sys.stderr, "ERROR: jq error. The output of jq does not contain an array of keys."
print("ERROR: jq error. The output of jq does not contain an array of keys.", file=sys.stderr)
sys.exit(1)
try:
array_of_keys = list(csv.reader([processed[1:-1]]))[0]
except:
print >>sys.stderr, "ERROR: csv module (of Python) could not parse the output of jq (keys)"
print("ERROR: csv module (of Python) could not parse the output of jq (keys)", file=sys.stderr)
sys.exit(1)

# Output the header (if needed)
Expand All @@ -143,8 +143,8 @@ def main():
writer.writeheadrow(array_of_keys)
else:
writer.writerow(array_of_keys)
except Exception, e:
print >>sys.stderr, "ERROR: Output error (header) :", e
except Exception as e:
print("ERROR: Output error (header) :", e, file=sys.stderr)
sys.exit(1)
# Output the CSV lines
line_number = 1
Expand All @@ -155,8 +155,8 @@ def main():
line_number += 1
json_object = json.loads(line.strip())
convert_json_to_anotherformat(json_object, array_of_keys, writer, line_number)
except Exception, e:
print >>sys.stderr, "ERROR: JSON parsing error at line %d : " % line_number, e
except Exception as e:
print("ERROR: JSON parsing error at line %d : " % line_number, e, file=sys.stderr)
sys.exit(1)
# Output the footer (if needed)
if args.html:
Expand Down
25 changes: 0 additions & 25 deletions script/ods2xls

This file was deleted.

25 changes: 0 additions & 25 deletions script/ods2xlsx

This file was deleted.

42 changes: 21 additions & 21 deletions script/recompressbyxz
Original file line number Diff line number Diff line change
Expand Up @@ -9,41 +9,41 @@ def file_name_looks_like_hard_to_compress_file(file_name):

def recompress(file_name, base_name, uncompress_command):
if file_name == '' or file_name == None:
print "ERROR (file name is empty). Skipped.\n"
print("ERROR (file name is empty). Skipped.\n")
return 1
if base_name == '' or base_name == None:
print "ERROR (base file name is empty). Skipped.\n";
print("ERROR (base file name is empty). Skipped.\n");
return 1
if uncompress_command == '' or uncompress_command == None:
print "ERROR (uncompress command is empty). Skipped.\n";
print("ERROR (uncompress command is empty). Skipped.\n");
return 1
if file_name == (base_name + ".xz"):
print "ERROR (file name is wrong). Skipped.\n"
print("ERROR (file name is wrong). Skipped.\n")
return 1
print "compressing...\r",
print("compressing...\r", end=' ')
# print >> sys.stderr, "%s %s | xz > %s.xz" % (uncompress_command, file_name, base_name)
r = os.system("%s %s | xz > %s.xz" % (uncompress_command, file_name, base_name))
if r != 0:
print file_name, "was not compressed "
print(file_name, "was not compressed ")
return r
print file_name, "compressed. Veryfying...\r",
print(file_name, "compressed. Veryfying...\r", end=' ')
# print >> sys.stderr, "xzcat %s.xz > %s.xz.uncompressed" % (base_name, base_name)
r2 = os.system("xzcat %s.xz > %s.xz.uncompressed" % (base_name, base_name))
if r2 != 0:
print file_name, "was compressed, but not verified (1)."
print(file_name, "was compressed, but not verified (1).")
os.unlink("%s.xz.uncompressed" % base_name)
return r2
# print >> sys.stderr, "%s %s > %s.uncompressed" % (uncompress_command, file_name, file_name)
r3 = os.system("%s %s > %s.uncompressed" % (uncompress_command, file_name, file_name))
if r3 != 0:
print file_name, "was compressed, but not verified (2)."
print(file_name, "was compressed, but not verified (2).")
os.unlink("%s.xz.uncompressed" % base_name)
os.unlink("%s.uncompressed" % file_name)
return r2
# print >> sys.stderr, "cmp %s.xz.uncompressed %s.uncompressed" % (base_name, file_name)
r4 = os.system("cmp %s.xz.uncompressed %s.uncompressed" % (base_name, file_name))
if r4 == 0:
print file_name, "was compressed, and verified."
print(file_name, "was compressed, and verified.")
os.unlink(file_name)
os.unlink("%s.xz.uncompressed" % base_name)
os.unlink("%s.uncompressed" % file_name)
Expand All @@ -54,42 +54,42 @@ def recompress_by_xz(file_names):
for file_name in file_names:
if file_name == None or file_name == '':
continue
print file_name,
print(file_name, end=' ')
if not os.path.exists(file_name):
print "does not exist. Skipped."
print("does not exist. Skipped.")
continue
if not os.path.isfile(file_name):
print "is not a file. Skipped."
print("is not a file. Skipped.")
continue
if file_name.endswith(".xz"):
print "is already an xzed file. Skipped."
print("is already an xzed file. Skipped.")
continue
if file_name_looks_like_hard_to_compress_file(file_name):
print "is probably incompressible. Skipped."
print("is probably incompressible. Skipped.")
continue
if file_name.endswith(".gz"):
recompress(file_name, file_name[:-3], "zcat")
elif file_name.endswith(".bz2"):
recompress(file_name, file_name[:-4], "bzcat")
elif os.path.getsize(file_name) < 4096:
print "is too small to gain space."
print("is too small to gain space.")
else:
print "compressing...\r",
print("compressing...\r", end=' ')
r = os.system("xz %s" % file_name)
if r == 0:
print file_name, "compressed "
print(file_name, "compressed ")
else:
print file_name, "was not compressed "
print(file_name, "was not compressed ")

def ensure_tools_are_available():
for tool in ["xz", "xzcat", "bunzip2", "zcat"]:
if os.system("%s --help 2&>1 > /dev/null" % tool) != 0:
print >> sys.stderr, "ERROR %s is not available." % tool
print("ERROR %s is not available." % tool, file=sys.stderr)
sys.exit(2)

def main():
if len(sys.argv) < 2:
print >>sys.stderr, "Usage: recompressbyxz <input file>"
print("Usage: recompressbyxz <input file>", file=sys.stderr)
ensure_tools_are_available()
recompress_by_xz(sys.argv[1:])

Expand Down
Loading

0 comments on commit 43d194c

Please sign in to comment.