Skip to content

Commit

Permalink
Merge pull request #16 from harazono/master
Browse files Browse the repository at this point in the history
ADD: NEW script: split_paf
  • Loading branch information
mkasa authored Nov 7, 2022
2 parents 0bd41f6 + f910c82 commit 43d194c
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 128 deletions.
42 changes: 20 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,28 +37,26 @@ You may need to be root for installing the Perl libraries.
List
----

<table style="border=1 solid">
<tr><th>name</th><th>type</th><th>description</th><th>note</th></tr>
<tr><td>fatt</td><td>bio</td><td>FASTA/FASTQ manipulation tool.</td><td>see also doc/fatt.md</td></tr>
<tr><td>sieve</td><td>general</td><td>Random sampling of text files.</td><td>maybe useful for cross-validation</td></tr>
<tr><td>convertsequence</td><td>bio</td><td>Format conversion of sequence files.</td><td>see also perldoc</td></tr>
<tr><td>fixshebang</td><td>general</td><td>Fix shebang lines.</td><td>see also perldoc</td></tr>
<tr><td>gcc-color</td><td>general</td><td>Color the output of gcc/g++.</td><td>not extensively used.</td></tr>
<tr><td>icc-color</td><td>general</td><td>Color the output of Intel C++.</td><td></td></tr>
<tr><td>mydaemon</td><td>general</td><td>Automatically sets up crontab to ensure your daemon is running. (Works without root)</td><td></td></tr>
<tr><td>rep</td><td>general</td><td>Simple wrapper for different VCS such as svn/git/hg.</td><td>see also perldoc</td></tr>
<tr><td>sq</td><td>general</td><td>Execute SQL queries over CSV files.</td><td>Obsolete. Use q instead.</td></tr>
<tr><td>mddoc</td><td>general</td><td>Simple wrapper to view formatted Markdown (and restructured) texts via text browser</td><td>Obsolete. Use glow.</td></tr>
<tr><td>gmddoc</td><td>general</td><td>Simple wrapper to view formatted GitHub-flavored Markdown via (graphic) web browser</td><td>Obsolete. Standalone grip can do this now.</td></tr>
<tr><td>sha_scan</td><td>general</td><td>Find duplicated files by collecting SHA1 hashes of files in a given directory.</td><td></td></tr>
<tr><td>rep</td><td>general</td><td>Repository utility (one command, manage svn/cvs/git/hg at once!)</td><td></td></tr>
<tr><td>taw</td><td>general</td><td>Tiny Amazon EC2 Wrapper.</td><td>Moved to an independent repository</td></tr>
<tr><td>gfwhere</td><td>general</td><td>Find inconsistently replicated files in GlusterFS</td><td></td></tr>
<tr><td>json2csv</td><td>general</td><td>Convert JSON into CSV</td><td></td></tr>
<tr><td>csv2html</td><td>general</td><td>Convert CSV into HTML</td><td></td></tr>
<tr><td>csv2md</td><td>general</td><td>Convert CSV into a table in Markdown extra</td><td></td></tr>
<tr><td></td><td></td><td></td><td></td></tr>
</table>
|name|type|description|note|
|---|---|---|---|
|fatt|bio|FASTA/FASTQ manipulation tool.|see also doc/fatt.md|
|sieve|general|Random sampling of text files.|maybe useful for cross-validation|
|convertsequence|bio|Format conversion of sequence files.|see also perldoc|
|fixshebang|general|Fix shebang lines.|see also perldoc|
|gcc-color|general|Color the output of gcc/g++.|not extensively used.|
|icc-color|general|Color the output of Intel C++.|
|mydaemon|general|Automatically sets up crontab to ensure your daemon is running. (Works without root)|
|rep|general|Simple wrapper for different VCS such as svn/git/hg.|see also perldoc|
|sq|general|Execute SQL queries over CSV files.|see also perldoc. requires a bunch of Perl modules. DBD::CSV is required.|
|mddoc|general|Simple wrapper to view formatted Markdown (and restructured) texts via text browser|Requires Markdown.pl or pandoc.|
|gmddoc|general|Simple wrapper to view formatted GitHub-flavored Markdown via (graphic) web browser|Requires grip (python module).|
|sha_scan|general|Find duplicated files by collecting SHA1 hashes of files in a given directory.|
|rep|general|Repository utility (one command, manage svn/cvs/git/hg at once!)|
|gfwhere|general|Find inconsistently replicated files in GlusterFS|
|json2csv|general|Convert JSON into CSV|
|csv2html|general|Convert CSV into HTML|
|csv2md|general|Convert CSV into a table in Markdown extra|
|split_paf|bio|Split alignments (PAF format) by looking at the CIGAR string|

Licenses
--------
Expand Down
10 changes: 5 additions & 5 deletions script/csv2html
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def main():
args = parser.parse_args()

if args.version:
print "csv2html version %s" % version_string
print("csv2html version %s" % version_string)
sys.exit(0)
(column_names, header_attrs, data_attrs) = (None, None, None)
# Get keys in the first line (using jq)
Expand All @@ -184,15 +184,15 @@ def main():
is_first_line = False
try:
(column_names, header_attrs, data_attrs) = parse_attributes(args.colfmt, len(line))
except Exception, e:
print >> sys.argvstderr, "ERROR: attribute parsing failed: ", e
except Exception as e:
print("ERROR: attribute parsing failed: ", e, file=sys.argvstderr)
sys.exit(2)
if not args.noheader:
writer.writeheadrow(line, header_attrs, column_names)
continue
writer.writerow(line, data_attrs)
except Exception, e:
print >> sys.stderr, "ERROR: ", e, " at line %d" % line_count
except Exception as e:
print("ERROR: ", e, " at line %d" % line_count, file=sys.stderr)
sys.exit(3)
if not args.notabletag: w("</table>\n")

Expand Down
10 changes: 5 additions & 5 deletions script/csv2md
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def main():
args = parser.parse_args()

if args.version:
print "csv2md version %s" % version_string
print("csv2md version %s" % version_string)
sys.exit(0)
(column_names, header_attrs, data_attrs) = (None, None, None)
# Get keys in the first line (using jq)
Expand All @@ -177,15 +177,15 @@ def main():
is_first_line = False
try:
(column_names, header_attrs, data_attrs) = parse_attributes(args.colfmt, len(line))
except Exception, e:
print >> sys.argvstderr, "ERROR: attribute parsing failed: ", e
except Exception as e:
print("ERROR: attribute parsing failed: ", e, file=sys.argvstderr)
sys.exit(2)
if not args.noheader:
writer.writeheadrow(line, data_attrs, column_names)
continue
writer.writerow(line, data_attrs)
except Exception, e:
print >> sys.stderr, "ERROR: ", e, " at line %d" % line_count
except Exception as e:
print("ERROR: ", e, " at line %d" % line_count, file=sys.stderr)
sys.exit(3)

if __name__ == '__main__':
Expand Down
42 changes: 21 additions & 21 deletions script/json2csv
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import os,sys,subprocess,csv,json,argparse
class TSVWriter:
def writerow(self, cols):
cols = [str(x) for x in cols]
print "\t".join(cols);
print("\t".join(cols));

class HTMLWriter:
def writeheadrow(self, cols):
Expand All @@ -61,17 +61,17 @@ def convert_json_to_anotherformat(jsonobj, array_of_keys, writer, line_no):
try:
for key in array_of_keys:
if not key in jsonobj:
print >>sys.stderr, "ERROR: does not contain a key '%s' at line %d" % (key, line_no)
print("ERROR: does not contain a key '%s' at line %d" % (key, line_no), file=sys.stderr)
sys.exit(2)
value = jsonobj[key]
csv_array.append(value)
except Exception, e:
print >>sys.stderr, "ERROR: ", e, "at line %d" % line_no
except Exception as e:
print("ERROR: ", e, "at line %d" % line_no, file=sys.stderr)
sys.exit(2)
try:
writer.writerow(csv_array)
except Exception, e:
print >>sys.stderr, "ERROR: Output error at line %d" % line_no, ": ", e
except Exception as e:
print("ERROR: Output error at line %d" % line_no, ": ", e, file=sys.stderr)
sys.exit(2)

def main():
Expand All @@ -88,45 +88,45 @@ def main():
args = parser.parse_args()

if args.version:
print "json2csv version %s" % version_string
print("json2csv version %s" % version_string)
sys.exit(0)

# Get keys in the first line (using jq)
first_line = sys.stdin.readline().strip()
if len(first_line) < 2:
if args.notfound:
print args.notfound
print(args.notfound)
sys.exit(0)
if args.allowempty and len(first_line) <= 0:
sys.exit(0)
print >>sys.stderr, "ERROR: Input too short. Did put a JSON recond in a line? (if not, pass data through 'jq -c .')"
print("ERROR: Input too short. Did put a JSON recond in a line? (if not, pass data through 'jq -c .')", file=sys.stderr)
sys.exit(1)
if first_line[0] != '{' or first_line[-1] != '}':
print >>sys.stderr, "ERROR: The first line of the input does not look like a JSON (hash) object."
print("ERROR: The first line of the input does not look like a JSON (hash) object.", file=sys.stderr)
sys.exit(1)
if args.columns != None:
array_of_keys = args.columns.split(",")
else:
try:
p = subprocess.Popen(["jq", "-c", "keys"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=True)
except:
print >>sys.stderr, "ERROR: jq did not run. Probably you do not have jq installed properly?"
print >>sys.stderr, " Please type jq [enter] to check if jq can be executed from command line."
print >>sys.stderr, " If it does not help, please ask your system administrator."
print("ERROR: jq did not run. Probably you do not have jq installed properly?", file=sys.stderr)
print(" Please type jq [enter] to check if jq can be executed from command line.", file=sys.stderr)
print(" If it does not help, please ask your system administrator.", file=sys.stderr)
sys.exit(1)
print >>p.stdin, first_line
print(first_line, file=p.stdin)
p.stdin.close()
processed = p.stdout.readline().strip()
if len(processed) < 2:
print >>sys.stderr, "ERROR: jq error. The output of jq contains less than two characters."
print("ERROR: jq error. The output of jq contains less than two characters.", file=sys.stderr)
sys.exit(1)
if processed[0] != '[' or processed[-1] != ']':
print >>sys.stderr, "ERROR: jq error. The output of jq does not contain an array of keys."
print("ERROR: jq error. The output of jq does not contain an array of keys.", file=sys.stderr)
sys.exit(1)
try:
array_of_keys = list(csv.reader([processed[1:-1]]))[0]
except:
print >>sys.stderr, "ERROR: csv module (of Python) could not parse the output of jq (keys)"
print("ERROR: csv module (of Python) could not parse the output of jq (keys)", file=sys.stderr)
sys.exit(1)

# Output the header (if needed)
Expand All @@ -143,8 +143,8 @@ def main():
writer.writeheadrow(array_of_keys)
else:
writer.writerow(array_of_keys)
except Exception, e:
print >>sys.stderr, "ERROR: Output error (header) :", e
except Exception as e:
print("ERROR: Output error (header) :", e, file=sys.stderr)
sys.exit(1)
# Output the CSV lines
line_number = 1
Expand All @@ -155,8 +155,8 @@ def main():
line_number += 1
json_object = json.loads(line.strip())
convert_json_to_anotherformat(json_object, array_of_keys, writer, line_number)
except Exception, e:
print >>sys.stderr, "ERROR: JSON parsing error at line %d : " % line_number, e
except Exception as e:
print("ERROR: JSON parsing error at line %d : " % line_number, e, file=sys.stderr)
sys.exit(1)
# Output the footer (if needed)
if args.html:
Expand Down
25 changes: 0 additions & 25 deletions script/ods2xls

This file was deleted.

25 changes: 0 additions & 25 deletions script/ods2xlsx

This file was deleted.

42 changes: 21 additions & 21 deletions script/recompressbyxz
Original file line number Diff line number Diff line change
Expand Up @@ -9,41 +9,41 @@ def file_name_looks_like_hard_to_compress_file(file_name):

def recompress(file_name, base_name, uncompress_command):
if file_name == '' or file_name == None:
print "ERROR (file name is empty). Skipped.\n"
print("ERROR (file name is empty). Skipped.\n")
return 1
if base_name == '' or base_name == None:
print "ERROR (base file name is empty). Skipped.\n";
print("ERROR (base file name is empty). Skipped.\n");
return 1
if uncompress_command == '' or uncompress_command == None:
print "ERROR (uncompress command is empty). Skipped.\n";
print("ERROR (uncompress command is empty). Skipped.\n");
return 1
if file_name == (base_name + ".xz"):
print "ERROR (file name is wrong). Skipped.\n"
print("ERROR (file name is wrong). Skipped.\n")
return 1
print "compressing...\r",
print("compressing...\r", end=' ')
# print >> sys.stderr, "%s %s | xz > %s.xz" % (uncompress_command, file_name, base_name)
r = os.system("%s %s | xz > %s.xz" % (uncompress_command, file_name, base_name))
if r != 0:
print file_name, "was not compressed "
print(file_name, "was not compressed ")
return r
print file_name, "compressed. Veryfying...\r",
print(file_name, "compressed. Veryfying...\r", end=' ')
# print >> sys.stderr, "xzcat %s.xz > %s.xz.uncompressed" % (base_name, base_name)
r2 = os.system("xzcat %s.xz > %s.xz.uncompressed" % (base_name, base_name))
if r2 != 0:
print file_name, "was compressed, but not verified (1)."
print(file_name, "was compressed, but not verified (1).")
os.unlink("%s.xz.uncompressed" % base_name)
return r2
# print >> sys.stderr, "%s %s > %s.uncompressed" % (uncompress_command, file_name, file_name)
r3 = os.system("%s %s > %s.uncompressed" % (uncompress_command, file_name, file_name))
if r3 != 0:
print file_name, "was compressed, but not verified (2)."
print(file_name, "was compressed, but not verified (2).")
os.unlink("%s.xz.uncompressed" % base_name)
os.unlink("%s.uncompressed" % file_name)
return r2
# print >> sys.stderr, "cmp %s.xz.uncompressed %s.uncompressed" % (base_name, file_name)
r4 = os.system("cmp %s.xz.uncompressed %s.uncompressed" % (base_name, file_name))
if r4 == 0:
print file_name, "was compressed, and verified."
print(file_name, "was compressed, and verified.")
os.unlink(file_name)
os.unlink("%s.xz.uncompressed" % base_name)
os.unlink("%s.uncompressed" % file_name)
Expand All @@ -54,42 +54,42 @@ def recompress_by_xz(file_names):
for file_name in file_names:
if file_name == None or file_name == '':
continue
print file_name,
print(file_name, end=' ')
if not os.path.exists(file_name):
print "does not exist. Skipped."
print("does not exist. Skipped.")
continue
if not os.path.isfile(file_name):
print "is not a file. Skipped."
print("is not a file. Skipped.")
continue
if file_name.endswith(".xz"):
print "is already an xzed file. Skipped."
print("is already an xzed file. Skipped.")
continue
if file_name_looks_like_hard_to_compress_file(file_name):
print "is probably incompressible. Skipped."
print("is probably incompressible. Skipped.")
continue
if file_name.endswith(".gz"):
recompress(file_name, file_name[:-3], "zcat")
elif file_name.endswith(".bz2"):
recompress(file_name, file_name[:-4], "bzcat")
elif os.path.getsize(file_name) < 4096:
print "is too small to gain space."
print("is too small to gain space.")
else:
print "compressing...\r",
print("compressing...\r", end=' ')
r = os.system("xz %s" % file_name)
if r == 0:
print file_name, "compressed "
print(file_name, "compressed ")
else:
print file_name, "was not compressed "
print(file_name, "was not compressed ")

def ensure_tools_are_available():
for tool in ["xz", "xzcat", "bunzip2", "zcat"]:
if os.system("%s --help 2&>1 > /dev/null" % tool) != 0:
print >> sys.stderr, "ERROR %s is not available." % tool
print("ERROR %s is not available." % tool, file=sys.stderr)
sys.exit(2)

def main():
if len(sys.argv) < 2:
print >>sys.stderr, "Usage: recompressbyxz <input file>"
print("Usage: recompressbyxz <input file>", file=sys.stderr)
ensure_tools_are_available()
recompress_by_xz(sys.argv[1:])

Expand Down
Loading

0 comments on commit 43d194c

Please sign in to comment.