Skip to content

Commit

Permalink
Merge branch 'master' of github.com:xguse/rSeqPipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
Augustine Dunn committed Jan 30, 2013
2 parents 576fe92 + 59277ff commit 04b635b
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 9 deletions.
37 changes: 37 additions & 0 deletions rSeq/scripts/unique_novel_genes_in_gtf_4mariangela.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""
Tool for Mariangela to sort out the number of Cufflinks novel
genes that have at least one exon far from any annotated exon.
"""

import argparse


def main(argv=None):
    """
    Count and list the unique symbols found in the attributes column of a
    gtf file.

    GIVEN:
        - argv = optional list of command-line arguments holding the path
          to the gtf file; when None, argparse falls back to sys.argv[1:].
    DOES:
        - Reads the gtf file one line at a time, ignoring blank lines and
          '#' comment lines.
        - Takes the first double-quoted value of the 9th tab-separated
          column of each record (presumably the cufflinks XLOC gene id --
          confirm against the gtf being used) and stores it in a set.
        - Prints the number of distinct symbols, then the sorted symbols
          one per line.
    RETURNS:
        - None
    RAISES:
        - ValueError if a record has fewer than 9 columns or no quoted
          value in its 9th column.
    """

    desc = """tool for mariangela to sort out the number of cufflinks novel
genes who have at least one exon far from any annotated exon."""

    parser = argparse.ArgumentParser(description=desc)

    # The positional argument is required, so advertising a default in the
    # help text would only ever print "(default: None)".
    parser.add_argument('gtf', type=str,
                        help="Path to gtf file.")

    args = parser.parse_args(argv)

    xloc_set = set()

    # The with-block guarantees the file is closed even if a record is bad.
    # Plain text mode is used because the 'U' flag was removed in
    # Python 3.11; stray '\r' characters are stripped by hand instead.
    with open(args.gtf) as gtf:
        for line_number, line in enumerate(gtf, 1):
            line = line.rstrip('\r\n')

            # Header/comment lines and blank lines carry no record.
            if not line.strip() or line.startswith('#'):
                continue

            fields = line.split('\t')
            if len(fields) < 9:
                raise ValueError(
                    'line %d of %s has %d tab-separated columns; expected at least 9'
                    % (line_number, args.gtf, len(fields)))

            comments = fields[8].split('"')
            if len(comments) < 2:
                raise ValueError(
                    'line %d of %s has no quoted value in its attributes column'
                    % (line_number, args.gtf))

            xloc_set.add(comments[1])

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('Number of unique xloc symbols: %s' % len(xloc_set))
    print('Unique xloc symbols:\n%s' % '\n'.join(sorted(xloc_set)))


if __name__ == "__main__":
    main()
21 changes: 16 additions & 5 deletions rSeq/utils/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,17 +164,22 @@ def MB_2_gff3(resultTablePath,gff3Path):
gff3_lines = []

mb_table = tableFile2namedTuple(resultTablePath,sep='\t')

skipped = 0
for line in mb_table:
gff3_seqid = align_feat.chr
chrm,left,right = line.locus.replace('-',':').split(':')
if int(left) < 1:
skipped += 1
continue
gff3_seqid = chrm
gff3_source = 'Cufflinks'
gff3_type = 'Assembled Tx boundries'
gff3_start = left
gff3_end = right
gff3_score = line.q_value
gff3_strand = strandConvertions[align_feat.seq_region_strand]
gff3_strand = '?'
gff3_phase = '.'
gff3_attributes = 'ID=%s;Alias=%s' % (align_feat.dna_align_feature_id, align_feat.hit_name)
gff3_attributes = 'ID=%s;Alias=%s;Note=%s' % \
(line.tracking_id, line.nearest_ref_id, line.class_code)

gff3_lines.append([gff3_seqid,
gff3_source,
Expand All @@ -184,8 +189,14 @@ def MB_2_gff3(resultTablePath,gff3Path):
gff3_score,
gff3_strand,
gff3_phase,
gff3_attributes])
gff3_attributes])

gff3Out = open(gff3Path,'w')
for line in gff3_lines:
gff3Out.write('%s\n' % ('\t'.join(line)))
gff3Out.close()

return skipped

def vectorBaseESTs_2_gff3(resultTablePath,gff3Path):
"""
Expand Down
33 changes: 33 additions & 0 deletions rSeq/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,3 +634,36 @@ def renameChrom_in_SAM(path):
except:
raise


def rename_fasta_headers(in_path,out_path,header_func):
    """
    GIVEN:
        - in_path = path to original fasta file
        - out_path = path to future altered fasta file
        - header_func = function to take a header line (including its
          trailing newline) and return an altered string version of it
    DOES:
        - Reads in in_path file one line at a time
        - If the line is a fasta header (starts with '>')
          uses header_func logic to rearrange the header and
          writes out the changed line to out_path, terminated by
          exactly one '\\n'.
        - If not a header, writes same line out to out_path.
        - Closes both file objects, even if header_func raises.
    RETURNS:
        - None
    """

    # Context managers release both handles on every exit path; the
    # original only closed them when the loop finished without error.
    # Plain text mode is used because the 'U' flag was removed in
    # Python 3.11 (text mode already translates newlines there).
    with open(in_path) as in_file, open(out_path, 'w') as out_file:
        for line in in_file:
            if line.startswith('>'):
                # Whatever header_func returns, normalise it so the header
                # ends with one and only one line terminator.
                line = header_func(line).rstrip('\r\n') + '\n'

            out_file.write(line)
25 changes: 21 additions & 4 deletions rSeq/utils/motifDiscovery/motifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,16 @@ def toXMSmotif(self):
this motif.
"""
# ++ initialize xms motif string ++
xMtf = '<motif>\n\t<name>%s_%.3f</name>\n\t\t<weightmatrix alphabet="DNA" columns="%s">\n' %\
(self.consensus,float(self.sigvalue),len(self.pwm['A']))
try:
xMtf = '<motif>\n\t<name>%s_%.3f</name>\n\t\t<weightmatrix alphabet="DNA" columns="%s">\n' %\
(self.consensus,float(self.sigvalue),len(self.pwm['A']))
except AttributeError as err:
if 'sigvalue' in str(err.message):
self.sigvalue = 'nan'
xMtf = '<motif>\n\t<name>%s_%.3f</name>\n\t\t<weightmatrix alphabet="DNA" columns="%s">\n' %\
(self.consensus,float(self.sigvalue),len(self.pwm['A']))
else:
raise err

# ++ create and add each column's data ++
for i in range(len(self.pwm['A'])):
Expand All @@ -290,8 +298,17 @@ def toXMSmotif(self):
(self.sigvalue)
xMtf += '\t\t<prop>\n\t\t\t<key>rank</key>\n\t\t\t<value>%s</value></prop>\n' % \
(self.rank)
xMtf += '\t\t<prop>\n\t\t\t<key>algorithm</key>\n\t\t\t<value>%s</value></prop>\n' % \
(self.algorithm)
try:
xMtf += '\t\t<prop>\n\t\t\t<key>algorithm</key>\n\t\t\t<value>%s</value></prop>\n' % \
(self.algorithm)
except AttributeError as err:
if 'algorithm' in str(err.message):
self.algorithm = 'nan'
xMtf += '\t\t<prop>\n\t\t\t<key>algorithm</key>\n\t\t\t<value>%s</value></prop>\n' % \
(self.algorithm)
else:
raise err

xMtf += '</motif>\n'

return xMtf
Expand Down

0 comments on commit 04b635b

Please sign in to comment.