Skip to content

Commit

Permalink
Better handling of newlines in data
Browse files: browse the repository at this point in the history
  • Loading branch information
chetan51 committed Sep 12, 2013
1 parent 228ce62 commit 55d1896
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
4 changes: 2 additions & 2 deletions client/linguist.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ def runLinguist(datapath):
c = f.read(1)
if not c: break

- if ord(c) <= 31 and ord(c) >= 127: continue
- if last_c == ' ' and c == last_c: continue
+ if ord(c) <= 31 and ord(c) >= 127 and ord(c) != 10: continue
+ if (last_c == ' ' or last_c == '\n') and c == last_c: continue

last_c = c

Expand Down
3 changes: 3 additions & 0 deletions tools/txt_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
def convert(datapath):
data = file(datapath).read()
data = re.sub(' +', ' ', data)
+ data = re.sub('\n+', '\n', data)
N = len(data)

print "letter"
Expand All @@ -23,6 +24,8 @@ def convert(datapath):
c = data[i]
if ord(c) > 31 and ord(c) < 127:
print c
+ if ord(c) == 10:
+ print '|'

if __name__ == "__main__":
if len(sys.argv) > 1:
Expand Down

0 comments on commit 55d1896

Please sign in to comment.