Skip to content

Commit

Permalink
Merge pull request #2 from breznak/story-teller
Browse files Browse the repository at this point in the history
Merged in Story Teller
  • Loading branch information
chetan51 committed Nov 17, 2013
2 parents 46607b7 + 086bc97 commit 93a6c84
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 19 deletions.
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
Linguist
========

An AI running on NuPIC using the CLA to build a model of language, and predict the rest of a user's word, phrase, sentence.
An AI running on [NuPIC](https://github.com/numenta/nupic) using the CLA to build a model of language, and predict the rest of a user's word, phrase, sentence.

The current application works as a "story teller": it trains on a dataset you give it and then produces a story for you. You can specify the number of sentences the story
should have and the beginning words to kick it off, e.g. "There were two brothers ".

Research:

The CLA learns sentences letter-by-letter; we'd like to observe whether grammatical features can emerge. See [Geoffrey Hinton's text-generation deep NNs](https://class.coursera.org/neuralnets-2012-001/lecture/91) for reference.

Usage
========
Expand All @@ -12,4 +19,4 @@ Then, run:

python client/linguist.py data/tiny.txt

It'll run on the tiny.txt dataset, feeding it the sentence over and over again, printing out Linguist's next 10 timestep predictions at each step.
It'll run on the tiny.txt dataset, feeding it the sentence over and over again, printing out Linguist's next 10 timestep predictions at each step.
69 changes: 59 additions & 10 deletions client/linguist.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,14 @@
import model_params
import re

NUM_REPEATS = 1000
NUM_REPEATS = 5
PRINT_EVERY_REPEAT_N = 1

TERMINATORS = ['.','!','?','|']
NUM_SENTENCES = 5 # number for story sentences generated
STORY_START = "The dog"
_QUIT = "QUIT"

def clean(s):
return re.sub('\n', '|', s)

Expand Down Expand Up @@ -74,30 +79,74 @@ def runLinguist(datapath):
if should_print:
print "\n====== Repeat #%d =======\n" % (r + 1)

last_c = ''

with open(datapath) as f:
while True:
c = f.read(1)
if not c: break

if ord(c) <= 31 and ord(c) >= 127 and ord(c) != 10: continue
if (last_c == ' ' or last_c == '\n') and c == last_c: continue

last_c = c
if not ( (ord(c) >= 31 and ord(c) <= 127) or c == '\n'): continue

modelInput = {'letter': c}
result = model.run(modelInput)

if should_print:
print "[%i]\t %s ==> %s\t(%s)" % (i, clean(modelInput['letter']), prediction(result.inferences), confidences(result.inferences))
if c in TERMINATORS:
model.resetSequenceStates()
print "reset"

i += 1

return model

def tellStory(model, startSent, lenght):
"""feed startSent sequence and then continue to generate the story by the CLA.
param model: the trained CLA model
param startSent: starting sequence as a string, eg \"And so he raised the gun\"
param lenght: #sequences to generate. """

model.disableLearning()
for s in startSent:
print(s),
modelInput = {'letter': s}
result = model.run(modelInput)

numSent = 0
c = s
sentence_len = 0
while numSent <= lenght:
print(c),
modelInput = {'letter': c}
result = model.run(modelInput)
c=result.inferences['prediction'][0]

sentence_len += 1
if(sentence_len > 30): #limit, sometimes there's no sentence terminator generated and we'd run forever
numSent += 1
sentence_len = 0

if c in TERMINATORS:
numSent += 1
sentence_len = 0
print(' \n')


if __name__ == "__main__":
if len(sys.argv) > 1:
datapath = sys.argv[1]
runLinguist(datapath)
if len(sys.argv) > 2:
NUM_REPEATS = int(sys.argv[2])
if len(sys.argv) > 3:
NUM_SENTENCES = int(sys.argv[3])

model = runLinguist(datapath)
print('==========================================')
print('Welcome young adventurer, let me tell you a story! ')
while True:
STORY_START = raw_input('Enter story start (QUIT to go to work): ')
if(STORY_START == "QUIT"):
break
tellStory(model, STORY_START, NUM_SENTENCES)

print('Farewell')
else:
print "Usage: python linguist.py [path/to/data.txt]"
print "Usage: python linguist.py path/to/data.txt [num_repeats [num_sentences]]"
16 changes: 9 additions & 7 deletions client/model_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,14 @@
# threshold set below this default value.
# (This concept applies to both SP and TP and so 'cells'
# is correct here as opposed to 'columns')
'synPermConnected': 0.1,
'synPermConnected': 0.2,

'synPermActiveInc': 0.1,

'synPermInactiveDec': 0.025910586208889513,
'synPermInactiveDec': 0.1,

'randomSP': 0,
'spatialImp': 'cpp',
},

# Controls whether TP is enabled or disabled;
Expand All @@ -158,7 +159,7 @@
'columnCount': 2048,

# The number of cells (i.e., states), allocated per column.
'cellsPerColumn': 32,
'cellsPerColumn': 8,

'inputWidth': 2048,

Expand Down Expand Up @@ -227,7 +228,7 @@
# elements to append to the end of a learned sequence at a time.
# Smaller values are better for datasets with short sequences,
# higher values are better for datasets with long sequences.
'pamLength': 2,
'pamLength': 5,
},

'clParams': {
Expand All @@ -239,13 +240,14 @@

# This controls how fast the classifier learns/forgets. Higher values
# make it adapt faster and forget older patterns faster.
'alpha': 0.00080959960713530062,
'alpha': 0.01080959960713530062,

# This is set after the call to updateConfigFromSubConfig and is
# computed from the aggregationInfo and predictAheadTime.
'steps': '1,2,3,4,5,6,7,8,9,10',
#'steps': '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16',
'steps': '1,2,3,4,5',
},

'trainSPNetOnlyIfRequested': False,
'trainSPNetOnlyIfRequested': True,
},
}
15 changes: 15 additions & 0 deletions data/grammar/grammar_likes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
We like grammar.
We love grammar.
John loves grammar.
We run.
John runs.
We sing.
John sings.
We sit.
John sits.
We dig.
John digs.
We rock.
John rocks.
We spend time.
John spends time.

0 comments on commit 93a6c84

Please sign in to comment.