
Commit

Merge remote branch 'origin/master'
manning authored and Stanford NLP committed Oct 10, 2015
1 parent 90ad647 commit af70b51
Showing 82 changed files with 5,713 additions and 3,461 deletions.
2 changes: 1 addition & 1 deletion doc/loglinear/QUICKSTART.txt
@@ -2,7 +2,7 @@ loglinear package quickstart:

First, read the ConcatVector section in ARCH.txt.

-To jump straight into working code, go read generateSentenceModel() in edu.stanford.nlp.loglinear.learning.CoNLLBenchmark.
+To jump straight into working code, go read generateSentenceModel() in edu.stanford.nlp.loglinear.CoNLLBenchmark.

#####################################################

2 changes: 1 addition & 1 deletion doc/loglinear/README.txt
@@ -1,6 +1,6 @@
For an explanation of how everything fits together, see ARCH.txt

-For a quick runnable object, go run edu.stanford.nlp.loglinear.learning.CoNLLBenchmark in core's test package.
+For a quick runnable object, go run edu.stanford.nlp.loglinear.CoNLLBenchmark in core's test package.

For a tutorial, see QUICKSTART.txt

135 changes: 135 additions & 0 deletions itest/src/edu/stanford/nlp/ie/qe/QuantifiableEntityExtractorITest.java
@@ -0,0 +1,135 @@
package edu.stanford.nlp.ie.qe;

import edu.stanford.nlp.ling.tokensregex.MatchedExpression;
import edu.stanford.nlp.pipeline.*;
import junit.framework.TestCase;

import java.util.List;

/**
* Test for quantifiable entity extractor
* @author Angel Chang
*/
public class QuantifiableEntityExtractorITest extends TestCase {
  static AnnotationPipeline pipeline = null;
  static QuantifiableEntityExtractor extractor = null;

  public void test() throws Exception {
    // TODO: Enable tests after rules files are added to models
  }

  @Override
  public void setUp() throws Exception {
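    // Build the annotation pipeline once and share it across tests; the
    // class-level lock keeps the lazy initialization thread-safe.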
    synchronized (QuantifiableEntityExtractorITest.class) {
      if (pipeline == null) {
        pipeline = new AnnotationPipeline();
        pipeline.addAnnotator(new TokenizerAnnotator(false, "en"));
        pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
        pipeline.addAnnotator(new POSTaggerAnnotator(DefaultPaths.DEFAULT_POS_MODEL, false));
        //pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false, false));
      }
      extractor = new QuantifiableEntityExtractor();
      //extractor.init(new Options());
    }
  }

  protected static Annotation createDocument(String text) {
    Annotation annotation = new Annotation(text);
    pipeline.annotate(annotation);
    return annotation;
  }

  public static class ExpectedQuantity {
    String text;
    String normalizedValue;
    String type;

    public ExpectedQuantity(String text, String normalizedValue, String type) {
      this.text = text;
      this.normalizedValue = normalizedValue;
      this.type = type;
    }
  }

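  /**
   * Runs the extractor over each sentence and compares matched text, normalized
   * value, and unit type against the expected quantities. Passing null for
   * expected prints the actual matches and then fails, which is handy when
   * bootstrapping new test cases.
   */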
  public void runAndCheck(String prefix, String[] sentences, ExpectedQuantity[][] expected) throws Exception {
    for (int si = 0; si < sentences.length; si++) {
      String sentence = sentences[si];
      Annotation annotation = createDocument(sentence);
      List<MatchedExpression> matchedExpressions = extractor.extract(annotation);

      // Print out matched text and value
      if (expected == null) {
        for (int i = 0; i < matchedExpressions.size(); i++) {
          String text = matchedExpressions.get(i).getText();
          Object value = matchedExpressions.get(i).getValue();
          System.out.println(prefix + ": Got expression " + text + " with value " + value);
        }
        assertTrue(prefix + ": No expected provided", false);
      } else {
        int minMatchable = Math.min(expected[si].length, matchedExpressions.size());
        for (int i = 0; i < minMatchable; i++) {
          ExpectedQuantity expectedQuantity = expected[si][i];
          MatchedExpression matched = matchedExpressions.get(i);
          SimpleQuantifiableEntity actualQuantity = (SimpleQuantifiableEntity) matched.getValue().get();
          assertEquals(prefix + ".matched." + si + "." + i + ".text", expectedQuantity.text, matched.getText());
          assertEquals(prefix + ".matched." + si + "." + i + ".normalizedValue", expectedQuantity.normalizedValue, actualQuantity.toString());
          assertEquals(prefix + ".matched." + si + "." + i + ".type", expectedQuantity.type, actualQuantity.getUnit().type);
        }
        assertEquals(prefix + ".length." + si, expected[si].length, matchedExpressions.size());
      }
    }
  }

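  // The "_test" prefix keeps JUnit from picking these up until the rules files
  // are added to the models (see the TODO in test() above).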
  public void _testMoney() throws Exception {
    String[] sentences = {
        "I have 1 dollar and 2 cents.",
        "It cost 10 thousand million dollars."
    };
    // TODO: merge the 1 dollar and 2 cents
    ExpectedQuantity[][] expected = {
        {new ExpectedQuantity("1 dollar", "$1.00", "MONEY"), new ExpectedQuantity("2 cents", "$0.02", "MONEY")},
        {new ExpectedQuantity("10 thousand million dollars", "$10000000000.00", "MONEY")}
    };

    runAndCheck("testMoney", sentences, expected);
  }

  public void _testLength() throws Exception {
    String[] sentences = {
        "We are 2 kilometer away.",
        "We are 2 kilometers away.",
        "We turn after 5 miles.",
        "The box is 100 centimeters tall.",
        "The box is 10cm wide.",
        "The box is over 1000 mm long.",
        "The box is 2ft long."
    };
    ExpectedQuantity[][] expected = {
        {new ExpectedQuantity("2 kilometer", "2000.0m", "LENGTH")},
        {new ExpectedQuantity("2 kilometers", "2000.0m", "LENGTH")},
        {new ExpectedQuantity("5 miles", "5.0mi", "LENGTH")},
        {new ExpectedQuantity("100 centimeters", "1.0m", "LENGTH")},
        {new ExpectedQuantity("10cm", "0.1m", "LENGTH")},
        {new ExpectedQuantity("1000 mm", "1.0m", "LENGTH")},
        {new ExpectedQuantity("2ft", "2.0'", "LENGTH")}
    };
    runAndCheck("testLength", sentences, expected);
  }

  // We test weight rather than mass: in everyday language kilograms refer to
  // weight, whereas scientific usage reserves them for mass.
  public void _testWeight() throws Exception {
    String[] sentences = {
        "The ball is 2 kilograms in weight.",
        "There are five grams.",
        "How much is seven pounds?"
    };
    ExpectedQuantity[][] expected = {
        {new ExpectedQuantity("2 kilograms", "2.0kg", "WEIGHT")},
        {new ExpectedQuantity("five grams", "0.005kg", "WEIGHT")},
        {new ExpectedQuantity("seven pounds", "7.0lb", "WEIGHT")}
    };
    runAndCheck("testWeight", sentences, expected);
  }

}
TokenSequenceMatcherITest.java (edu.stanford.nlp.ling.tokensregex)
@@ -1,21 +1,21 @@
package edu.stanford.nlp.ling.tokensregex;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import junit.framework.TestCase;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

public class TokenSequenceMatcherITest extends TestCase {
@@ -94,6 +94,50 @@ public void testTokenSequenceMatcherValue() throws IOException {
    assertFalse(match);
  }

  public void testTokenSequenceMatcherBeginEnd() throws IOException {
    CoreMap doc = createDocument(testText);

    // Test simple sequence with begin sequence matching
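    // "^" anchors the match at the start of the token sequence and each "[]"
    // matches any single token, so only the first two tokens can match.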
    TokenSequencePattern p = TokenSequencePattern.compile("^ [] []");
    TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    boolean match = m.find();
    assertTrue(match);
    assertEquals("the number", m.group());

    match = m.find();
    assertFalse(match);

    // Test simple sequence with end sequence matching
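    // "$" anchors at the end of the token sequence, so the last two tokens
    // ("fifty" and the final period) are the only possible match.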
    p = TokenSequencePattern.compile("[] [] $");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    match = m.find();
    assertTrue(match);
    assertEquals("fifty.", m.group());

    match = m.find();
    assertFalse(match);

    // Test simple sequence with begin and end sequence matching
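    // "^ [] [] $" requires the whole sequence to be exactly two tokens long,
    // which this document is not, so no match is found.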
    p = TokenSequencePattern.compile("^ [] [] $");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    match = m.find();
    assertFalse(match);

    // Test simple sequence with ^$ in a string regular expression
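    // Inside /.../ the anchors are plain string-regex anchors applied to a
    // single token's text, so /^number$/ matches a token exactly equal to "number".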
    p = TokenSequencePattern.compile("/^number$/");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    match = m.find();
    assertTrue(match);
    assertEquals("number", m.group());

    match = m.find();
    assertFalse(match);
  }

  private static final String testText1 = "Mellitus was the first Bishop of London, the third Archbishop of Canterbury, and a member of the Gregorian mission sent to England to convert the Anglo-Saxons. He arrived in 601 AD, and was consecrated as Bishop of London in 604.";

  public void testTokenSequenceMatcher1() throws IOException {
    CoreMap doc = createDocument(testText1);
@@ -179,7 +223,7 @@ public void testTokenSequenceMatcher1() throws IOException {
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
-    assertEquals("London in 604 .", m.group());
+    assertEquals("London in 604.", m.group());
    match = m.find();
    assertFalse(match);
  }
@@ -435,6 +479,31 @@ public void testTokenSequenceMatcherConj() throws IOException {
    assertFalse(match);
  }

  public void testTokenSequenceMatcherConj2() throws IOException {
    String content = "The cat is sleeping on the floor.";
    String greedyPattern = "(?: ([]* cat []*) & ([]* sleeping []*))";

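    // "&" conjoins two branches that must match the same span of tokens; with
    // greedy "[]*" around "cat" and "sleeping", that span is the whole sentence.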
    TokenizerFactory tf = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> tokens = tf.getTokenizer(new StringReader(content)).tokenize();
    TokenSequencePattern seqPattern = TokenSequencePattern.compile(greedyPattern);
    TokenSequenceMatcher matcher = seqPattern.getMatcher(tokens);

    boolean entireMatch = matcher.matches();
    assertTrue(entireMatch);

    boolean match = matcher.find();
    assertTrue(match);
    assertEquals("The cat is sleeping on the floor.", matcher.group());

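    // The reluctant "[]*?" consumes as few tokens as possible, so find() returns
    // the shortest span containing both words: "The cat is sleeping".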
    String reluctantPattern = "(?: ([]*? cat []*?) & ([]*? sleeping []*?))";
    TokenSequencePattern seqPattern2 = TokenSequencePattern.compile(reluctantPattern);
    TokenSequenceMatcher matcher2 = seqPattern2.getMatcher(tokens);

    match = matcher2.find();
    assertTrue(match);
    assertEquals("The cat is sleeping", matcher2.group());
  }

  public void testTokenSequenceMatcherConjAll() throws IOException {
    CoreMap doc = createDocument(testText1);
    TokenSequencePattern p = TokenSequencePattern.compile(
@@ -979,7 +1048,7 @@ public void testTokenSequenceOptimizeOrString() throws IOException {
    TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    boolean match = m.find();
    assertTrue(match);
-    assertEquals("atropine we need to have many many words here but we do n't sweating", m.group(0));
+    assertEquals("atropine we need to have many many words here but we don't sweating", m.group(0));

    match = m.find();
    assertFalse(match);
@@ -1005,7 +1074,7 @@ public void testMultiplePatterns() throws IOException {
    CoreMap doc = createDocument("atropine we need to have many many words here but we don't sweating");
    MultiPatternMatcher<CoreMap> multiPatternMatcher = TokenSequencePattern.getMultiPatternMatcher(p1, p2);
    List<String> expected = new ArrayList<String>();
-    expected.add("atropine we need to have many many words here but we do n't sweating");
+    expected.add("atropine we need to have many many words here but we don't sweating");
    Iterator<String> expectedIter = expected.iterator();

    Iterable<SequenceMatchResult<CoreMap>> matches =
@@ -1187,7 +1256,7 @@ public void testTokenSequenceMatcherNumber() throws IOException {
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
-    assertEquals("January 3 , 2002", m.group());
+    assertEquals("January 3, 2002", m.group());
    match = m.find();
    assertFalse(match);

@@ -1196,7 +1265,7 @@ public void testTokenSequenceMatcherNumber() throws IOException {
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
-    assertEquals("January 3 , 2002", m.group());
+    assertEquals("January 3, 2002", m.group());
    match = m.find();
    assertFalse(match);

@@ -1404,6 +1473,32 @@ public void testTokenSequenceMatcherMultiNodePattern() throws IOException {
    assertFalse(match);
  }

  public void testTokenSequenceMatcherMultiNodePattern2() throws IOException {
    CoreMap doc = createDocument("Replace the lamp with model wss.32dc55c3e945384dbc5e533ab711fd24");

    // Greedy
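    // "(?m){1,4}" marks a multi-node group: the string regex may span one to
    // four consecutive tokens instead of matching a single token's text.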
    TokenSequencePattern p = TokenSequencePattern.compile("/model/ ((?m){1,4}/\\w+\\.\\w+/)");
    TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    boolean match = m.find();
    assertTrue(match);
    assertEquals(1, m.groupCount());
    assertEquals("model wss.32dc55c3e945384dbc5e533ab711fd24", m.group());
    assertEquals("wss.32dc55c3e945384dbc5e533ab711fd24", m.group(1));
    match = m.find();
    assertFalse(match);

    // Reluctant
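    // The reluctant "{1,4}?" spans as few tokens as possible, so the group
    // stops at "wss.32" rather than the full identifier.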
    p = TokenSequencePattern.compile("/model/ ((?m){1,4}?/\\w+\\.\\w+/)");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(1, m.groupCount());
    assertEquals("model wss.32", m.group());
    assertEquals("wss.32", m.group(1));
    match = m.find();
    assertFalse(match);
  }

  public void testTokenSequenceMatcherBackRef() throws IOException {
    CoreMap doc = createDocument("A A A A A A A B A A B A C A E A A A A A A A A A A A B A A A");

@@ -1488,17 +1583,18 @@ public void testCompile() {
    //assertEquals(m.group(), "matching this");
  }

  //This DOES NOT work right now!!
  // public void testCompile2(){
  public void testBindingCompile(){
    Env env = TokenSequencePattern.getNewEnv();
    env.bind("wordname",CoreAnnotations.TextAnnotation.class);
    String s = "[wordname:\"name\"]{1,2}";
    TokenSequencePattern p = TokenSequencePattern.compile(env, s);
  }

  // // This does not work!!!
  // public void testNoBindingCompile(){
  //   Env env = TokenSequencePattern.getNewEnv();
  //   env.bind("wordname",CoreAnnotations.TextAnnotation.class);
  //   String s = "[" + CoreAnnotations.TextAnnotation.class.getName()+":\"name\"]{1,2}";
  //   TokenSequencePattern p = TokenSequencePattern.compile(env, s);
  //   for(Map.Entry<String, Object> vars: env.getVariables().entrySet()){
  //     if(vars.getValue().equals(CoreAnnotations.TextAnnotation.class)){
  //       System.out.println("Found " + vars.getKey() + " binding for " + vars.getValue());
  //     }
  //   }
  // }

  public void testCaseInsensitive1(){
CoNLLBenchmark.java (moved from edu.stanford.nlp.loglinear.learning to edu.stanford.nlp.loglinear)
@@ -1,6 +1,9 @@
-package edu.stanford.nlp.loglinear.learning;
+package edu.stanford.nlp.loglinear;

import edu.stanford.nlp.loglinear.inference.CliqueTree;
+import edu.stanford.nlp.loglinear.learning.AbstractBatchOptimizer;
+import edu.stanford.nlp.loglinear.learning.BacktrackingAdaGradOptimizer;
+import edu.stanford.nlp.loglinear.learning.LogLikelihoodFunction;
import edu.stanford.nlp.loglinear.model.ConcatVector;
import edu.stanford.nlp.loglinear.model.GraphicalModel;
import edu.stanford.nlp.util.HashIndex;
DependencyParserITest.java (edu.stanford.nlp.parser.nndep)
@@ -46,15 +46,15 @@ public void testDependencyParserEnglishSD() {
  }

  // Lower because we're evaluating on PTB + extraDevTest, not just PTB
-  private static final double EnglishUdLas = 84.9873;
+  private static final double EnglishUdLas = 88.72648417258083;

  /**
   * Test that the NN dependency parser performance doesn't change.
   */
  public void testDependencyParserEnglishUD() {
    DependencyParser parser = new DependencyParser();
    parser.loadModelFile("/u/nlp/data/depparser/nn/distrib-2015-04-16/english_UD.gz");
-    double las = parser.testCoNLL("/u/nlp/data/depparser/nn/data/dependency_treebanks/USD/dev.conll", null);
+    double las = parser.testCoNLL("/u/nlp/data/depparser/nn/data/dependency_treebanks/UD-converted/dev.conll", null);
    assertEquals(String.format("English UD LAS should be %.2f but was %.2f",
        EnglishUdLas, las), EnglishUdLas, las, 1e-4);
  }
