Skip to content

Commit

Permalink
Revert "Minor cleanup to old CMMClassifier - final, etc."
Browse files Browse the repository at this point in the history
  • Loading branch information
manning authored and Stanford NLP committed Oct 10, 2015
1 parent 4c2b0c2 commit c0c635e
Show file tree
Hide file tree
Showing 83 changed files with 3,498 additions and 5,758 deletions.
2 changes: 1 addition & 1 deletion doc/loglinear/QUICKSTART.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ loglinear package quickstart:

First, read the ConcatVector section in ARCH.txt.

To jump straight into working code, go read generateSentenceModel() in edu.stanford.nlp.loglinear.CoNLLBenchmark.
To jump straight into working code, go read generateSentenceModel() in edu.stanford.nlp.loglinear.learning.CoNLLBenchmark.

#####################################################

Expand Down
2 changes: 1 addition & 1 deletion doc/loglinear/README.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
For an explanation of how everything fits together, see ARCH.txt

For a quick runnable object, go run edu.stanford.nlp.loglinear.CoNLLBenchmark in core's test package.
For a quick runnable object, go run edu.stanford.nlp.loglinear.learning.CoNLLBenchmark in core's test package.

For a tutorial, see QUICKSTART.txt

Expand Down
135 changes: 0 additions & 135 deletions itest/src/edu/stanford/nlp/ie/qe/QuantifiableEntityExtractorITest.java

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
package edu.stanford.nlp.ling.tokensregex;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import junit.framework.TestCase;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

public class TokenSequenceMatcherITest extends TestCase {
Expand Down Expand Up @@ -94,50 +94,6 @@ public void testTokenSequenceMatcherValue() throws IOException {
assertFalse(match);
}

public void testTokenSequenceMatcherBeginEnd() throws IOException {
  CoreMap doc = createDocument(testText);
  List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);

  // Sequence anchored at the start of the token list: matches the first two tokens only.
  TokenSequencePattern pattern = TokenSequencePattern.compile("^ [] []");
  TokenSequenceMatcher matcher = pattern.getMatcher(tokens);
  assertTrue(matcher.find());
  assertEquals("the number", matcher.group());
  assertFalse(matcher.find());

  // Sequence anchored at the end of the token list: matches the last two tokens only.
  pattern = TokenSequencePattern.compile("[] [] $");
  matcher = pattern.getMatcher(tokens);
  assertTrue(matcher.find());
  assertEquals("fifty.", matcher.group());
  assertFalse(matcher.find());

  // Anchored at both ends: a two-token pattern cannot cover the whole text, so no match.
  pattern = TokenSequencePattern.compile("^ [] [] $");
  matcher = pattern.getMatcher(tokens);
  assertFalse(matcher.find());

  // ^ and $ inside a string regex apply to a single token's text, not to the sequence.
  pattern = TokenSequencePattern.compile("/^number$/");
  matcher = pattern.getMatcher(tokens);
  assertTrue(matcher.find());
  assertEquals("number", matcher.group());
  assertFalse(matcher.find());
}

private static final String testText1 = "Mellitus was the first Bishop of London, the third Archbishop of Canterbury, and a member of the Gregorian mission sent to England to convert the Anglo-Saxons. He arrived in 601 AD, and was consecrated as Bishop of London in 604.";
public void testTokenSequenceMatcher1() throws IOException {
CoreMap doc = createDocument(testText1);
Expand Down Expand Up @@ -223,7 +179,7 @@ public void testTokenSequenceMatcher1() throws IOException {
match = m.find();
assertTrue(match);
assertEquals(0, m.groupCount());
assertEquals("London in 604.", m.group());
assertEquals("London in 604 .", m.group());
match = m.find();
assertFalse(match);
}
Expand Down Expand Up @@ -479,31 +435,6 @@ public void testTokenSequenceMatcherConj() throws IOException {
assertFalse(match);
}

public void testTokenSequenceMatcherConj2() throws IOException {
  String content = "The cat is sleeping on the floor.";
  TokenizerFactory tf = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
  List<CoreLabel> tokens = tf.getTokenizer(new StringReader(content)).tokenize();

  // Greedy conjunction: both branches expand so the match covers the whole sentence.
  String greedyPattern = "(?: ([]* cat []*) & ([]* sleeping []*))";
  TokenSequenceMatcher matcher = TokenSequencePattern.compile(greedyPattern).getMatcher(tokens);
  assertTrue(matcher.matches());
  assertTrue(matcher.find());
  assertEquals("The cat is sleeping on the floor.", matcher.group());

  // Reluctant conjunction: the shortest span satisfying both branches wins.
  String reluctantPattern = "(?: ([]*? cat []*?) & ([]*? sleeping []*?))";
  TokenSequenceMatcher matcher2 = TokenSequencePattern.compile(reluctantPattern).getMatcher(tokens);
  assertTrue(matcher2.find());
  assertEquals("The cat is sleeping", matcher2.group());
}

public void testTokenSequenceMatcherConjAll() throws IOException {
CoreMap doc = createDocument(testText1);
TokenSequencePattern p = TokenSequencePattern.compile(
Expand Down Expand Up @@ -1048,7 +979,7 @@ public void testTokenSequenceOptimizeOrString() throws IOException {
TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
boolean match = m.find();
assertTrue(match);
assertEquals("atropine we need to have many many words here but we don't sweating", m.group(0));
assertEquals("atropine we need to have many many words here but we do n't sweating", m.group(0));

match = m.find();
assertFalse(match);
Expand All @@ -1074,7 +1005,7 @@ public void testMultiplePatterns() throws IOException {
CoreMap doc = createDocument("atropine we need to have many many words here but we don't sweating");
MultiPatternMatcher<CoreMap> multiPatternMatcher = TokenSequencePattern.getMultiPatternMatcher(p1, p2);
List<String> expected = new ArrayList<String>();
expected.add("atropine we need to have many many words here but we don't sweating");
expected.add("atropine we need to have many many words here but we do n't sweating");
Iterator<String> expectedIter = expected.iterator();

Iterable<SequenceMatchResult<CoreMap>> matches =
Expand Down Expand Up @@ -1256,7 +1187,7 @@ public void testTokenSequenceMatcherNumber() throws IOException {
match = m.find();
assertTrue(match);
assertEquals(0, m.groupCount());
assertEquals("January 3, 2002", m.group());
assertEquals("January 3 , 2002", m.group());
match = m.find();
assertFalse(match);

Expand All @@ -1265,7 +1196,7 @@ public void testTokenSequenceMatcherNumber() throws IOException {
match = m.find();
assertTrue(match);
assertEquals(0, m.groupCount());
assertEquals("January 3, 2002", m.group());
assertEquals("January 3 , 2002", m.group());
match = m.find();
assertFalse(match);

Expand Down Expand Up @@ -1473,32 +1404,6 @@ public void testTokenSequenceMatcherMultiNodePattern() throws IOException {
assertFalse(match);
}

public void testTokenSequenceMatcherMultiNodePattern2() throws IOException {
  CoreMap doc = createDocument("Replace the lamp with model wss.32dc55c3e945384dbc5e533ab711fd24");
  List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);

  // Greedy multi-node (?m) pattern: the string regex is allowed to span up to
  // 4 tokens and takes as many as it can.
  TokenSequencePattern pattern = TokenSequencePattern.compile("/model/ ((?m){1,4}/\\w+\\.\\w+/)");
  TokenSequenceMatcher matcher = pattern.getMatcher(tokens);
  assertTrue(matcher.find());
  assertEquals(1, matcher.groupCount());
  assertEquals("model wss.32dc55c3e945384dbc5e533ab711fd24", matcher.group());
  assertEquals("wss.32dc55c3e945384dbc5e533ab711fd24", matcher.group(1));
  assertFalse(matcher.find());

  // Reluctant variant: stops at the shortest token span that still matches.
  pattern = TokenSequencePattern.compile("/model/ ((?m){1,4}?/\\w+\\.\\w+/)");
  matcher = pattern.getMatcher(tokens);
  assertTrue(matcher.find());
  assertEquals(1, matcher.groupCount());
  assertEquals("model wss.32", matcher.group());
  assertEquals("wss.32", matcher.group(1));
  assertFalse(matcher.find());
}

public void testTokenSequenceMatcherBackRef() throws IOException {
CoreMap doc = createDocument("A A A A A A A B A A B A C A E A A A A A A A A A A A B A A A");

Expand Down Expand Up @@ -1583,18 +1488,17 @@ public void testCompile() {
//assertEquals(m.group(), "matching this");
}

/**
 * Verifies that a pattern referencing a variable bound in the environment
 * ("wordname" mapped to {@code CoreAnnotations.TextAnnotation}) compiles.
 */
public void testBindingCompile() {
  Env env = TokenSequencePattern.getNewEnv();
  env.bind("wordname", CoreAnnotations.TextAnnotation.class);
  String s = "[wordname:\"name\"]{1,2}";
  TokenSequencePattern p = TokenSequencePattern.compile(env, s);
  // The original test compiled the pattern but asserted nothing, so a null
  // result would have passed silently; pin the success explicitly.
  assertNotNull(p);
}

// // This does not work!!!
// public void testNoBindingCompile(){
//This DOES NOT work right now!!
// public void testCompile2(){
// Env env = TokenSequencePattern.getNewEnv();
// env.bind("wordname",CoreAnnotations.TextAnnotation.class);
// String s = "[" + CoreAnnotations.TextAnnotation.class.getName()+":\"name\"]{1,2}";
// TokenSequencePattern p = TokenSequencePattern.compile(env, s);
// for(Map.Entry<String, Object> vars: env.getVariables().entrySet()){
// if(vars.getValue().equals(CoreAnnotations.TextAnnotation.class)){
// System.out.println("Found " + vars.getKey() + " binding for " + vars.getValue());
// }
// }
// }

public void testCaseInsensitive1(){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,15 @@ public void testDependencyParserEnglishSD() {
}

// Lower because we're evaluating on PTB + extraDevTest, not just PTB
// NOTE(review): the next two lines are the old and new sides of a diff (this
// text is a scraped commit view); a real source file would contain only one
// EnglishUdLas definition — confirm against the repository.
private static final double EnglishUdLas = 88.72648417258083;
private static final double EnglishUdLas = 84.9873;

/**
* Test that the NN dependency parser performance doesn't change.
*/
public void testDependencyParserEnglishUD() {
DependencyParser parser = new DependencyParser();
parser.loadModelFile("/u/nlp/data/depparser/nn/distrib-2015-04-16/english_UD.gz");
// NOTE(review): old and new diff sides again — only one testCoNLL call (and
// one `las` declaration) belongs in the actual file; the commit switched the
// dev treebank path from UD-converted to USD.
double las = parser.testCoNLL("/u/nlp/data/depparser/nn/data/dependency_treebanks/UD-converted/dev.conll", null);
double las = parser.testCoNLL("/u/nlp/data/depparser/nn/data/dependency_treebanks/USD/dev.conll", null);
// LAS compared with a 1e-4 tolerance against the pinned expected score.
assertEquals(String.format("English UD LAS should be %.2f but was %.2f",
EnglishUdLas, las), EnglishUdLas, las, 1e-4);
}
Expand Down
Loading

0 comments on commit c0c635e

Please sign in to comment.