Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas L. Redman committed Oct 11, 2016
2 parents 7d9062a + c5e2ef9 commit d9bafc3
Show file tree
Hide file tree
Showing 61 changed files with 2,030 additions and 317,201 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# illinois-cogcomp-nlp

[![Build Status](https://semaphoreci.com/api/v1/cogcomp/illinois-cogcomp-nlp/branches/master/badge.svg)](https://semaphoreci.com/cogcomp/illinois-cogcomp-nlp)
[![Build Status](http://morgoth.cs.illinois.edu:8080/buildStatus/icon?job=cogcomp-nlp)](http://morgoth.cs.illinois.edu:8080/job/cogcomp-nlp/)
[![Build status](https://ci.appveyor.com/api/projects/status/f53iv8435rq875ex/branch/master?svg=true)](https://ci.appveyor.com/project/bhargavm/illinois-cogcomp-nlp/branch/master)


This project collects a number of core libraries for Natural Language Processing (NLP) developed
by the University of Illinois' [Cognitive Computation Group](https://cogcomp.cs.illinois.edu).
Expand Down
32 changes: 32 additions & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
version: '{build}'
skip_tags: true
clone_depth: 10
environment:
JAVA_HOME: C:\Program Files\Java\jdk1.8.0
branches:
only:
- master
except:
- gh-pages
os: Windows Server 2012
install:
- ps: |
Add-Type -AssemblyName System.IO.Compression.FileSystem
if (!(Test-Path -Path "C:\maven" )) {
(new-object System.Net.WebClient).DownloadFile('http://apache.mirrors.tds.net/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.zip', 'C:\maven-bin.zip')
[System.IO.Compression.ZipFile]::ExtractToDirectory("C:\maven-bin.zip", "C:\maven")
}
- cmd: SET PATH=C:\maven\apache-maven-3.3.9\bin;%JAVA_HOME%\bin;%PATH:C:\Ruby193\bin;=%
- cmd: SET MAVEN_OPTS=-XX:MaxPermSize=2g -Xmx4g
- cmd: SET JAVA_OPTS=-XX:MaxPermSize=2g -Xmx4g
- cmd: SET JAVA_TOOL_OPTIONS=-Dfile.encoding=UTF8
- cmd: SET org.ojalgo.OjAlgoUtils.ENVIRONMENT=x86_64
- cmd: mvn --version
- cmd: java -version
build_script:
- mvn clean
test_script:
- mvn test
cache:
- C:\maven\
- C:\Users\appveyor\.m2
8 changes: 4 additions & 4 deletions chunker/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>3.0.71</version>
<version>3.0.76</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand All @@ -13,7 +13,7 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>3.0.71</version>
<version>3.0.76</version>
</dependency>

<dependency>
Expand All @@ -24,12 +24,12 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>LBJava-NLP-tools</artifactId>
<version>1.0.1</version>
<version>3.0.76</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-pos</artifactId>
<version>3.0.71</version>
<version>3.0.76</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
Expand Down
5 changes: 3 additions & 2 deletions chunker/scripts/runBenchmarkTest.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#!/bin/bash

###
# This script is outdated: ChunkTester no longer takes a filename as input. @@Qiang
# Runs a benchmark evaluation for the Illinois Chunker.
# Labelled test data needs to be provided (not included)


VERSION=2.1.0
VERSION=3.0.72
MAIN_JAR=target/illinois-chunker-${VERSION}.jar
MAIN=edu.illinois.cs.cogcomp.chunker.main.ChunkTester
LIB=target/dependency
Expand All @@ -17,7 +18,7 @@ for JAR in `ls $LIB`; do
done


TESTDATA="src/main/resources/test.txt"
TESTDATA="/shared/corpora/corporaWeb/written/eng/chunking/conll2000distributions/test.txt"


CMD="java -Xmx2g -cp $CP $MAIN $TESTDATA"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,39 +42,25 @@
* The output is generated by the <code>classify.TestDiscrete</code> class.
*
* @author Nick Rizzolo
* @author Updated: Qiang Ning
**/
public class ChunkTester {

/**
* Implements the program described above.
**/
public static void main(String[] args) {
ResourceManager rm = new ChunkerConfigurator().getDefaultConfig();
String testFileName = rm.getString("testGoldPOSData");
String testNoPOSFileName = rm.getString("testNoPOSData");

URL testFileURL = ChunkTester.class.getClassLoader().getResource(testFileName);
assertNotNull("Test file missing", testFileURL);
String testFile = testFileURL.getFile();

URL testNoPOSFileURL = ChunkTester.class.getClassLoader().getResource(testNoPOSFileName);
assertNotNull("Test file missing", testNoPOSFileURL);
String testNoPOSFile = testNoPOSFileURL.getFile();
public static void chunkTester(String testFile){
Parser parser;

System.out.println("\nWith Gold POS");

parser = new CoNLL2000Parser(testFile);

BIOTester tester =
new BIOTester(new Chunker(), new ChunkLabel(), new ChildrenFromVectors(parser));
tester.test().printPerformance(System.out);
}
public static void main(String[] args){
ResourceManager rm = new ChunkerConfigurator().getDefaultConfig();
String testFileName = rm.getString("testGoldPOSData");
String testNoPOSFileName = rm.getString("testNoPOSData");

System.out.println("\nWith NO POS");

parser = new CoNLL2000Parser(testNoPOSFile);
System.out.println("\nWith Gold POS");
chunkTester(testFileName);

tester = new BIOTester(new Chunker(), new ChunkLabel(), new ChildrenFromVectors(parser));
tester.test().printPerformance(System.out);
System.out.println("\nWith NO POS");
chunkTester(testNoPOSFileName);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public ChunkerAnnotator(boolean lazilyInitialize) {

@Override
public void initialize(ResourceManager rm) {
tagger = new Chunker();
tagger = new Chunker(rm.getString("modelPath"),rm.getString("modelLexPath"));
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager;
import edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector;
import edu.illinois.cs.cogcomp.lbjava.parse.Parser;
import edu.illinois.cs.cogcomp.core.io.IOUtils;
import java.io.File;

/**
* Trains chunker models with user specified labeled data in the CoNLL2000 format. Similar to
Expand Down Expand Up @@ -84,13 +86,18 @@ public void trainModelsWithParser(Parser parser) {

/**
* Saves the ".lc" and ".lex" models to disk in the modelPath specified by the constructor. The
* modelName ("illinois-chunker") is fixed
* modelName ("Chunker", as specified in ChunkerConfigurator) is fixed
*/
public void writeModelsToDisk() {
    // Look the output directory up once; it is needed for both the mkdir call and the message.
    final String modelDir = rm.getString("modelDirPath");
    IOUtils.mkdir(modelDir);
    chunker.save();
    System.out.println("Done training, models are in " + modelDir);
}

/**
 * Writes the trained chunker to {@code dir} as {@code <modelName>.lc} and
 * {@code <modelName>.lex}, after calling {@code IOUtils.mkdir(dir)}, and prints the
 * location to standard output.
 *
 * @param dir directory that receives the two model files
 * @param modelName base file name (without extension) shared by both files
 */
public void writeModelsToDisk(String dir, String modelName) {
    // Both files and the status message share the same "<dir>/<modelName>" prefix.
    final String modelPathPrefix = dir + File.separator + modelName;
    IOUtils.mkdir(dir);
    chunker.write(modelPathPrefix + ".lc", modelPathPrefix + ".lex");
    System.out.println("Done training, models are in " + modelPathPrefix + ".lc (.lex)");
}
public static void main(String[] args) {
ChunkerTrain trainer = new ChunkerTrain();
trainer.trainModels();
Expand Down
6 changes: 5 additions & 1 deletion core-utilities/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,4 +344,8 @@ TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS);
for (int i = 0; i < ta.size(); i++) {
System.out.println(i + ":" + posView.getLabel(i));
}
```
```

## Citation

Thank you for citing us if you use us in your work!
7 changes: 6 additions & 1 deletion core-utilities/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>3.0.71</version>
<version>3.0.76</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down Expand Up @@ -80,5 +80,10 @@
<artifactId>h2</artifactId>
<version>1.4.191</version>
</dependency>
<dependency>
<groupId>org.mapdb</groupId>
<artifactId>mapdb</artifactId>
<version>3.0.0-M5</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager;

import java.util.Properties;
import java.util.Set;

/**
* An interface for creating views of a specified name from a {@link TextAnnotation}
Expand Down Expand Up @@ -188,5 +189,15 @@ public String[] getRequiredViews() {
return requiredViews;
}

/**
* Returns the possible tag values that the annotator can produce.
* <p>
* This implementation is a placeholder: it prints "Not yet implemented." to standard error and
* returns {@code null}, so callers must be prepared for a {@code null} result.
* NOTE(review): presumably concrete annotators are expected to override this -- confirm.
*
* @return the set of strings representing the tag values; {@code null} from this implementation
*/
public Set<String> getTagValues() {
System.err.println("Not yet implemented.");
return null;
}


}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
* This software is released under the University of Illinois/Research and Academic Use License. See
* the LICENSE file in the root folder for details. Copyright (c) 2016
*
* Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
package edu.illinois.cs.cogcomp.core.constants;

/**
 * String constants naming document-level metadata fields.
 * <p>
 * This class only holds constants; it is final and cannot be instantiated.
 */
public final class DocumentMetadata {
    /** Name of the document identifier field. */
    public static final String DocumentID = "documentID";

    /** Name of the document creation time field. */
    public static final String DocumentCreationTime = "documentCreationTime";

    /** Name of the headline field. */
    public static final String HeadLine = "headLine";

    /** Not instantiable: the class exists only to namespace the constants above. */
    private DocumentMetadata() {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
* This software is released under the University of Illinois/Research and Academic Use License. See
* the LICENSE file in the root folder for details. Copyright (c) 2016
*
* Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
package edu.illinois.cs.cogcomp.core.datastructures;

import java.util.Set;

/**
* Interface for common methods when supporting attributes.
*/
public interface HasAttributes {
/**
* Adds an attribute by associating {@code value} with {@code key}.
* NOTE(review): what happens when {@code key} already has a value is not specified here --
* confirm against the implementations.
*
* @param key the attribute name
* @param value the attribute value
*/
void addAttribute(String key, String value);

/**
* Looks up the attribute stored under {@code key}.
* NOTE(review): the result for a missing key (presumably {@code null}) is not specified here --
* confirm against the implementations.
*
* @param key the attribute name
* @return the value associated with {@code key}
*/
String getAttribute(String key);

/**
* Returns the names of the attributes.
*
* @return the set of attribute keys
*/
Set<String> getAttributeKeys();

/**
* Tells whether an attribute named {@code key} is present.
*
* @param key the attribute name
* @return {@code true} if an attribute with this key is present
*/
boolean hasAttribute(String key);

/**
* Removes every attribute.
*/
void removeAllAttributes();
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/
package edu.illinois.cs.cogcomp.core.datastructures.textannotation;

import edu.illinois.cs.cogcomp.core.datastructures.HasAttributes;
import edu.illinois.cs.cogcomp.core.datastructures.IntPair;

import java.io.Serializable;
Expand All @@ -20,7 +21,7 @@
*
* @author Vivek Srikumar
*/
public class Constituent implements Serializable {
public class Constituent implements Serializable, HasAttributes {

private static final long serialVersionUID = -4241917156773356414L;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
*/
package edu.illinois.cs.cogcomp.core.datastructures.textannotation;

import edu.illinois.cs.cogcomp.core.datastructures.HasAttributes;

import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
Expand All @@ -18,7 +20,7 @@
* <p>
* Aug 4, 2009
*/
public class Relation implements Serializable {
public class Relation implements Serializable, HasAttributes {

private static final long serialVersionUID = -1005341815252250162L;
protected final int relationName;
Expand Down Expand Up @@ -99,7 +101,7 @@ public Constituent getTarget() {
}


public void setAttribute( String key, String value )
public void addAttribute(String key, String value )
{
if ( null == attributes )
attributes = new HashMap<>();
Expand All @@ -123,6 +125,10 @@ public Set<String> getAttributeKeys() {
return this.attributes.keySet();
}

/**
 * Tells whether an attribute with the given key has been stored.
 *
 * @param key the attribute name to look for
 * @return {@code true} if the attribute map exists and contains {@code key};
 *         {@code false} otherwise, including when no attribute map has been created
 */
public boolean hasAttribute(String key) {
    // The attribute map may still be null, in which case nothing is stored.
    if (this.attributes == null) {
        return false;
    }
    return this.attributes.containsKey(key);
}

/**
* Removes all attributes from this Relation.
*/
Expand Down
Loading

0 comments on commit d9bafc3

Please sign in to comment.