Merge branch 'chaganty-hypatia' of jamie.stanford.edu:/u/nlp/git/javanlp into chaganty-hypatia
FelixHo · Nov 25, 2015 · e1f82ff · e1f82ff
1 parent 5bd6e62
commit e1f82ff
Show file tree

Hide file tree

Showing 860 changed files with 86,536 additions and 124,856 deletions.
diff --git a/README.md b/README.md
@@ -1,9 +1,9 @@
 Stanford CoreNLP
 ================
 
-Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize and interpret dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases or word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for (Modern Standard) Arabic, (mainland) Chinese, French, German, and Spanish. Stanford CoreNLP is an integrated framework, which make it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools with just two lines of code. Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, industry, and government. The tools variously use rule-based, probabilistic machine learning, and deep learning components.
+Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for Arabic, (mainland) Chinese, French, German, and Spanish. Stanford CoreNLP is an integrated framework, which makes it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools on it with just two lines of code. Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, government, and industry.
 
-The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute to others.
+The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute.
 
 You can find releases of Stanford CoreNLP on [Maven Central](http://search.maven.org/#browse%7C11864822).
 

diff --git a/build.gradle b/build.gradle
@@ -47,7 +47,6 @@ task listDeps << {
 
 dependencies {
   compile fileTree(dir: 'lib', include: '*.jar')
-  testCompile fileTree(dir: 'liblocal', include: '*.jar')
 }
 
 // Eclipse plugin setup

diff --git a/build.xml b/build.xml
@@ -26,10 +26,6 @@
         <include name="*.jar"/>
         <exclude name="javanlp*"/>
       </fileset>
-      <fileset dir="${basedir}/liblocal">
-        <include name="*.jar"/>
-        <exclude name="javanlp*"/>
-      </fileset>
     </path>
   </target>
 
@@ -128,11 +124,6 @@
         <compilerarg value="-Xmaxwarns"/>
         <compilerarg value="10000"/>  -->
     </javac>
-    <copy todir="${build.path}/edu/stanford/nlp/pipeline/demo">
-      <fileset dir="${source.path}/edu/stanford/nlp/pipeline/demo">
-        <exclude name="**/*.java"/>
-      </fileset>
-    </copy>
   </target>
 
   <target name="test" depends="classpath,compile"

diff --git a/data/edu/stanford/nlp/ud/feature_map.txt b/data/edu/stanford/nlp/ud/feature_map.txt
diff --git a/data/edu/stanford/nlp/upos/ENUniversalPOS.tsurgeon b/data/edu/stanford/nlp/upos/ENUniversalPOS.tsurgeon
@@ -64,7 +64,7 @@ relabel target AUX
 %relabel target AUX
 
 % VB.* -> AUX (active, case 1)
-VP < VP < (/^VB.*$/=target <: /^(?i:will|have|can|would|do|is|was|be|are|has|could|should|did|been|may|were|had|'ll|'ve|does|am|might|ca|'m|being|'s|must|'d|'re|wo|shall|get|ve|s|got|r|m|getting|having|d|re|ll|wilt|v|of|my|nt|gets|du|wud|woud|with|willl|wil|wase|shoul|shal|`s|ould|-ll|most|made|hvae|hav|cold|as|art|ai|ar|a)$/)
+VP < VP < (/^VB.*$/=target <... {/.*/})
 
 relabel target AUX
 
@@ -78,13 +78,8 @@ relabel target AUX
 
 relabel target VERB
 
-% IN -> SCONJ (subordinating conjunctions)
-/^SBAR(-[^ ]+)?$/ < (IN=target $++ @S|FRAG|SBAR|SINV <... {/.*/})
-
-relabel target SCONJ
-
-% IN -> SCONJ (subordinating conjunctions II)
-@PP < (IN=target $+ @SBAR|S)
+% IN -> SCONJ (only in case of subordinating conjunctions)
+/^SBAR(-[^ ]+)?$/ < (IN=target $++ S|FRAG <... {/.*/})
 
 relabel target SCONJ
 
@@ -114,7 +109,7 @@ NFP=target <... {/.*/}
 relabel target SYM
 
 % RB -> PART when it is verbal negation (not or its reductions)
-@VP|SINV|SQ|FRAG|ADVP < (RB=target < /^(?i:not|n't|nt|t|n)$/)
+@VP|SINV|SQ|FRAG < (RB=target < /^(?i:not|n't|nt|t|n)$/)
 
 relabel target PART
 
@@ -123,16 +118,6 @@ RB=target <... {/.*/}
 
 relabel target ADV
 
-% DT -> PRON (pronominal this/that/these/those)
-@NP <: (DT=target < /^[Tt]h(is|at|ose|ese)$/)
-
-relabel target PRON
-
-%DT -> DET
-DT=target < __
-
-relabel target DET
-
 % ------------------------------
 % 1 to 1 mappings
 %
@@ -147,6 +132,11 @@ CD=target <... {/.*/}
 
 relabel target NUM
 
+% DT -> DET
+DT=target <... {/.*/}
+
+relabel target DET
+
 % EX -> PRON
 EX=target <... {/.*/}
 

diff --git a/doc/README b/doc/README
@@ -8,5 +8,3 @@ zips we release
 
 classify, lexparser, ner, segmenter: documentation included in various
 packages, such as readmes, build files, etc
-
-loglinear: architectural explanation and various tutorials
diff --git a/doc/loglinear/ARCH.txt b/doc/loglinear/ARCH.txt
diff --git a/doc/loglinear/OPTIMIZATION.txt b/doc/loglinear/OPTIMIZATION.txt
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,5 +8,3 @@ zips we release

		classify, lexparser, ner, segmenter: documentation included in various
		packages, such as readmes, build files, etc

		loglinear: architectural explanation and various tutorials