Skip to content

Commit

Permalink
One more seeming bug fix and a couple more tidy ups.
Browse files Browse the repository at this point in the history
  • Loading branch information
manning authored and Stanford NLP committed Apr 21, 2018
1 parent 4336c22 commit 07781f6
Showing 1 changed file with 14 additions and 8 deletions.
22 changes: 14 additions & 8 deletions src/edu/stanford/nlp/pipeline/TokensRegexNERAnnotator.java
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ private static Properties getProperties(String name, String mapping, boolean ign
private static final Pattern COMMA_DELIMITERS_PATTERN = Pattern.compile("\\s*,\\s*");
private static final Pattern SEMICOLON_DELIMITERS_PATTERN = Pattern.compile("\\s*;\\s*");
private static final Pattern EQUALS_DELIMITERS_PATTERN = Pattern.compile("\\s*=\\s*");
private static final Pattern NUMBER_PATTERN = Pattern.compile("-?[0-9]+(?:\\.[0-9]+)?");

public TokensRegexNERAnnotator(String name, Properties properties) {
String prefix = ! StringUtils.isNullOrEmpty(name) ? name + '.': "";
Expand Down Expand Up @@ -264,7 +265,7 @@ public TokensRegexNERAnnotator(String name, Properties properties) {
if (!mappingLine.contains("header")) {
mappingLine = "header=true, "+ mappingLine;
mappings[i] = mappingLine;
} else if ( ! Pattern.compile("header\\s*=\\s*true").matcher(mappingLine.toLowerCase()).find()){
} else if ( ! Pattern.compile("header\\s*=\\s*true").matcher(mappingLine.toLowerCase()).find()) {
throw new IllegalStateException("The annotator header property is set to true, but a different option has been provided for mapping file: " + mappingLine);
}
}
Expand Down Expand Up @@ -297,7 +298,7 @@ public TokensRegexNERAnnotator(String name, Properties properties) {
}

String noDefaultOverwriteLabelsProp = properties.getProperty(prefix + "noDefaultOverwriteLabels", "CITY");
this.noDefaultOverwriteLabels = Collections.unmodifiableSet(CollectionUtils.asSet(noDefaultOverwriteLabelsProp.split("\\s*,\\s*")));
this.noDefaultOverwriteLabels = Collections.unmodifiableSet(CollectionUtils.asSet(COMMA_DELIMITERS_PATTERN.split(noDefaultOverwriteLabelsProp)));
this.ignoreCase = PropertiesUtils.getBool(properties, prefix + "ignorecase", false);
this.verbose = PropertiesUtils.getBool(properties, prefix + "verbose", false);

Expand Down Expand Up @@ -577,14 +578,15 @@ public Entry(String tokensRegex, String[] regex, String[] types, Set<String> ove
}

public String getTypeDescription() {
return "[" + StringUtils.join(types, ",") + "]";
return Arrays.toString(types);
}

public String toString() {
return "Entry{" + ((tokensRegex != null) ? tokensRegex: StringUtils.join(regex)) + ' '
+ StringUtils.join(types) + ' ' + overwritableTypes + ' ' + priority + '}';
+ StringUtils.join(types) + ' ' + overwritableTypes + " prio:" + priority + '}';
}
}
} // end static class Entry


/**
* Creates a combined list of Entries using the provided mapping files.
Expand Down Expand Up @@ -733,15 +735,19 @@ private static List<Entry> readEntries(String annotatorName,
key = norm;
}
String[] types = new String[annotationCols.length];
for (int i=0; i < annotationCols.length; i++) {
for (int i = 0; i < annotationCols.length; i++) {
types[i] = split[annotationCols[i]].trim();
}

Set<String> overwritableTypes = Generics.newHashSet();
double priority = 0.0;

if (iOverwrite >= 0 && split.length > iOverwrite) {
overwritableTypes.addAll(Arrays.asList(split[iOverwrite].trim().split("\\s*,\\s*")));
if (NUMBER_PATTERN.matcher(split[iOverwrite].trim()).matches()) {
logger.warn("Number in types column for " + Arrays.toString(key) +
" is probably priority: " + split[iOverwrite]);
}
overwritableTypes.addAll(Arrays.asList(COMMA_DELIMITERS_PATTERN.split(split[iOverwrite].trim())));
}
if (iPriority >= 0 && split.length > iPriority) {
try {
Expand Down Expand Up @@ -793,7 +799,7 @@ private static List<Entry> readEntries(String annotatorName,

Entry entry = new Entry(tokensRegex, regexes, types, overwritableTypes, priority, weight, annotateGroup);

if (seenRegexes.containsKey(key)) {
if (seenRegexes.containsKey(Arrays.asList(key))) {
Entry oldEntry = seenRegexes.get(key);
if (priority > oldEntry.priority) {
logger.warn(annotatorName +
Expand Down

0 comments on commit 07781f6

Please sign in to comment.