Skip to content

Commit

Permalink
Merge branch 'feature/coref-resolution'
Browse files Browse the repository at this point in the history
  • Loading branch information
Gram21 committed Aug 13, 2021
2 parents f131dde + c263013 commit 4cf317a
Show file tree
Hide file tree
Showing 45 changed files with 198,927 additions and 165,829 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package edu.kit.kastel.mcse.ardoco.core.connectiongenerator.extractors;
package edu.kit.kastel.mcse.ardoco.core.connectiongenerator.agents;

import java.util.Map;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ private void createRecommendedInstancesForTerm(IWord node) {

}

private ImmutableList<ITermMapping> getPossibleOccurredTermMappingsToThisSpot(ImmutableList<ITermMapping> termMappings, IWord n) {
private static ImmutableList<ITermMapping> getPossibleOccurredTermMappingsToThisSpot(ImmutableList<ITermMapping> termMappings, IWord n) {

MutableList<ITermMapping> possibleOccuredTermMappings = Lists.mutable.empty();
String word = n.getText();
Expand Down Expand Up @@ -133,11 +133,15 @@ private ImmutableList<ITermMapping> getPossibleOccurredTermMappingsToThisSpot(Im

}

private boolean areNextWordsEqualToReferences(ImmutableList<INounMapping> references, IWord currentWord, int currentPosition) {
private static boolean areNextWordsEqualToReferences(ImmutableList<INounMapping> references, IWord currentWord, int currentPosition) {
var stop = false;

for (int i = currentPosition + 1; i < references.size() && !stop; i++) {
String postWord = currentWord.getNextWord().getText();
var nextWord = currentWord.getNextWord();
if (nextWord == null) {
continue;
}
String postWord = nextWord.getText();
String reference = references.get(i).getReference();

if (!SimilarityUtils.areWordsSimilar(reference, postWord)) {
Expand All @@ -148,11 +152,15 @@ private boolean areNextWordsEqualToReferences(ImmutableList<INounMapping> refere
return stop;
}

private boolean arePreviousWordsEqualToReferences(ImmutableList<INounMapping> references, IWord currentWord, int currentPosition) {
private static boolean arePreviousWordsEqualToReferences(ImmutableList<INounMapping> references, IWord currentWord, int currentPosition) {
var stop = false;

for (int i = currentPosition - 1; i >= 0 && !stop; i--) {
String preWord = currentWord.getPreWord().getText();
IWord previousWord = currentWord.getPreWord();
if (previousWord == null) {
continue;
}
String preWord = previousWord.getText();
String reference = references.get(i).getReference();
if (!SimilarityUtils.areWordsSimilar(reference, preWord)) {
stop = true;
Expand All @@ -170,7 +178,7 @@ private void createRecommendedInstancesForAdjacentTermMappings(IWord termStartNo
createRecommendedInstancesOfAdjacentTerms(term, adjCompleteTermMappings.toImmutable());
}

private ImmutableList<ITermMapping> getCompletePreAdjTermMappings(ImmutableList<ITermMapping> possibleTermMappings, IWord termStartNode) {
private static ImmutableList<ITermMapping> getCompletePreAdjTermMappings(ImmutableList<ITermMapping> possibleTermMappings, IWord termStartNode) {
int sentence = termStartNode.getSentenceNo();
IWord preTermNode = termStartNode.getPreWord();

Expand All @@ -183,7 +191,7 @@ private ImmutableList<ITermMapping> getCompletePreAdjTermMappings(ImmutableList<
while (!nounMappings.isEmpty()) {
INounMapping resultOfPreMatch = matchNode(nounMappings.toImmutable(), preTermNode);

if (sentence == preTermNode.getSentenceNo() || resultOfPreMatch == null) {
if (preTermNode == null || sentence == preTermNode.getSentenceNo() || resultOfPreMatch == null) {
break;
}

Expand All @@ -199,7 +207,7 @@ private ImmutableList<ITermMapping> getCompletePreAdjTermMappings(ImmutableList<
return adjCompleteTermMappings.toImmutable();
}

private ImmutableList<ITermMapping> getCompleteAfterAdjTermMappings(ImmutableList<ITermMapping> possibleTermMappings, IWord termStartNode,
private static ImmutableList<ITermMapping> getCompleteAfterAdjTermMappings(ImmutableList<ITermMapping> possibleTermMappings, IWord termStartNode,
ITermMapping term) {

int sentence = termStartNode.getSentenceNo();
Expand Down Expand Up @@ -235,7 +243,10 @@ private ImmutableList<ITermMapping> getCompleteAfterAdjTermMappings(ImmutableLis

}

private INounMapping matchNode(ImmutableList<INounMapping> nounMappings, IWord node) {
private static INounMapping matchNode(ImmutableList<INounMapping> nounMappings, IWord node) {
if (node == null) {
return null;
}
for (INounMapping mapping : nounMappings) {
if (mapping.getWords().contains(node)) {
return mapping;
Expand All @@ -244,7 +255,7 @@ private INounMapping matchNode(ImmutableList<INounMapping> nounMappings, IWord n
return null;
}

private IWord getAfterTermNode(IWord termStartNode, ITermMapping term) {
private static IWord getAfterTermNode(IWord termStartNode, ITermMapping term) {
IWord afterTermNode = termStartNode.getNextWord();
for (var i = 0; i < term.getMappings().size() && afterTermNode != null; i++) {
afterTermNode = afterTermNode.getNextWord();
Expand All @@ -264,11 +275,11 @@ private ImmutableList<INounMapping> getTermAdjacentNounMappings(IWord node, ITer

if (preTermNode != null && sentence == preTermNode.getSentenceNo()) {

ImmutableList<INounMapping> nounMappingsOfPreTermNode = textState.getNounMappingsByNode(preTermNode);
ImmutableList<INounMapping> nounMappingsOfPreTermNode = textState.getNounMappingsByWord(preTermNode);
possibleMappings.addAll(nounMappingsOfPreTermNode.castToCollection());
}
if (afterTermNode != null && sentence == afterTermNode.getSentenceNo()) {
ImmutableList<INounMapping> nounMappingsOfAfterTermNode = textState.getNounMappingsByNode(afterTermNode);
ImmutableList<INounMapping> nounMappingsOfAfterTermNode = textState.getNounMappingsByWord(afterTermNode);
possibleMappings.addAll(nounMappingsOfAfterTermNode.castToCollection());
}
return possibleMappings.select(nounMapping -> nounMapping.getKind() != kind).toImmutable();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ public String getNameOccurencesAsString() {
Set<String> names = new HashSet<>();
MutableList<Integer> namePositions = Lists.mutable.empty();
for (INounMapping nameMapping : textualInstance.getNameMappings()) {
names.addAll(nameMapping.getOccurrences().castToCollection());
names.addAll(nameMapping.getSurfaceForms().castToCollection());
namePositions.addAll(nameMapping.getMappingSentenceNo().castToCollection());
}

Expand All @@ -126,11 +126,11 @@ public String toString() {
MutableList<Integer> typePositions = Lists.mutable.empty();

for (INounMapping nameMapping : textualInstance.getNameMappings()) {
names.addAll(nameMapping.getOccurrences().castToCollection());
names.addAll(nameMapping.getSurfaceForms().castToCollection());
namePositions.addAll(nameMapping.getMappingSentenceNo().castToCollection());
}
for (INounMapping typeMapping : textualInstance.getTypeMappings()) {
types.addAll(typeMapping.getOccurrences().castToCollection());
types.addAll(typeMapping.getSurfaceForms().castToCollection());
typePositions.addAll(typeMapping.getMappingSentenceNo().castToCollection());
}
return "InstanceMapping [ uid=" + modelInstance.getUid() + ", name=" + modelInstance.getLongestName() + //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Scanner;

Expand Down Expand Up @@ -54,6 +55,19 @@ protected static double getPropertyAsDouble(String key, Map<String, String> conf
return Double.parseDouble(configs.get(key));
}

/**
 * Returns whether the specified property of the config file is enabled.
 *
 * @param key     name of the specified property
 * @param configs the configuration map
 * @return {@code true} if the value for the key is "true", "yes", or "1", ignoring case and surrounding
 *         whitespace; {@code false} otherwise, including when no value is set for the key
 */
protected static boolean isPropertyEnabled(String key, Map<String, String> configs) {
    String rawValue = configs.get(key);
    if (rawValue == null) {
        // An unset property counts as "not enabled" instead of failing with a NullPointerException.
        return false;
    }
    var propValue = rawValue.strip();
    return Boolean.parseBoolean(propValue) || propValue.equalsIgnoreCase("yes") || propValue.equals("1");
}

/**
* Get all properties and raw values of a configuration
*
Expand Down Expand Up @@ -84,21 +98,19 @@ public static void mergeConfigToMap(Map<String, String> configs, Configuration c
* @param additionalConfigs the file with additional configs
*/
public static void overrideConfigInMap(Map<String, String> configs, File additionalConfigs) {
try (var scan = new Scanner(additionalConfigs)) {
try (var scan = new Scanner(additionalConfigs, StandardCharsets.UTF_8)) {
while (scan.hasNextLine()) {
String line = scan.nextLine();
String[] kv = new String[0];
if (line == null || line.isBlank()) {
logger.warn("Illegal Line in config: \"{}\"", line);
continue;
} else {
kv = line.trim().split("=", 2);
}

String[] kv = line.trim().split("=", 2);
if (kv.length != 2) {
logger.warn("Illegal Line in config: \"{}\"", line);
continue;
}

if (configs.containsKey(kv[0].trim())) {
} else if (configs.containsKey(kv[0].trim())) {
configs.put(kv[0].trim(), kv[1].trim());
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package edu.kit.kastel.mcse.ardoco.core.datastructures.definitions;

import org.eclipse.collections.api.factory.Lists;
import org.eclipse.collections.api.factory.Sets;
import org.eclipse.collections.api.list.ImmutableList;
import org.eclipse.collections.api.list.MutableList;
import org.eclipse.collections.api.set.ImmutableSet;
import org.eclipse.collections.api.set.MutableSet;

import edu.kit.kastel.mcse.ardoco.core.datastructures.modules.IState;

Expand Down Expand Up @@ -56,14 +57,19 @@ public interface IConnectionState extends IState<IConnectionState> {
*
* @return list of tracelinks within this connection state
*/
default ImmutableList<Tracelink> getTraceLinks() {
MutableList<Tracelink> tracelinks = Lists.mutable.empty();
default ImmutableSet<Tracelink> getTraceLinks() {
MutableSet<Tracelink> tracelinks = Sets.mutable.empty();
for (var instanceLink : getInstanceLinks()) {
for (var nm : instanceLink.getTextualInstance().getNameMappings()) {
var textualInstance = instanceLink.getTextualInstance();
for (var nm : textualInstance.getNameMappings()) {
for (var word : nm.getWords()) {
var tracelink = new Tracelink(instanceLink, instanceLink.getModelInstance(), word);
tracelinks.add(tracelink);
}
for (var word : nm.getCoreferences()) {
var tracelink = new Tracelink(instanceLink, instanceLink.getModelInstance(), word);
tracelinks.add(tracelink);
}
}
}
return tracelinks.toImmutable();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
*
*/
package edu.kit.kastel.mcse.ardoco.core.datastructures.definitions;

import java.util.StringJoiner;

import org.eclipse.collections.api.list.ImmutableList;

/**
* This defines the interface of a Coreference Cluster (CorefCluster). A CorefCluster is a cluster that collects all
* mentions of an entity and has a representative mention.
*
* @author Jan Keim
*
*/
public interface ICorefCluster {

    /**
     * @return the id of this cluster
     */
    int getId();

    /**
     * @return the representative mention of this cluster
     */
    String getRepresentativeMention();

    /**
     * @return the mentions of this cluster; each mention is given as a list of words
     */
    ImmutableList<ImmutableList<IWord>> getMentions();

    /**
     * Builds the text of a mention by concatenating the texts of its words, separated by a single space.
     *
     * @param mention the words of the mention
     * @return the joined text of the words; the empty string if the mention contains no words
     */
    static String getTextForMention(ImmutableList<IWord> mention) {
        StringJoiner mentionText = new StringJoiner(" ");
        for (IWord mentionWord : mention) {
            String wordText = mentionWord.getText();
            mentionText.add(wordText);
        }
        return mentionText.toString();
    }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package edu.kit.kastel.mcse.ardoco.core.datastructures.definitions;

import java.util.Collection;
import java.util.Map;

import org.eclipse.collections.api.list.ImmutableList;
Expand Down Expand Up @@ -27,16 +28,16 @@ public interface INounMapping extends ICopyable<INounMapping> {
void hardSetProbability(double probability);

/**
* Returns the occurrences of this mapping.
* Returns the surface forms (previously called occurrences) of this mapping.
*
* @return all appearances of the mapping
*/
ImmutableList<String> getOccurrences();
ImmutableList<String> getSurfaceForms();

/**
* Returns all nodes contained by the mapping.
* Returns all words that are contained by the mapping. This should include coreferences.
*
* @return all mapping nodes
* @return all words that are referenced with this mapping
*/
ImmutableList<IWord> getWords();

Expand All @@ -45,14 +46,14 @@ public interface INounMapping extends ICopyable<INounMapping> {
*
* @param nodes graph nodes to add to the mapping
*/
void addNodes(ImmutableList<IWord> nodes);
void addWords(ImmutableList<IWord> nodes);

/**
* Adds a node to the mapping, if it is not already contained.
*
* @param n graph node to add.
*/
void addNode(IWord n);
void addWord(IWord n);

/**
* Returns the probability of being a mapping of its kind.
Expand Down Expand Up @@ -148,4 +149,36 @@ public interface INounMapping extends ICopyable<INounMapping> {
* @return the distribution
*/
Map<MappingKind, Double> getDistribution();

/**
 * Adds a single coreference to this mapping.
 *
 * @param coreference the coreference to add
 */
void addCoreference(IWord coreference);

/**
 * Adds multiple coreferences to this mapping.
 *
 * @param coreferences the coreferences to add
 */
void addCoreferences(Collection<IWord> coreferences);

/**
 * Returns the coreferences of this mapping.
 *
 * @return the coreferences
 */
ImmutableList<IWord> getCoreferences();

/**
 * Creates a new INounMapping that results from merging the data of this INounMapping with a given other
 * INounMapping.
 *
 * @param other the other INounMapping
 * @return new INounMapping that is a merge of the given INounMappings
 */
INounMapping merge(INounMapping other);

/**
* Adds the kind with probability.
*
* @param kind the kind
* @param probability the probability
*/
void addKindWithProbability(MappingKind kind, double probability);
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,11 @@ default int getLength() {
* @return the words
*/
ImmutableList<IWord> getWords();

/**
* Get all Coreference Clusters that the text has
*
* @return the coreference clusters
*/
ImmutableList<ICorefCluster> getCorefClusters();
}
Loading

0 comments on commit 4cf317a

Please sign in to comment.