Skip to content

Commit

Permalink
Merge branch 'release/0.9.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
shilad committed Jan 16, 2018
2 parents 541425e + 8c93cc9 commit 8ee6dab
Show file tree
Hide file tree
Showing 43 changed files with 636 additions and 268 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ branches:
- master
- develop
jdk:
- oraclejdk7
- oraclejdk8
install:
- mvn -B clean install -DskipTests
script:
Expand Down
12 changes: 5 additions & 7 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-parent</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
<packaging>pom</packaging>
<name>WikiBrain parent</name>
<description>The WikiBrain base pom</description>
Expand Down Expand Up @@ -222,13 +222,11 @@
</configuration>
</plugin>
<plugin>
<groupId>external.atlassian.jgitflow</groupId>
<artifactId>jgitflow-maven-plugin</artifactId>
<version>1.0-m4.3</version>
<groupId>com.amashchenko.maven.plugin</groupId>
<artifactId>gitflow-maven-plugin</artifactId>
<version>1.8.0</version>
<configuration>
<autoVersionSubmodules>true</autoVersionSubmodules>
<allowUntracked>true</allowUntracked>
<pushReleases>true</pushReleases>
<!-- optional configuration -->
</configuration>
</plugin>
</plugins>
Expand Down
16 changes: 8 additions & 8 deletions wikibrain-cookbook/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-parent</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</parent>

<artifactId>wikibrain-cookbook</artifactId>
Expand All @@ -14,37 +14,37 @@
<dependency>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-mapper</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-sr</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-wikidata</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-loader</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-parser</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-pageview</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-spatial</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>com.sun.corba</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ public static void main(String args[]) throws ConfigurationException, DaoExcepti

Env env = new EnvBuilder().build();
Configurator c = env.getConfigurator();
Language lang = Language.getByLangCode("simple"); // simple english
PhraseAnalyzer pa = c.get(PhraseAnalyzer.class, "stanford");
Language lang = env.getDefaultLanguage(); // simple english
PhraseAnalyzer pa = c.get(PhraseAnalyzer.class, "anchortext");
LocalPageDao pageDao = c.get(LocalPageDao.class);
LocalPage page = pageDao.getByTitle(new Title("Obama", lang), NameSpace.ARTICLE);
System.out.println("description of " + page + ":"); // should resolve redirect to Barack Obama
LinkedHashMap<String, Float> description = pa.describe(lang, page, 20);
LinkedHashMap<String, Float> description = pa.describe(lang, page, 100);
if (description == null) {
System.out.println("\tno description!");
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ public static void main(String[] args) throws Exception{
Env env = EnvBuilder.envFromArgs(args);
Configurator conf = env.getConfigurator();
LocalPageDao lpDao = conf.get(LocalPageDao.class);
Language simple = Language.getByLangCode("simple");
Language simple = env.getDefaultLanguage();

// Retrieve the "prebuiltword2vec" SR metric for the environment's default language
SRMetric sr = conf.get(
SRMetric.class, "simple-ensemble",
SRMetric.class, "prebuiltword2vec",
"language", simple.getLangCode());

//Similarity between strings
Expand Down
6 changes: 3 additions & 3 deletions wikibrain-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<parent>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-parent</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</parent>

<artifactId>wikibrain-core</artifactId>
Expand All @@ -13,12 +13,12 @@
<dependency>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-utils</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-matrix</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.jooq</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ public enum FileMatcher {
WIKIDATA_ITEMS (
"wikidata_items",
Pattern.compile(".*?([a-zA-Z_-]+)wiki.+-wb_items_per_site.sql.gz")),
WIKIDATA_JSON(
"wikidata",
Pattern.compile(".*?wikidata-.+-all.json.bz")),
MD5 (
"md5_checksums",
Pattern.compile(".*?([a-zA-Z_-]+)wiki.+-md5sums.txt"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,9 @@ public void computePageRanks(CategoryGraph graph) {
}

for (int i = 0; i < 20; i++) {
LOG.info("performing page ranks iteration {0}.", i);
LOG.info("performing page ranks iteration {}.", i);
double error = onePageRankIteration(graph);
LOG.info("Error for iteration is {0}.", error);
LOG.info("Error for iteration is {}.", error);
if (error == 0) {
break;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package org.wikibrain.core.model;

import gnu.trove.map.TIntIntMap;
import org.apache.commons.lang.ArrayUtils;
import org.wikibrain.core.lang.Language;

import java.io.Serializable;
import java.util.*;

/**
* A compact graph representation of the category structure.
Expand All @@ -18,15 +20,28 @@ public class CategoryGraph implements Serializable{
static final long serialVersionUID = -3429823331722647576l;
public Language language;

// Mapping from local page id to internal dense index.
// Mapping from external local page id to internal dense index.
public TIntIntMap catIndexes;

public int[] catIds; // dense category ids to sparse local page ids
public double[] catCosts; // the cost of travelling through each category
// Dense internal category index to sparse external local page ids
public int[] catIds;

// the cost of travelling through each category based on page rank
public double[] catCosts;

// The category graph. Category to list of parents
public int[][] catParents;

// Category to list of local page ids (articles, not categories)
public int[][] catPages;

// Category to list of children dense internal category index
public int[][] catChildren;

// Names of categories indexed by internal dense index
public String[] cats;

// ??
public double minCost = -1;

public CategoryGraph(Language language){
Expand All @@ -36,31 +51,88 @@ public CategoryGraph(Language language){
/**
 * Translates an external local page id of a category into its internal dense index.
 *
 * @param catId external (sparse) local page id of the category
 * @return the dense index registered in {@code catIndexes}, or -1 when the id is not registered
 */
public int catIdToIndex(int catId) {
    if (!catIndexes.containsKey(catId)) {
        return -1;
    }
    return catIndexes.get(catId);
}


/**
 * Translates an internal dense category index back into its external local page id.
 *
 * @param catIndex dense internal index; any negative value is treated as "no category"
 * @return the entry of {@code catIds} at that index, or -1 when {@code catIndex} is negative
 */
public int catIndexToId(int catIndex) {
    if (catIndex < 0) {
        return -1;
    }
    return catIds[catIndex];
}

/**
* Return the wikipedia page ids for child of the specified category
* @param wpId
* @return
*/
public int[] getChildCategories(int wpId) {
int parentIndex = catIdToIndex(wpId);
if (parentIndex < 0) {
public int[] getFamilyMembersCategories(int wpId, String familyMember) {
int index = catIdToIndex(wpId);
if (index < 0) {
return new int[0];
}
int[] denseIds;
if(familyMember == "child") {
denseIds = catChildren[index]; //gets the children of index
}else if(familyMember == "parent"){
denseIds = catParents[index]; //gets theparents of index
}else{
return new int[0];
}
int [] denseIds = catChildren[parentIndex];
int childIds[] = new int[denseIds.length];

int famMembersIds[] = new int[denseIds.length];
for (int i = 0; i < denseIds.length; i++) {
childIds[i] = catIds[denseIds[i]];
famMembersIds[i] = catIndexToId(denseIds[i]);
}
return childIds;
return famMembersIds;
}

/**
 * Looks up the pages stored for a category.
 *
 * @param wpId external (sparse) local page id of the category
 * @return the {@code catPages} entry for the category (the stored array itself, not a copy),
 *         or an empty array when the id is not a known category
 */
public int[] getCategoryPages(int wpId) {
    final int denseIndex = catIdToIndex(wpId);
    return (denseIndex >= 0) ? catPages[denseIndex] : new int[0];
}

/**
 * Looks up the name of a category.
 *
 * @param wpId external (sparse) local page id of the category
 * @return the {@code cats} entry for the category, or the empty string when the id is not a
 *         known category
 */
public String getCategoryName(int wpId) {
    final int denseIndex = catIdToIndex(wpId); // sparse to dense
    if (denseIndex < 0) {
        return "";
    }
    return cats[denseIndex];
}

/**
 * Finds the parent category whose {@code catCosts} value is the largest or the smallest.
 *
 * <p>Comparisons are strict, so among parents with equal cost the first one encountered wins.
 *
 * @param wpId external (sparse) local page id of the category whose parents are examined
 * @param max  {@code true} to pick the parent with the largest cost, {@code false} for the smallest
 * @return external page id of the selected parent, or -1 when no parent qualifies
 */
public Integer getMaxMinParentPageRank(int wpId, boolean max) {
    Integer best = null;
    for (int candidate : getFamilyMembersCategories(wpId, "parent")) {
        if (catIdToIndex(candidate) < 0) {
            continue; // not a known category; ignore
        }
        if (best == null) {
            best = candidate;
            continue;
        }
        double bestCost = catCosts[catIdToIndex(best)];
        double candidateCost = catCosts[catIdToIndex(candidate)];
        boolean candidateWins = max ? (bestCost < candidateCost) : (bestCost > candidateCost);
        if (candidateWins) {
            best = candidate;
        }
    }

    if (best == null) {
        return -1;
    }
    return best;
}
}
10 changes: 6 additions & 4 deletions wikibrain-core/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ sr : {
}
fast-word2vec-ensemble : {
type : ensemble
metrics : ["inlink","outlink","category","word2vec","milnewitten","directlink"]
metrics : ["inlink","outlink","category","prebuiltword2vec","milnewitten","directlink"]
similaritynormalizer : percentile
mostsimilarnormalizer : percentile
ensemble : linear
Expand All @@ -694,12 +694,12 @@ sr : {
}
simple-ensemble : {
type : simple-ensemble
metrics : ["directlink","inlink","outlink","category","word2vec"]
metrics : ["directlink","inlink","outlink","category","prebuiltword2vec"]
coefficients : [ 0.2, 0.2, 0.2, 0.1, 0.3 ]
}
word2vec-ensemble : {
type : ensemble
metrics : ["ESA","inlink","outlink","category","word2vec","milnewitten","directlink"]
metrics : ["ESA","inlink","outlink","category","prebuiltword2vec","milnewitten","directlink"]
similaritynormalizer : percentile
mostsimilarnormalizer : percentile
ensemble : linear
Expand Down Expand Up @@ -859,14 +859,16 @@ sr : {
identityWikifier : identity
localLinkDao : matrix
useLinkProbabilityCache : true
desiredWikifiedFraction : 0.25
}
websail-final : {
type : websail
phraseAnalyzer : anchortext
sr : prebuiltword2vec
sr : word2vec
identityWikifier : identity
localLinkDao : matrix
useLinkProbabilityCache : true
desiredWikifiedFraction : 0.25
}
}

Expand Down
4 changes: 2 additions & 2 deletions wikibrain-download/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-parent</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</parent>

<artifactId>wikibrain-download</artifactId>
Expand All @@ -14,7 +14,7 @@
<dependency>
<groupId>org.wikibrainapi</groupId>
<artifactId>wikibrain-core</artifactId>
<version>0.8.0</version>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ public class FileDownloader {


/**
 * Creates a downloader. The only work done at construction time is a call to
 * {@code RequestedLinkGetter.FIX_CERTS()}.
 */
public FileDownloader() {
// NOTE(review): presumably adjusts certificate handling for later HTTPS downloads;
// FIX_CERTS is defined elsewhere, so confirm what it changes and whether the effect is global.
RequestedLinkGetter.FIX_CERTS();
}

public File download(URL url, File file) throws InterruptedException {
Expand Down
Loading

0 comments on commit 8ee6dab

Please sign in to comment.