Skip to content

Commit

Permalink
Removed MapCollections dead code and JsonParser class (castorini#885)
Browse files Browse the repository at this point in the history
  • Loading branch information
lintool authored Nov 23, 2019
1 parent 6d97b36 commit 0b283cc
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 445 deletions.
7 changes: 0 additions & 7 deletions docs/additional.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,3 @@ Anserini Additional Documentation

+ [Axiomatic Reranking](docs/axiom-reranking.md)
+ `IndexUtils` is a utility to interact with an index using the command line (e.g., print index statistics). Refer to `target/appassembler/bin/IndexUtils -h` for more details.
+ `MapCollections` is a generic mapper framework for processing a document collection in parallel. Developers can write their own mappers for different tasks: one simple example is `CountDocumentMapper` which counts the number of documents in a collection:

```
target/appassembler/bin/MapCollections -collection ClueWeb09Collection \
-threads 16 -input ~/collections/web/ClueWeb09b/ClueWeb09_English_1/ \
-mapper CountDocumentMapper -context CountDocumentMapperContext
```
39 changes: 28 additions & 11 deletions src/main/java/io/anserini/collection/TweetCollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import io.anserini.util.JsonParser;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

Expand All @@ -44,6 +43,7 @@
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.OptionalDouble;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.zip.GZIPInputStream;

Expand Down Expand Up @@ -98,7 +98,7 @@ private void parseJson(String json) throws ParseException {
throw new ParseException("IOException in parseJson", 0);
}

if (JsonParser.isFieldAvailable(tweetObj.getDelete())) {
if (isFieldAvailable(tweetObj.getDelete())) {
throw new ParseException("Ignore deleted tweets", 0);
}

Expand All @@ -117,21 +117,21 @@ private void parseJson(String json) throws ParseException {
throw e;
}

if (JsonParser.isFieldAvailable(tweetObj.getInReplyToStatusId())) {
if (isFieldAvailable(tweetObj.getInReplyToStatusId())) {
bufferedRecord.inReplyToStatusId = tweetObj.getInReplyToStatusId();
} else {
bufferedRecord.inReplyToStatusId = OptionalLong.empty();
}

if (JsonParser.isFieldAvailable(tweetObj.getInReplyToUserId())) {
if (isFieldAvailable(tweetObj.getInReplyToUserId())) {
bufferedRecord.inReplyToUserId = tweetObj.getInReplyToUserId();
} else {
bufferedRecord.inReplyToUserId = OptionalLong.empty();
}

if (JsonParser.isFieldAvailable(tweetObj.getRetweetedStatus())) {
if (isFieldAvailable(tweetObj.getRetweetedStatus())) {
bufferedRecord.retweetStatusId = tweetObj.getRetweetedStatus().get().getId();
if (JsonParser.isFieldAvailable(tweetObj.getRetweetedStatus().get().getUser())) {
if (isFieldAvailable(tweetObj.getRetweetedStatus().get().getUser())) {
bufferedRecord.retweetUserId = tweetObj.getRetweetedStatus().get().getUser().get().getId();
} else {
bufferedRecord.retweetUserId = OptionalLong.empty();
Expand All @@ -143,8 +143,8 @@ private void parseJson(String json) throws ParseException {
bufferedRecord.retweetCount = OptionalLong.empty();
}

if (JsonParser.isFieldAvailable(tweetObj.getCoordinates()) &&
JsonParser.isFieldAvailable(tweetObj.getCoordinates().get().getCoordinates()) &&
if (isFieldAvailable(tweetObj.getCoordinates()) &&
isFieldAvailable(tweetObj.getCoordinates().get().getCoordinates()) &&
tweetObj.getCoordinates().get().getCoordinates().get().size() >= 2) {
bufferedRecord.longitude = tweetObj.getCoordinates().get().getCoordinates().get().get(0);
bufferedRecord.latitude = tweetObj.getCoordinates().get().getCoordinates().get().get(1);
Expand All @@ -153,7 +153,7 @@ private void parseJson(String json) throws ParseException {
bufferedRecord.longitude = OptionalDouble.empty();
}

if (JsonParser.isFieldAvailable(tweetObj.getLang())) {
if (isFieldAvailable(tweetObj.getLang())) {
bufferedRecord.lang = tweetObj.getLang();
} else {
bufferedRecord.lang = Optional.empty();
Expand All @@ -164,13 +164,13 @@ private void parseJson(String json) throws ParseException {
bufferedRecord.statusesCount = tweetObj.getUser().getStatusesCount();
bufferedRecord.screenName = tweetObj.getUser().getScreenName();

if (JsonParser.isFieldAvailable(tweetObj.getUser().getName())) {
if (isFieldAvailable(tweetObj.getUser().getName())) {
bufferedRecord.name = tweetObj.getUser().getName();
} else {
bufferedRecord.name = Optional.empty();
}

if (JsonParser.isFieldAvailable(tweetObj.getUser().getProfileImageUrl())) {
if (isFieldAvailable(tweetObj.getUser().getProfileImageUrl())) {
bufferedRecord.profileImageUrl = tweetObj.getUser().getProfileImageUrl();
} else {
bufferedRecord.profileImageUrl = Optional.empty();
Expand All @@ -179,6 +179,23 @@ private void parseJson(String json) throws ParseException {
bufferedRecord.jsonString = json;
bufferedRecord.jsonObject = tweetObj;
}

/**
 * Tells whether an optional-typed field actually carries a value.
 *
 * <p>Accepts {@link OptionalLong}, {@link OptionalDouble}, {@link OptionalInt}
 * and {@link Optional}; the parameter is declared as {@code Object} because
 * these four types share no common supertype exposing {@code isPresent()}.
 *
 * @param field the optional wrapper to inspect; may be {@code null}
 * @return {@code false} if {@code field} is {@code null} or empty,
 *         {@code true} if it holds a value
 * @throws ClassCastException if {@code field} is non-null and is not one of
 *         the four optional types listed above
 */
private boolean isFieldAvailable(Object field) {
  if (field == null) {
    return false;
  }
  // The primitive optional classes are final, so instanceof is equivalent to
  // an exact class comparison here.
  if (field instanceof OptionalLong) {
    return ((OptionalLong) field).isPresent();
  }
  if (field instanceof OptionalDouble) {
    return ((OptionalDouble) field).isPresent();
  }
  if (field instanceof OptionalInt) {
    return ((OptionalInt) field).isPresent();
  }
  // Wildcard instead of the raw type: the element type is irrelevant for a
  // presence check, and this avoids a rawtypes warning. Anything that is not
  // an Optional fails the cast, as before.
  return ((Optional<?>) field).isPresent();
}
}

/**
Expand Down
41 changes: 0 additions & 41 deletions src/main/java/io/anserini/util/JsonParser.java

This file was deleted.

216 changes: 0 additions & 216 deletions src/main/java/io/anserini/util/MapCollections.java

This file was deleted.

Loading

0 comments on commit 0b283cc

Please sign in to comment.