Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for named graphs #196

Merged
merged 32 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f69df74
Implemented quad dictionaries for .nq files
QuentinJanuel May 31, 2023
728446d
temp triples can handle quads
QuentinJanuel Jun 1, 2023
3d9e29a
making sure graphs don't affect normal hdt
QuentinJanuel Jun 5, 2023
9a7d09d
FourQuadSectionDictionary needs to implement FourSectionDictionary, n…
QuentinJanuel Jun 15, 2023
7a024f5
quad patterns support (not implemented yet)
QuentinJanuel Jun 15, 2023
82cb465
fixed quadstring
QuentinJanuel Jun 22, 2023
3439c49
save triples (could be optimized)
QuentinJanuel Jun 22, 2023
a4f5bfb
search wip
QuentinJanuel Jun 26, 2023
d672ee9
Fixed search for triples (non quad)
QuentinJanuel Jun 29, 2023
c26b85e
number of graphs is now dynamic
QuentinJanuel Jun 29, 2023
f58cfa6
Adapt code for large files (RoaringBitmap)
QuentinJanuel Jul 11, 2023
bcc095a
roaring bitmap 64
QuentinJanuel Jul 13, 2023
f45facb
accelerated wildcard pattern a bit
QuentinJanuel Jul 13, 2023
1e612c3
BitmapQuadsIterator
QuentinJanuel Jul 17, 2023
8d31b3d
fixed roaring bitmap
QuentinJanuel Jul 20, 2023
c8b0c1e
let quads iterator variables be longs
QuentinJanuel Jul 20, 2023
b1ba3a6
support for ???G, S??G, SP?G & SPOG
QuentinJanuel Jul 20, 2023
0f1685b
?POG & ??OG (ZGFOQ)
QuentinJanuel Jul 28, 2023
5ccc86b
removed forgotten log
QuentinJanuel Jul 28, 2023
257aa30
?PO? & ??O? (ZFOQ)
QuentinJanuel Jul 28, 2023
76ee129
?P?G (YGFOQ)
QuentinJanuel Jul 31, 2023
41abd02
?P?? (YFOQ)
QuentinJanuel Aug 4, 2023
cbfeedc
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12416…
QuentinJanuel Aug 28, 2023
b9002a6
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
fef55fb
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
5233663
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
59bba7c
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
7ae491c
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12958…
QuentinJanuel Aug 28, 2023
eacd5bb
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12958…
QuentinJanuel Aug 28, 2023
f61bc58
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12959…
QuentinJanuel Aug 31, 2023
c675a01
cat tests for quads
QuentinJanuel Sep 4, 2023
3f4ba41
test quads iterators
QuentinJanuel Sep 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,10 @@ public interface Dictionary extends Closeable {
* @return type
*/
String getType();

/**
 * Tells whether this dictionary supports graphs.
 *
 * @return {@code true} if it supports graphs, {@code false} otherwise
 */
boolean supportGraphs();
}
5 changes: 1 addition & 4 deletions hdt-api/src/main/java/org/rdfhdt/hdt/quad/QuadString.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public QuadString(CharSequence subject, CharSequence predicate, CharSequence obj

public QuadString(TripleString other) {
super(other);
this.context = other.getObject();
this.context = other.getGraph();
}

@Override
Expand All @@ -29,9 +29,6 @@ public void clear() {

@Override
public boolean equals(Object other) {
if (context.length() == 0) {
return super.equals(other);
}
if (!(other instanceof QuadString)) {
return false;
}
Expand Down
86 changes: 77 additions & 9 deletions hdt-api/src/main/java/org/rdfhdt/hdt/triples/TripleID.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ public final class TripleID implements Comparable<TripleID>, Serializable, Clone
private long subject;
private long predicate;
private long object;
private long graph;
private boolean isQuad = false;

/**
* Basic constructor
Expand All @@ -68,6 +70,27 @@ public TripleID(long subject, long predicate, long object) {
this.object = object;
}

/**
 * Builds an ID out of four components and flags it as a quad.
 *
 * @param subject
 *            The subject
 * @param predicate
 *            The predicate
 * @param object
 *            The object
 * @param graph
 *            The graph
 */
public TripleID(long subject, long predicate, long object, long graph) {
    // The superclass constructor is invoked implicitly.
    this.isQuad = true;
    this.graph = graph;
    this.object = object;
    this.predicate = predicate;
    this.subject = subject;
}

/**
* Build a TripleID as a copy of another one.
* @param other the triple ID to copy
Expand All @@ -77,6 +100,12 @@ public TripleID(TripleID other) {
this.subject = other.subject;
this.predicate = other.predicate;
this.object = other.object;
this.graph = other.graph;
this.isQuad = other.isQuad;
}

/**
 * Tells whether this ID is flagged as a quad, i.e. a graph component
 * has been supplied to it.
 *
 * @return boolean the quad flag
 */
public boolean isQuad() {
    return this.isQuad;
}

/**
Expand Down Expand Up @@ -124,6 +153,22 @@ public void setPredicate(long predicate) {
this.predicate = predicate;
}

/**
 * Gives access to the graph component of this ID.
 *
 * @return long the graph
 */
public long getGraph() {
    return this.graph;
}

/**
 * Stores the graph component. As a side effect the ID is flagged as a quad.
 *
 * @param graph
 *            the graph to set
 */
public void setGraph(long graph) {
    this.isQuad = true;
    this.graph = graph;
}

/**
* Replace all components of a TripleID at once. Useful to reuse existing objects.
* @param subject subject ID
Expand All @@ -136,17 +181,35 @@ public void setAll(long subject, long predicate, long object) {
this.object = object;
}

/**
 * Overwrites subject, predicate, object and graph in a single call and
 * flags the ID as a quad, so that an existing instance can be recycled.
 *
 * @param subject subject ID
 * @param predicate predicate ID
 * @param object object ID
 * @param graph graph ID
 */
public void setAll(long subject, long predicate, long object, long graph) {
    // A graph was supplied, hence this ID now represents a quad.
    this.isQuad = true;
    this.graph = graph;
    this.object = object;
    this.predicate = predicate;
    this.subject = subject;
}

/**
 * Copies every component of another ID, including its graph and its
 * quad flag, into this one.
 *
 * @param replacement the ID whose values are taken over
 */
public void assign(TripleID replacement) {
    this.subject = replacement.getSubject();
    this.predicate = replacement.getPredicate();
    this.object = replacement.getObject();
    this.graph = replacement.getGraph();
    this.isQuad = replacement.isQuad();
}

/**
* Set all components to zero.
*/
public void clear() {
subject = predicate = object = 0;
subject = predicate = object = graph = 0;
isQuad = false;
}

/*
Expand All @@ -156,6 +219,8 @@ public void clear() {
*/
@Override
public String toString() {
    // The graph ID is appended only when this ID is flagged as a quad.
    String triple = subject + " " + predicate + " " + object;
    return isQuad ? triple + " " + graph : triple;
}

Expand Down Expand Up @@ -192,11 +257,13 @@ public boolean match(TripleID pattern) {
long subjectPattern = pattern.getSubject();
long predicatePattern = pattern.getPredicate();
long objectPattern = pattern.getObject();
long graphPattern = pattern.getGraph();

/* Remember that 0 acts as a wildcard */
if (subjectPattern == 0 || this.subject == subjectPattern) {
if (predicatePattern == 0 || this.predicate == predicatePattern) {
return objectPattern == 0 || this.object == objectPattern;
if (objectPattern == 0 || this.object == objectPattern) {
return graphPattern == 0 || this.graph == graphPattern;
}
}
}
return false;
Expand All @@ -207,23 +274,23 @@ public boolean match(TripleID pattern) {
* @return boolean
*/
public boolean isEmpty() {
return !(subject != 0 || predicate != 0 || object != 0);
return !(subject != 0 || predicate != 0 || object != 0 || graph != 0);
}

/**
* Check whether none of the components of the triple are empty.
* @return boolean
*/
public boolean isValid() {
return subject>0 && predicate>0 && object>0;
return subject>0 && predicate>0 && object>0 && (isQuad ? graph>0 : true);
}

/**
* Checks whether any of the components of the triple are "no match" (-1).
* @return boolean
*/
public boolean isNoMatch() {
return subject == -1 || predicate == -1 || object == -1;
return subject == -1 || predicate == -1 || object == -1 || graph == -1;
QuentinJanuel marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand All @@ -234,7 +301,8 @@ public String getPatternString() {
return "" +
(subject==0 ? '?' : 'S') +
(predicate==0 ? '?' : 'P') +
(object==0 ? '?' : 'O');
(object==0 ? '?' : 'O') +
(isQuad ? (graph==0 ? '?' : 'G') : "");
}

/**
Expand All @@ -252,7 +320,7 @@ public boolean equals(Object o) {
return false;
}
TripleID other = (TripleID) o;
return !( subject!=other.subject || predicate!=other.predicate || object!=other.object );
return !( subject!=other.subject || predicate!=other.predicate || object!=other.object || graph!=other.graph );
}

@Override
Expand All @@ -266,6 +334,6 @@ public TripleID clone() {

@Override
public int hashCode() {
return (int) (subject * 13 + predicate * 17 + object * 31);
return (int) (subject * 13 + predicate * 17 + object * 31 + graph * 37);
}
}
81 changes: 63 additions & 18 deletions hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HdtSearch.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@
import java.util.ArrayList;
import java.util.List;

import org.rdfhdt.hdt.dictionary.Dictionary;
import org.rdfhdt.hdt.exceptions.NotFoundException;
import org.rdfhdt.hdt.exceptions.ParserException;
import org.rdfhdt.hdt.hdt.HDT;
import org.rdfhdt.hdt.hdt.HDTManager;
import org.rdfhdt.hdt.hdt.HDTVersion;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.options.HDTOptions;
import org.rdfhdt.hdt.quad.QuadString;
import org.rdfhdt.hdt.triples.IteratorTripleString;
import org.rdfhdt.hdt.triples.TripleString;
import org.rdfhdt.hdt.util.StopWatch;
Expand Down Expand Up @@ -79,20 +81,26 @@ public class HdtSearch {
@Parameter(names = "-memory", description = "Load the whole file into main memory. Ensures fastest querying.")
public boolean loadInMemory;

protected static void iterate(HDT hdt, CharSequence subject, CharSequence predicate, CharSequence object) throws NotFoundException {
protected static void iterate(
HDT hdt,
boolean isHDTQ,
CharSequence subject,
CharSequence predicate,
CharSequence object,
CharSequence graph
) throws NotFoundException {
StopWatch iterateTime = new StopWatch();
int count;

subject = subject.length()==1 && subject.charAt(0)=='?' ? "" : subject;
predicate = predicate.length()==1 && predicate.charAt(0)=='?' ? "" : predicate;
object = object.length()==1 && object.charAt(0)=='?' ? "" : object;

// Iterate over triples as Strings
IteratorTripleString it = hdt.search(subject,predicate,object);
count = 0;
while(it.hasNext()) {
TripleString triple = it.next();
System.out.println(triple);
System.out.println(triple + " (graph: " + triple.getGraph() + ")");
count++;
}

Expand All @@ -118,34 +126,58 @@ private void help() {
* Read from a line, where each component is separated by space.
* @param line line to parse
*/
private static void parseTriplePattern(TripleString dest, String line) throws ParserException {
private static void parseTriplePattern(TripleString dest, String line, boolean isHDTQ) throws ParserException {
int split, posa, posb;
ParserException ex = new ParserException(
QuentinJanuel marked this conversation as resolved.
Show resolved Hide resolved
isHDTQ
? "Make sure that you included four terms."
: "Make sure that you included three terms."
); // Not found, error.
dest.clear();

// SET SUBJECT
posa = 0;
posb = split = line.indexOf(' ', posa);

if(posb==-1) throw new ParserException("Make sure that you included three terms."); // Not found, error.
if(posb==-1) throw ex;

dest.setSubject(UnicodeEscape.unescapeString(line.substring(posa, posb)));

// SET PREDICATE
posa = split+1;
posb = split = line.indexOf(' ', posa);

if(posb==-1) throw new ParserException("Make sure that you included three terms.");
if(posb==-1) throw ex;

dest.setPredicate(UnicodeEscape.unescapeString(line.substring(posa, posb)));

// SET OBJECT
posa = split+1;
posb = line.length();

if(line.charAt(posb-1)=='.') posb--; // Remove trailing <space> <dot> from NTRIPLES.
if(line.charAt(posb-1)==' ') posb--;

dest.setObject(UnicodeEscape.unescapeString(line.substring(posa, posb)));
if (isHDTQ) {
// SET OBJECT
posa = split+1;
posb = split = line.indexOf(' ', posa);

if(posb==-1) throw ex;

dest.setObject(UnicodeEscape.unescapeString(line.substring(posa, posb)));

// SET GRAPH
posa = split+1;
posb = line.length();

if(line.charAt(posb-1)=='.') posb--;
if(line.charAt(posb-1)==' ') posb--;

dest.setGraph(UnicodeEscape.unescapeString(line.substring(posa, posb)));
} else {
// SET OBJECT
posa = split+1;
posb = line.length();

if(line.charAt(posb-1)=='.') posb--; // Remove trailing <space> <dot> from NTRIPLES.
if(line.charAt(posb-1)==' ') posb--;

dest.setObject(UnicodeEscape.unescapeString(line.substring(posa, posb)));
}
}

public void execute() throws IOException {
Expand All @@ -169,10 +201,12 @@ public void execute() throws IOException {
} else {
hdt= HDTManager.mapIndexedHDT(hdtInput, spec, listenerConsole);
}
Dictionary dict = hdt.getDictionary();
boolean isHDTQ = dict.supportGraphs();

BufferedReader in = new BufferedReader(new InputStreamReader(System.in, UTF_8));
try {
TripleString triplePattern = new TripleString();
TripleString triplePattern = new QuadString();

while(true) {
System.out.print(">> ");
Expand All @@ -187,10 +221,21 @@ public void execute() throws IOException {
}

try {
parseTriplePattern(triplePattern, line);
System.out.println("Query: |"+triplePattern.getSubject()+"| |"+triplePattern.getPredicate()+"| |" + triplePattern.getObject()+"|");
parseTriplePattern(triplePattern, line, isHDTQ);
if (isHDTQ) {
System.out.println("Query: |"+triplePattern.getSubject()+"| |"+triplePattern.getPredicate()+"| |" + triplePattern.getObject()+"| |" + triplePattern.getGraph()+"|");
} else {
System.out.println("Query: |"+triplePattern.getSubject()+"| |"+triplePattern.getPredicate()+"| |" + triplePattern.getObject()+"|");
}

iterate(hdt,triplePattern.getSubject(),triplePattern.getPredicate(),triplePattern.getObject());
iterate(
hdt,
isHDTQ,
triplePattern.getSubject(),
triplePattern.getPredicate(),
triplePattern.getObject(),
triplePattern.getGraph()
);
} catch (ParserException e) {
System.err.println("Could not parse triple pattern: "+e.getMessage());
help();
Expand Down
7 changes: 5 additions & 2 deletions hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/RDF2HDT.java
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,9 @@ public void execute() throws ParserException, IOException {
}
}

if (notation == RDFNotation.NQUAD) {
boolean isQuad = notation == RDFNotation.NQUAD;

if (isQuad) {
spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD);
spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION);
}
Expand Down Expand Up @@ -256,7 +258,8 @@ public void execute() throws ParserException, IOException {
colorTool.logValue("Different subjects .... ", "" + hdt.getDictionary().getNsubjects());
colorTool.logValue("Different predicates .. ", "" + hdt.getDictionary().getNpredicates());
colorTool.logValue("Different objects ..... ", "" + hdt.getDictionary().getNobjects());
colorTool.logValue("Different graphs ...... ", "" + hdt.getDictionary().getNgraphs());
if (isQuad)
colorTool.logValue("Different graphs ...... ", "" + hdt.getDictionary().getNgraphs());
colorTool.logValue("Common Subject/Object . ", "" + hdt.getDictionary().getNshared());
}

Expand Down
Loading