Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for named graphs #196

Merged
merged 32 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f69df74
Implemented quad dictionaries for .nq files
QuentinJanuel May 31, 2023
728446d
temp triples can handle quads
QuentinJanuel Jun 1, 2023
3d9e29a
making sure graphs don't affect normal hdt
QuentinJanuel Jun 5, 2023
9a7d09d
FourQuadSectionDictionary needs to implement FourSectionDictionary, n…
QuentinJanuel Jun 15, 2023
7a024f5
quad patterns support (not implemented yet)
QuentinJanuel Jun 15, 2023
82cb465
fixed quadstring
QuentinJanuel Jun 22, 2023
3439c49
save triples (could be optimized)
QuentinJanuel Jun 22, 2023
a4f5bfb
search wip
QuentinJanuel Jun 26, 2023
d672ee9
Fixed search for triples (non quad)
QuentinJanuel Jun 29, 2023
c26b85e
number of graphs is now dynamic
QuentinJanuel Jun 29, 2023
f58cfa6
Adapt code for large files (RoaringBitmap)
QuentinJanuel Jul 11, 2023
bcc095a
roaring bitmap 64
QuentinJanuel Jul 13, 2023
f45facb
accelerated wildcard pattern a bit
QuentinJanuel Jul 13, 2023
1e612c3
BitmapQuadsIterator
QuentinJanuel Jul 17, 2023
8d31b3d
fixed roaring bitmap
QuentinJanuel Jul 20, 2023
c8b0c1e
let quads iterator variables be longs
QuentinJanuel Jul 20, 2023
b1ba3a6
support for ???G, S??G, SP?G & SPOG
QuentinJanuel Jul 20, 2023
0f1685b
?POG & ??OG (ZGFOQ)
QuentinJanuel Jul 28, 2023
5ccc86b
removed forgotten log
QuentinJanuel Jul 28, 2023
257aa30
?PO? & ??O? (ZFOQ)
QuentinJanuel Jul 28, 2023
76ee129
?P?G (YGFOQ)
QuentinJanuel Jul 31, 2023
41abd02
?P?? (YFOQ)
QuentinJanuel Aug 4, 2023
cbfeedc
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12416…
QuentinJanuel Aug 28, 2023
b9002a6
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
fef55fb
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
5233663
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
59bba7c
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
7ae491c
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12958…
QuentinJanuel Aug 28, 2023
eacd5bb
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12958…
QuentinJanuel Aug 28, 2023
f61bc58
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12959…
QuentinJanuel Aug 31, 2023
c675a01
cat tests for quads
QuentinJanuel Sep 4, 2023
3f4ba41
test quads iterators
QuentinJanuel Sep 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
search wip
  • Loading branch information
QuentinJanuel committed Jun 26, 2023
commit a4f5bfb9a3667a87b7ea5932c9bf7d4036cbef9b
24 changes: 24 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/rdf/RDFAccess.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,28 @@ public interface RDFAccess {
* @throws NotFoundException when the triple cannot be found
*/
IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object) throws NotFoundException;

/**
* Iterate over the triples of an RDF Set that match the specified pattern,
* where the pattern additionally carries a graph component.
* null and empty strings act as a wildcard.
* (e.g. search(null, null, null, null) iterates over all elements)
*
* @param subject
* The subject to search
* @param predicate
* The predicate to search
* @param object
* The object to search
* @param graph
* The graph to search
*
* @return Iterator of TripleStrings that match the pattern
* @throws NotFoundException when the triple cannot be found
*/
IteratorTripleString search(
CharSequence subject,
CharSequence predicate,
CharSequence object,
CharSequence graph
) throws NotFoundException;
}
10 changes: 8 additions & 2 deletions hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HdtSearch.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,18 @@ protected static void iterate(
subject = subject.length()==1 && subject.charAt(0)=='?' ? "" : subject;
predicate = predicate.length()==1 && predicate.charAt(0)=='?' ? "" : predicate;
object = object.length()==1 && object.charAt(0)=='?' ? "" : object;
graph = graph.length()==1 && graph.charAt(0)=='?' ? "" : graph;
// Iterate over triples as Strings
IteratorTripleString it = hdt.search(subject,predicate,object);
System.out.println("Searching for: "+subject+" "+predicate+" "+object+" "+graph);
System.out.println("hdt class name: " + hdt.getClass().getName());
IteratorTripleString it =
isHDTQ
? hdt.search(subject, predicate, object, graph)
: hdt.search(subject, predicate, object);
count = 0;
while(it.hasNext()) {
TripleString triple = it.next();
System.out.println(triple + " (graph: " + triple.getGraph() + ")");
System.out.println(triple);
count++;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
import org.rdfhdt.hdt.enums.TripleComponentRole;

public class DictionaryPFCOptimizedExtractor implements OptimizedExtractor{
private final PFCOptimizedExtractor shared, subjects, predicates, objects;
private final PFCOptimizedExtractor
shared,
subjects,
predicates,
objects,
graphs;
private final long numshared;

public DictionaryPFCOptimizedExtractor(FourSectionDictionary origDict) {
Expand All @@ -14,6 +19,11 @@ public DictionaryPFCOptimizedExtractor(FourSectionDictionary origDict) {
subjects = new PFCOptimizedExtractor((PFCDictionarySectionMap) origDict.subjects);
predicates = new PFCOptimizedExtractor((PFCDictionarySectionMap) origDict.predicates);
objects = new PFCOptimizedExtractor((PFCDictionarySectionMap) origDict.objects);
if (origDict.graphs == null) {
graphs = null;
} else {
graphs = new PFCOptimizedExtractor((PFCDictionarySectionMap) origDict.graphs);
}
}

public CharSequence idToString(long id, TripleComponentRole role) {
Expand All @@ -38,6 +48,8 @@ private PFCOptimizedExtractor getSection(long id, TripleComponentRole role) {
} else {
return objects;
}
case GRAPH:
return graphs;
}
throw new IllegalArgumentException();
}
Expand All @@ -54,6 +66,8 @@ private long getLocalId(long id, TripleComponentRole position) {
}
case PREDICATE:
return id;
case GRAPH:
return id;
}

throw new IllegalArgumentException();
Expand Down
73 changes: 73 additions & 0 deletions hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import org.rdfhdt.hdt.header.HeaderPrivate;
import org.rdfhdt.hdt.iterator.DictionaryTranslateIterator;
import org.rdfhdt.hdt.iterator.DictionaryTranslateIteratorBuffer;
import org.rdfhdt.hdt.iterator.SuppliableIteratorTripleID;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.options.ControlInfo;
import org.rdfhdt.hdt.options.ControlInformation;
Expand Down Expand Up @@ -320,6 +321,78 @@ public long getLastTriplePosition() {
}
}

/**
 * Searches this HDT for entries matching a subject/predicate/object/graph pattern.
 *
 * Each component is first translated to a dictionary ID. When the resulting
 * ID pattern reports {@code isNoMatch()}, an empty iterator is returned
 * without querying the triples structure.
 *
 * @param subject   the subject to search
 * @param predicate the predicate to search
 * @param object    the object to search
 * @param graph     the graph to search
 * @return an iterator over the matching results, translated back to strings
 * @throws NotFoundException declared by the overridden method
 * @throws IllegalStateException if this HDT has already been closed
 */
@Override
public IteratorTripleString search(
        CharSequence subject,
        CharSequence predicate,
        CharSequence object,
        CharSequence graph
) throws NotFoundException {
    if (isClosed) {
        throw new IllegalStateException("Cannot search an already closed HDT");
    }

    // Conversion from string pattern to ID pattern
    TripleID triple = new TripleID(
            dictionary.stringToId(subject, TripleComponentRole.SUBJECT),
            dictionary.stringToId(predicate, TripleComponentRole.PREDICATE),
            dictionary.stringToId(object, TripleComponentRole.OBJECT),
            dictionary.stringToId(graph, TripleComponentRole.GRAPH)
    );

    if (triple.isNoMatch()) {
        // Nothing can match: hand back an iterator that is always exhausted.
        return new IteratorTripleString() {
            @Override
            public TripleString next() {
                // java.util.Iterator contract: an exhausted iterator must throw
                // instead of returning null.
                throw new java.util.NoSuchElementException("No results for this search pattern");
            }

            @Override
            public boolean hasNext() {
                return false;
            }

            @Override
            public ResultEstimationType numResultEstimation() {
                return ResultEstimationType.EXACT;
            }

            @Override
            public void goToStart() {
                // Nothing to rewind: the iterator is empty.
            }

            @Override
            public long estimatedNumResults() {
                return 0;
            }

            @Override
            public long getLastTriplePosition() {
                throw new NotImplementedException();
            }
        };
    }

    if (isMapped) {
        try {
            if (dictionary instanceof MultipleSectionDictionary) {
                return new DictionaryTranslateIteratorBuffer(
                        triples.search(triple),
                        (MultipleSectionDictionary) dictionary,
                        subject,
                        predicate,
                        object,
                        graph
                );
            } else {
                SuppliableIteratorTripleID iterator = triples.search(triple);
                return new DictionaryTranslateIteratorBuffer(
                        iterator,
                        (FourSectionDictionary) dictionary,
                        subject,
                        predicate,
                        object,
                        graph
                );
            }
        } catch (NullPointerException e) {
            // NOTE(review): the buffered iterator path is abandoned on a
            // NullPointerException and the plain translating iterator is used
            // instead; the origin of the NPE is not visible here - confirm.
            e.printStackTrace();
            return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, graph);
        }
    } else {
        return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object, graph);
    }
}

public void loadFromParts(HeaderPrivate h, DictionaryPrivate d, TriplesPrivate t) {
this.header = h;
this.dictionary = d;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,13 @@ public void close() throws IOException {
public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object) {
throw new NotImplementedException();
}
/**
 * Graph-aware search variant; this implementation does not support it.
 *
 * @throws NotImplementedException always
 */
@Override
public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, CharSequence graph) {
    throw new NotImplementedException();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,13 @@ public void close() throws IOException {
public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object) throws NotFoundException {
return mapOrGetHDT().search(subject, predicate, object);
}
/**
 * Graph-aware search variant: forwards the pattern unchanged to the HDT
 * returned by {@code mapOrGetHDT()}.
 *
 * @throws NotFoundException propagated from the underlying search
 */
@Override
public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object, CharSequence graph) throws NotFoundException {
    return mapOrGetHDT().search(subject, predicate, object, graph);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.rdfhdt.hdt.enums.RDFNotation;
Expand All @@ -44,6 +43,7 @@
import org.rdfhdt.hdt.options.ControlInfo;
import org.rdfhdt.hdt.options.HDTOptions;
import org.rdfhdt.hdt.options.HDTSpecification;
import org.rdfhdt.hdt.quad.QuadString;
import org.rdfhdt.hdt.rdf.RDFParserCallback.RDFCallback;
import org.rdfhdt.hdt.rdf.parsers.RDFParserSimple;
import org.rdfhdt.hdt.triples.IteratorTripleString;
Expand Down Expand Up @@ -163,6 +163,32 @@ public IteratorTripleString search(CharSequence subject, CharSequence predicate,
}
return new PlainHeaderIterator(this, pattern);
}
/**
 * Builds a quad pattern from the given components and returns an iterator
 * over this header for that pattern.
 *
 * The object is passed through {@code HeaderUtil.cleanURI} when it is empty,
 * starts with {@code <} or {@code "}, or starts with {@code http://} or
 * {@code file://}; otherwise it is wrapped in double quotes.
 */
@Override
public IteratorTripleString search(
        CharSequence subject,
        CharSequence predicate,
        CharSequence object,
        CharSequence graph
) {
    final String objectText = object.toString();

    // Decide up front whether the object is used as given or must be quoted.
    final boolean useObjectAsGiven;
    if (objectText.isEmpty()) {
        useObjectAsGiven = true;
    } else {
        final char first = objectText.charAt(0);
        useObjectAsGiven = first == '<'
                || first == '"'
                || objectText.startsWith("http://")
                || objectText.startsWith("file://");
    }

    TripleString pattern = new QuadString(
            HeaderUtil.cleanURI(subject),
            HeaderUtil.cleanURI(predicate),
            useObjectAsGiven ? HeaderUtil.cleanURI(object) : '"' + objectText + '"',
            HeaderUtil.cleanURI(graph)
    );
    return new PlainHeaderIterator(this, pattern);
}

@Override
public void processTriple(TripleString triple, long pos) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public class DictionaryTranslateIterator implements IteratorTripleString {
/** The dictionary */
final Dictionary dictionary;

CharSequence s, p, o;
CharSequence s, p, o, g;

long lastSid, lastPid, lastOid;
CharSequence lastSstr, lastPstr, lastOstr;
Expand All @@ -62,7 +62,7 @@ public class DictionaryTranslateIterator implements IteratorTripleString {
public DictionaryTranslateIterator(IteratorTripleID iteratorTripleID, Dictionary dictionary) {
this.iterator = iteratorTripleID;
this.dictionary = dictionary;
this.s = this.p = this.o = "";
this.s = this.p = this.o = this.g = "";
}

/**
Expand All @@ -79,6 +79,31 @@ public DictionaryTranslateIterator(IteratorTripleID iteratorTripleID, Dictionary
this.s = s==null ? "" : s;
this.p = p==null ? "" : p;
this.o = o==null ? "" : o;
this.g = "";
}

/**
 * Constructor taking the search pattern, including its graph component.
 *
 * A {@code null} pattern component is stored as the empty string.
 *
 * @param iteratorTripleID
 *            Iterator of TripleID to be used
 * @param dictionary
 *            The dictionary to be used
 * @param s
 *            Subject of the search pattern, may be null
 * @param p
 *            Predicate of the search pattern, may be null
 * @param o
 *            Object of the search pattern, may be null
 * @param g
 *            Graph of the search pattern, may be null
 */
public DictionaryTranslateIterator(
        IteratorTripleID iteratorTripleID,
        Dictionary dictionary,
        CharSequence s,
        CharSequence p,
        CharSequence o,
        CharSequence g
) {
    this.dictionary = dictionary;
    this.iterator = iteratorTripleID;

    // Normalise missing pattern components to the empty string.
    this.s = (s != null) ? s : "";
    this.p = (p != null) ? p : "";
    this.o = (o != null) ? o : "";
    this.g = (g != null) ? g : "";
}

/*
Expand Down
Loading