Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for named graphs #196

Merged
merged 32 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f69df74
Implemented quad dictionaries for .nq files
QuentinJanuel May 31, 2023
728446d
temp triples can handle quads
QuentinJanuel Jun 1, 2023
3d9e29a
making sure graphs don't affect normal hdt
QuentinJanuel Jun 5, 2023
9a7d09d
FourQuadSectionDictionary needs to implement FourSectionDictionary, n…
QuentinJanuel Jun 15, 2023
7a024f5
quad patterns support (not implemented yet)
QuentinJanuel Jun 15, 2023
82cb465
fixed quadstring
QuentinJanuel Jun 22, 2023
3439c49
save triples (could be optimized)
QuentinJanuel Jun 22, 2023
a4f5bfb
search wip
QuentinJanuel Jun 26, 2023
d672ee9
Fixed search for triples (non quad)
QuentinJanuel Jun 29, 2023
c26b85e
number of graphs is now dynamic
QuentinJanuel Jun 29, 2023
f58cfa6
Adapt code for large files (RoaringBitmap)
QuentinJanuel Jul 11, 2023
bcc095a
roaring bitmap 64
QuentinJanuel Jul 13, 2023
f45facb
accelerated wildcard pattern a bit
QuentinJanuel Jul 13, 2023
1e612c3
BitmapQuadsIterator
QuentinJanuel Jul 17, 2023
8d31b3d
fixed roaring bitmap
QuentinJanuel Jul 20, 2023
c8b0c1e
let quads iterator variables be longs
QuentinJanuel Jul 20, 2023
b1ba3a6
support for ???G, S??G, SP?G & SPOG
QuentinJanuel Jul 20, 2023
0f1685b
?POG & ??OG (ZGFOQ)
QuentinJanuel Jul 28, 2023
5ccc86b
removed forgotten log
QuentinJanuel Jul 28, 2023
257aa30
?PO? & ??O? (ZFOQ)
QuentinJanuel Jul 28, 2023
76ee129
?P?G (YGFOQ)
QuentinJanuel Jul 31, 2023
41abd02
?P?? (YFOQ)
QuentinJanuel Aug 4, 2023
cbfeedc
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12416…
QuentinJanuel Aug 28, 2023
b9002a6
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
fef55fb
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
5233663
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
59bba7c
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
7ae491c
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12958…
QuentinJanuel Aug 28, 2023
eacd5bb
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12958…
QuentinJanuel Aug 28, 2023
f61bc58
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12959…
QuentinJanuel Aug 31, 2023
c675a01
cat tests for quads
QuentinJanuel Sep 4, 2023
3f4ba41
test quads iterators
QuentinJanuel Sep 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
test quads iterators
  • Loading branch information
QuentinJanuel committed Sep 5, 2023
commit 3f4ba4164e136c978422068979823a63b814ac2a
Original file line number Diff line number Diff line change
Expand Up @@ -69,27 +69,57 @@ private void init() {

/**
 * Adds one statement to the temporary triples and dictionary.
 * <p>
 * A statement whose graph component is non-empty is treated as a quad: its
 * graph term is inserted with {@link TripleComponentRole#GRAPH} and the
 * four-id form of {@code triples.insert} is used. Otherwise only subject,
 * predicate and object are inserted. The statement is inserted exactly once
 * and {@code num} / {@code size} are updated exactly once per call.
 *
 * @param triple statement to add
 * @throws IOException declared by the overridden method
 */
@Override
public void addTriple(TripleString triple) throws IOException {
    boolean isQuad = triple.getGraph().length() > 0;
    if (isQuad) {
        triples.insert(
            dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT),
            dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE),
            dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT),
            dictionary.insert(triple.getGraph(), TripleComponentRole.GRAPH)
        );
    } else {
        triples.insert(
            dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT),
            dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE),
            dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT)
        );
    }
    num++;

    // Length of the three terms plus 4 for the spaces and the final dot.
    size += triple.getSubject().length()
        + triple.getPredicate().length()
        + triple.getObject().length()
        + 4;
    if (isQuad) {
        // The graph term plus one extra separating space.
        size += triple.getGraph().length() + 1;
    }
}

@Override
public void close() throws IOException {
ProgressListener listener=null;
dictionary.endProcessing();

dictionary.endProcessing();

// Reorganize both the dictionary and the triples
modHDT.reorganizeDictionary(listener);
modHDT.reorganizeTriples(listener);

modHDT.getHeader().insert( "_:statistics", HDTVocabulary.ORIGINAL_SIZE, size);


// Convert to HDT
HDTImpl hdt = new HDTImpl(spec);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ public void replaceAllIds(
(int)mapGraph.getNewID(triple.getGraph() -1)
);
} else {
throw new RuntimeException("You must call the replaceAllIds method without a DictionaryIDMapping for graphs if the triples are not quads.");
throw new RuntimeException("You must call the replaceAllIds method without a DictionaryIDMapping for graphs if the triples are not quads.");
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
package org.rdfhdt.hdt.triples.impl;

import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.rdfhdt.hdt.enums.RDFNotation;
import org.rdfhdt.hdt.exceptions.NotFoundException;
import org.rdfhdt.hdt.exceptions.ParserException;
import org.rdfhdt.hdt.hdt.HDT;
import org.rdfhdt.hdt.hdt.HDTManager;
import org.rdfhdt.hdt.iterator.DictionaryTranslateIterator;
import org.rdfhdt.hdt.iterator.DictionaryTranslateIteratorBuffer;
import org.rdfhdt.hdt.iterator.SequentialSearchIteratorTripleID;
import org.rdfhdt.hdt.options.HDTOptionsKeys;
import org.rdfhdt.hdt.options.HDTSpecification;
import org.rdfhdt.hdt.triples.IteratorTripleString;
import org.rdfhdt.hdt.triples.TripleID;
import org.rdfhdt.hdt.triples.TripleString;
import org.rdfhdt.hdt.triples.impl.utils.HDTTestUtils;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Field;
import java.nio.file.Files;
import java.util.*;

@RunWith(Parameterized.class)
public class BitmapQuadsIteratorPositionTest {

public static final List<String> DICTIONARIES = List.of(
HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION
);

private static final Field ITERATOR_SUB;
private static final Field ITERATOR_SUB_BUFFER;
private static final Field ITERATOR_SUB_SEQ;

static {
try {
ITERATOR_SUB = DictionaryTranslateIterator.class.getDeclaredField("iterator");
ITERATOR_SUB_BUFFER = DictionaryTranslateIteratorBuffer.class.getDeclaredField("iterator");
ITERATOR_SUB_SEQ = SequentialSearchIteratorTripleID.class.getDeclaredField("iterator");

ITERATOR_SUB.setAccessible(true);
ITERATOR_SUB_BUFFER.setAccessible(true);
ITERATOR_SUB_SEQ.setAccessible(true);
} catch (NoSuchFieldException e) {
throw new Error(e);
}
}

/**
 * Builds the constructor arguments for every parameterized run.
 * <p>
 * For each entry of {@code DICTIONARIES}, all 32 on/off combinations of the
 * five counts are produced. An "off" count is 0 (1 for graphs); an "on"
 * count is 25 subjects, 50 predicates, 37 objects, 12 shared, 7 graphs.
 *
 * @return one {@code Object[]} per run, in the order
 *         {dictionary, subjects, predicates, objects, shared, graphs}
 */
@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> genParam() {
    List<Object[]> params = new ArrayList<>();
    for (String dict : DICTIONARIES) {
        // Five flags packed into one counter; the subject flag is the most
        // significant bit so the enumeration order matches nested loops
        // running subject -> predicate -> object -> shared -> graph.
        for (int mask = 0; mask < (1 << 5); mask++) {
            int su = (mask >> 4) & 1;
            int pr = (mask >> 3) & 1;
            int ob = (mask >> 2) & 1;
            int sh = (mask >> 1) & 1;
            int gr = mask & 1;
            params.add(new Object[] {
                dict,
                su * 25,
                pr * 50,
                ob * 37,
                sh * 12,
                gr * 6 + 1
            });
        }
    }
    return params;
}

@Rule
public TemporaryFolder tempDir = new TemporaryFolder();
final HDTSpecification spec;
final int shared;
final int subjects;
final int predicates;
final int objects;
final int graphs;

/**
 * Creates one parameterized test instance.
 * <p>
 * The specification uses the given dictionary type together with the
 * hash-quad temporary dictionary implementation. The dictionary type is set
 * only once, from {@code dictionaryType}, so the parameter is what actually
 * ends up in the specification.
 *
 * @param dictionaryType value stored under {@code HDTOptionsKeys.DICTIONARY_TYPE_KEY}
 * @param subjects       subject count handed to {@code HDTTestUtils}
 * @param predicates     predicate count handed to {@code HDTTestUtils}
 * @param objects        object count handed to {@code HDTTestUtils}
 * @param shared         shared count handed to {@code HDTTestUtils}
 * @param graphs         graph count handed to {@code HDTTestUtils}
 */
public BitmapQuadsIteratorPositionTest(
    String dictionaryType,
    int subjects,
    int predicates,
    int objects,
    int shared,
    int graphs
) {
    spec = new HDTSpecification();
    spec.set(
        HDTOptionsKeys.DICTIONARY_TYPE_KEY,
        dictionaryType
    );
    spec.set(
        HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY,
        HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD
    );
    this.subjects = subjects;
    this.predicates = predicates;
    this.objects = objects;
    this.shared = shared;
    this.graphs = graphs;
}

/**
 * Full-scan position check, passing {@code false} as the last
 * {@code HDTTestUtils} constructor argument.
 * <p>
 * Marked ignored rather than returning early on a hard-coded {@code null}
 * iterator, so the test is reported as skipped instead of passing without
 * asserting anything.
 *
 * @throws IOException       file
 * @throws NotFoundException search
 */
@Ignore("TODO: fix this - full-scan search was disabled (iterator hard-coded to null)")
@Test
public void searchAllTest() throws IOException, NotFoundException {
    checkFullScanPositions(false);
}

/**
 * Full-scan position check, passing {@code true} as the last
 * {@code HDTTestUtils} constructor argument.
 * <p>
 * Ignored for the same reason as {@code searchAllTest}.
 *
 * @throws IOException       file
 * @throws NotFoundException search
 */
@Ignore("TODO: fix this - full-scan search was disabled (iterator hard-coded to null)")
@Test
public void searchAllTestBuffer() throws IOException, NotFoundException {
    checkFullScanPositions(true);
}

/**
 * Searches with every component as wildcard (0) and checks that the
 * reported positions are 0, 1, 2, ... and agree with the index derived from
 * each result.
 *
 * @param buffer last argument of the {@code HDTTestUtils} constructor
 *               (NOTE(review): presumably selects the buffered iterator -
 *               confirm against HDTTestUtils)
 * @throws IOException       file
 * @throws NotFoundException search
 */
private void checkFullScanPositions(boolean buffer) throws IOException, NotFoundException {
    HDTTestUtils data = new HDTTestUtils(
        tempDir.newFile(),
        subjects,
        predicates,
        objects,
        shared,
        graphs,
        spec,
        buffer
    );

    IteratorTripleString it = data.searchForSPO(0, 0, 0, 0);
    // Same null guard as searchTest uses for searchForSPO.
    if (it == null) {
        return;
    }

    long index = 0L;
    while (it.hasNext()) {
        TripleString triple = it.next();
        long tripleIndex = it.getLastTriplePosition();

        // test if the search is returning index from 0, 1, ...count
        Assert.assertEquals("nextTriplePosition order", index++, tripleIndex);

        // test if the triple is at the right index
        HDTTestUtils.SpoId spoId = data.tripleToSpo(triple);
        long testIndex = spoId.getIndex();
        Assert.assertEquals("getIndex hdt value", testIndex, tripleIndex);

        // test if we can find it back again
        TripleID findTriple = data.hdt.getTriples().findTriple(tripleIndex);
        long testIndex2 = data.tripleToSpo(findTriple).getIndex();
        Assert.assertEquals("getIndex findTriple hdt value", testIndex2, tripleIndex);
    }
}

/**
 * Runs one search pattern and checks the position reported for each result.
 * <p>
 * For every result, the value of {@code getLastTriplePosition()} must equal
 * the index that {@code HDTTestUtils.tripleToSpo} derives from the result.
 * Nothing is checked when the search returns {@code null}.
 *
 * @param s subject to search (or 0 for wildcard)
 * @param p predicate to search (or 0 for wildcard)
 * @param o object to search (or 0 for wildcard)
 * @param g graph to search (or 0 for wildcard)
 * @throws IOException       file
 * @throws NotFoundException search
 */
private void searchTest(int s, int p, int o, int g) throws IOException, NotFoundException {
    HDTTestUtils data = new HDTTestUtils(
        tempDir.newFile(), subjects, predicates, objects, shared, graphs, spec, true
    );

    IteratorTripleString results = data.searchForSPO(s, p, o, g);
    if (results != null) {
        while (results.hasNext()) {
            TripleString current = results.next();
            long reported = results.getLastTriplePosition();
            // index the test data assigns to this result
            long expected = data.tripleToSpo(current).getIndex();
            Assert.assertEquals("getIndex hdt value", expected, reported);
        }
    }
}

/**
 * Runs {@code searchTest} for all 16 bound/wildcard combinations of subject,
 * predicate, object and graph. A bound component uses one third of the
 * corresponding count; an unbound one uses 0.
 *
 * @throws IOException       file
 * @throws NotFoundException search
 */
@Test
public void ___SearchTest() throws IOException, NotFoundException {
    // Four flags in one counter, subject flag most significant, so the
    // call order equals nested loops over s, p, o, g.
    for (int mask = 0; mask < (1 << 4); mask++) {
        int s = (mask >> 3) & 1;
        int p = (mask >> 2) & 1;
        int o = (mask >> 1) & 1;
        int g = mask & 1;
        searchTest(
            s * subjects / 3,
            p * predicates / 3,
            o * objects / 3,
            g * graphs / 3
        );
    }
}

/**
 * Compares two character sequences by content.
 *
 * @param cs1 first sequence
 * @param cs2 second sequence
 * @return {@code true} if both have the same length and the same character
 *         at every index
 */
private boolean equalsCharSequence(CharSequence cs1, CharSequence cs2) {
    final int length = cs1.length();
    if (length != cs2.length()) {
        return false;
    }

    // advance while the characters agree; a full walk means equality
    int pos = 0;
    while (pos < length && cs1.charAt(pos) == cs2.charAt(pos)) {
        pos++;
    }
    return pos == length;
}

/**
 * Compares two statements component by component (subject, predicate,
 * object, graph) using {@code equalsCharSequence}.
 *
 * @param s1 first statement
 * @param s2 second statement
 * @return {@code true} if all four components have equal content
 */
private boolean equalsTriple(TripleString s1, TripleString s2) {
    if (!equalsCharSequence(s1.getSubject(), s2.getSubject())) {
        return false;
    }
    if (!equalsCharSequence(s1.getPredicate(), s2.getPredicate())) {
        return false;
    }
    if (!equalsCharSequence(s1.getObject(), s2.getObject())) {
        return false;
    }
    return equalsCharSequence(s1.getGraph(), s2.getGraph());
}

/**
 * Finds the position of a statement in a list by linear search, comparing
 * with {@code equalsTriple}.
 *
 * @param triples list to search
 * @param str     statement to look for
 * @return index of the first element equal to {@code str}
 * @throws IllegalArgumentException if no element of {@code triples} equals {@code str}
 */
public long getIndex(List<TripleString> triples, TripleString str) {
    for (int i = 0; i < triples.size(); i++) {
        if (equalsTriple(str, triples.get(i))) {
            return i;
        }
    }
    throw new IllegalArgumentException("not a triple of our hdt: " + str);
}

/**
 * Builds an HDT from the classpath resource {@code example_triplePosition.nt}
 * and checks, for one bound/wildcard pattern, that the position reported
 * for each result equals its index in the full wildcard listing.
 * <p>
 * The pattern is derived from the statement at index 10 of the full
 * listing: a component is bound to that statement's value when its flag is
 * non-zero and left as the empty string otherwise.
 *
 * @param s 0 to leave the subject unbound, anything else to bind it
 * @param p 0 to leave the predicate unbound, anything else to bind it
 * @param o 0 to leave the object unbound, anything else to bind it
 * @param g 0 to leave the graph unbound, anything else to bind it
 * @throws NotFoundException search
 * @throws ParserException   HDT generation
 * @throws IOException       file
 */
private void searchTPSTest(
    int s,
    int p,
    int o,
    int g
) throws NotFoundException, ParserException, IOException {
    File f = new File(tempDir.newFolder(), "test.nt");

    // Files.copy does not close its source, so the resource stream is
    // closed here once the copy is done.
    try (InputStream ntFile = getClass().getClassLoader()
            .getResourceAsStream("example_triplePosition.nt")) {
        Assert.assertNotNull("ntFile can't be null", ntFile);
        Files.copy(ntFile, f.toPath());
    }

    try (HDT hdt = HDTManager.generateHDT(f.getAbsolutePath(), HDTTestUtils.BASE_URI, RDFNotation.NTRIPLES, new HDTSpecification(), null)) {
        List<TripleString> triples = new ArrayList<>();
        hdt.search("", "", "", "").forEachRemaining(triples::add);

        // The reference statement is taken from index 10; fail with a clear
        // message instead of an IndexOutOfBoundsException if it is missing.
        Assert.assertTrue(
            "expected more than 10 triples in example_triplePosition.nt, got " + triples.size(),
            triples.size() > 10
        );
        TripleString ts = triples.get(10);
        CharSequence ss = s == 0 ? "" : ts.getSubject();
        CharSequence sp = p == 0 ? "" : ts.getPredicate();
        CharSequence so = o == 0 ? "" : ts.getObject();
        CharSequence sg = g == 0 ? "" : ts.getGraph();

        IteratorTripleString it = hdt.search(ss, sp, so, sg);

        while (it.hasNext()) {
            TripleString tripleString = it.next();
            Assert.assertEquals("Sorted triple index", getIndex(triples, tripleString), it.getLastTriplePosition());
        }
    }
}

/**
 * Runs {@code searchTPSTest} for all 16 bound/wildcard combinations of
 * subject, predicate, object and graph.
 *
 * @throws IOException       file
 * @throws NotFoundException search
 * @throws ParserException   HDT generation
 */
@Test
public void ___SearchTPSTest() throws IOException, NotFoundException, ParserException {
    // Four flags in one counter, subject flag most significant, so the
    // call order equals nested loops over s, p, o, g.
    for (int mask = 0; mask < (1 << 4); mask++) {
        searchTPSTest(
            (mask >> 3) & 1,
            (mask >> 2) & 1,
            (mask >> 1) & 1,
            mask & 1
        );
    }
}
}
Loading