Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for named graphs #196

Merged
merged 32 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f69df74
Implemented quad dictionaries for .nq files
QuentinJanuel May 31, 2023
728446d
temp triples can handle quads
QuentinJanuel Jun 1, 2023
3d9e29a
making sure graphs don't affect normal hdt
QuentinJanuel Jun 5, 2023
9a7d09d
FourQuadSectionDictionary needs to implement FourSectionDictionary, n…
QuentinJanuel Jun 15, 2023
7a024f5
quad patterns support (not implemented yet)
QuentinJanuel Jun 15, 2023
82cb465
fixed quadstring
QuentinJanuel Jun 22, 2023
3439c49
save triples (could be optimized)
QuentinJanuel Jun 22, 2023
a4f5bfb
search wip
QuentinJanuel Jun 26, 2023
d672ee9
Fixed search for triples (non quad)
QuentinJanuel Jun 29, 2023
c26b85e
number of graphs is now dynamic
QuentinJanuel Jun 29, 2023
f58cfa6
Adapt code for large files (RoaringBitmap)
QuentinJanuel Jul 11, 2023
bcc095a
roaring bitmap 64
QuentinJanuel Jul 13, 2023
f45facb
accelerated wildcard pattern a bit
QuentinJanuel Jul 13, 2023
1e612c3
BitmapQuadsIterator
QuentinJanuel Jul 17, 2023
8d31b3d
fixed roaring bitmap
QuentinJanuel Jul 20, 2023
c8b0c1e
let quads iterator variables be longs
QuentinJanuel Jul 20, 2023
b1ba3a6
support for ???G, S??G, SP?G & SPOG
QuentinJanuel Jul 20, 2023
0f1685b
?POG & ??OG (ZGFOQ)
QuentinJanuel Jul 28, 2023
5ccc86b
removed forgotten log
QuentinJanuel Jul 28, 2023
257aa30
?PO? & ??O? (ZFOQ)
QuentinJanuel Jul 28, 2023
76ee129
?P?G (YGFOQ)
QuentinJanuel Jul 31, 2023
41abd02
?P?? (YFOQ)
QuentinJanuel Aug 4, 2023
cbfeedc
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12416…
QuentinJanuel Aug 28, 2023
b9002a6
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
fef55fb
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
5233663
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
59bba7c
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
7ae491c
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12958…
QuentinJanuel Aug 28, 2023
eacd5bb
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12958…
QuentinJanuel Aug 28, 2023
f61bc58
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12959…
QuentinJanuel Aug 31, 2023
c675a01
cat tests for quads
QuentinJanuel Sep 4, 2023
3f4ba41
test quads iterators
QuentinJanuel Sep 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Implemented quad dictionaries for .nq files
  • Loading branch information
QuentinJanuel committed May 31, 2023
commit f69df74e8c12801fab9f387c6fedbcacb78efacc
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ public interface Dictionary extends Closeable {
*/
long getNobjects();

/**
* Returns the number of graphs in the dictionary.
*/
long getNgraphs();

/**
* Returns the number of subjects/objects in the dictionary.
*/
Expand All @@ -111,6 +116,8 @@ public interface Dictionary extends Closeable {

DictionarySection getObjects();

/**
 * Returns the dictionary section that stores the graph terms.
 */
DictionarySection getGraphs();

Map<? extends CharSequence, DictionarySection> getAllObjects();

DictionarySection getShared();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ public enum DictionarySectionRole {
SUBJECT,
PREDICATE,
OBJECT,
SHARED
SHARED,
GRAPH,
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,6 @@ public enum TripleComponentRole {
PREDICATE,
/** The triple is an object */
OBJECT,
/** The triple is a graph */
GRAPH,
}
2 changes: 2 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTVocabulary.java
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ public class HDTVocabulary {
public static final String DICTIONARY_TYPE_PLAIN = HDT_DICTIONARY_BASE+"Plain>";
public static final String DICTIONARY_TYPE_FOUR_SECTION = HDT_DICTIONARY_BASE+"Four>";
public static final String DICTIONARY_TYPE_MULT_SECTION = HDT_DICTIONARY_BASE+"Mult>";
public static final String DICTIONARY_TYPE_FOUR_QUAD_SECTION = HDT_DICTIONARY_BASE+"FourQuad>";

public static final String DICTIONARY_TYPE_FOUR_PSFC_SECTION = HDT_DICTIONARY_BASE+"FourPsfc>";

Expand All @@ -106,6 +107,7 @@ public class HDTVocabulary {
public static final String TRIPLES_TYPE_PLAIN = HDT_TRIPLES_BASE+"Plain>";
public static final String TRIPLES_TYPE_COMPACT = HDT_TRIPLES_BASE+"Compact>";
public static final String TRIPLES_TYPE_BITMAP = HDT_TRIPLES_BASE+"Bitmap>";
public static final String TRIPLES_TYPE_BITMAP_QUAD = HDT_TRIPLES_BASE+"BitmapQuad>";

// Index type
public static final String INDEX_TYPE_FOQ = HDT_BASE+"indexFoQ>";
Expand Down
11 changes: 10 additions & 1 deletion hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptionsKeys.java
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ public class HDTOptionsKeys {
*/
@Value(key = TEMP_DICTIONARY_IMPL_KEY, desc = "hash dictionary")
public static final String TEMP_DICTIONARY_IMPL_VALUE_HASH = "hash";
/**
 * use Hash quad dictionary to create the HDTQ (HDT with named graphs)
 */
@Value(key = TEMP_DICTIONARY_IMPL_KEY, desc = "hash quad dictionary")
public static final String TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD = "hashQuad";
/**
* use Hash map to create the HDT and store the multisection dictionary, mandatory to create MSC
*/
Expand All @@ -282,8 +286,13 @@ public class HDTOptionsKeys {
/**
* 4 Section dictionary
*/
@Value(key = DICTIONARY_TYPE_KEY, desc = "Four sectiob dictionary")
@Value(key = DICTIONARY_TYPE_KEY, desc = "Four section dictionary")
public static final String DICTIONARY_TYPE_VALUE_FOUR_SECTION = HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION;
/**
* 4 Quad Section dictionary
*/
@Value(key = DICTIONARY_TYPE_KEY, desc = "Four quad section dictionary")
public static final String DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION = HDTVocabulary.DICTIONARY_TYPE_FOUR_QUAD_SECTION;
/**
* Prefix AND Suffix front-coded (PSFC) 4 Section dictionary
*/
Expand Down
5 changes: 5 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/quad/QuadString.java
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,9 @@ public int hashCode() {
public QuadString tripleToString() {
return new QuadString(subject.toString(), predicate.toString(), object.toString(), context.toString());
}

/**
 * Builds the textual form of this quad: the superclass representation
 * followed by a single space and the context term.
 *
 * @return the superclass string, a space, then the context
 */
@Override
public String toString() {
	final String tripleText = super.toString();
	return tripleText + " " + context;
}
}
6 changes: 6 additions & 0 deletions hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/RDF2HDT.java
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,11 @@ public void execute() throws ParserException, IOException {
}
}

if (notation == RDFNotation.NQUAD) {
spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD);
spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION);
}

colorTool.log("Converting " +rdfInput+" to "+hdtOutput+" as "+notation.name());

if (ntSimpleLoading) {
Expand Down Expand Up @@ -251,6 +256,7 @@ public void execute() throws ParserException, IOException {
colorTool.logValue("Different subjects .... ", "" + hdt.getDictionary().getNsubjects());
colorTool.logValue("Different predicates .. ", "" + hdt.getDictionary().getNpredicates());
colorTool.logValue("Different objects ..... ", "" + hdt.getDictionary().getNobjects());
colorTool.logValue("Different graphs ...... ", "" + hdt.getDictionary().getNgraphs());
colorTool.logValue("Common Subject/Object . ", "" + hdt.getDictionary().getNshared());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@

package org.rdfhdt.hdt.dictionary;

import org.rdfhdt.hdt.dictionary.impl.FourQuadSectionDictionary;
import org.rdfhdt.hdt.dictionary.impl.FourSectionDictionary;
import org.rdfhdt.hdt.dictionary.impl.FourSectionDictionaryBig;
import org.rdfhdt.hdt.dictionary.impl.FourSectionDictionaryDiff;
import org.rdfhdt.hdt.dictionary.impl.HashDictionary;
import org.rdfhdt.hdt.dictionary.impl.HashQuadDictionary;
import org.rdfhdt.hdt.dictionary.impl.MultipleSectionDictionary;
import org.rdfhdt.hdt.dictionary.impl.MultipleSectionDictionaryDiff;
import org.rdfhdt.hdt.dictionary.impl.PSFCFourSectionDictionary;
Expand Down Expand Up @@ -108,12 +110,13 @@ public static Dictionary createDefaultDictionary()
*/
public static TempDictionary createTempDictionary(HDTOptions spec) {
String name = spec.get(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, "");

// Implementations available in the Core
switch (name) {
case "":
case HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH:
return new HashDictionary(spec, false);
case HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD:
return new HashQuadDictionary(spec, false);
case HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_PSFC:
return new PSFCTempDictionary(new HashDictionary(spec, false));
case HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_MULT_HASH:
Expand All @@ -135,6 +138,8 @@ public static DictionaryPrivate createDictionary(HDTOptions spec) {
case "":
case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION:
return new FourSectionDictionary(spec);
case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION:
return new FourQuadSectionDictionary(spec);
case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_PSFC_SECTION:
return new PSFCFourSectionDictionary(spec);
case HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_SECTION_BIG:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ public interface TempDictionary extends Closeable {

TempDictionarySection getShared();

/**
 * Returns the temporary section that stores the graph terms.
 * NOTE(review): some implementations throw NotImplementedException instead
 * of returning a section, so callers should only use this for quad input.
 */
TempDictionarySection getGraphs();

/**
* To be executed at the start of the processing
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public abstract class BaseDictionary implements DictionaryPrivate {
protected DictionarySectionPrivate predicates;
protected DictionarySectionPrivate objects;
protected DictionarySectionPrivate shared;
protected DictionarySectionPrivate graphs;

public BaseDictionary(HDTOptions spec) {
this.spec = spec;
Expand All @@ -69,7 +70,8 @@ protected long getGlobalId(long id, DictionarySectionRole position) {
return shared.getNumberOfElements()+id;

case PREDICATE:
case SHARED:
case GRAPH:
case SHARED:
return id;
default:
throw new IllegalArgumentException();
Expand All @@ -86,6 +88,7 @@ protected long getLocalId(long id, TripleComponentRole position) {
return id-shared.getNumberOfElements();
}
case PREDICATE:
case GRAPH:
return id;
default:
throw new IllegalArgumentException();
Expand Down Expand Up @@ -126,6 +129,12 @@ public long stringToId(CharSequence str, TripleComponentRole position) {
return getGlobalId(ret, DictionarySectionRole.PREDICATE);
}
return -1;
case GRAPH:
ret = graphs.locate(str);
if(ret!=0) {
return getGlobalId(ret, DictionarySectionRole.GRAPH);
}
return -1;
case OBJECT:
if(str.charAt(0)!='"') {
ret = shared.locate(str);
Expand All @@ -145,12 +154,12 @@ public long stringToId(CharSequence str, TripleComponentRole position) {

/**
 * Returns the total number of entries over all sections of this dictionary:
 * subjects, predicates, objects, shared and, when present, graphs.
 *
 * @return the sum of the element counts of every available section
 */
@Override
public long getNumberOfElements() {
	long total = subjects.getNumberOfElements()
			+ predicates.getNumberOfElements()
			+ objects.getNumberOfElements()
			+ shared.getNumberOfElements();
	// The graph section is declared without an initializer, so it can still be
	// null here (presumably for dictionaries without named graphs - confirm).
	if (graphs != null) {
		total += graphs.getNumberOfElements();
	}
	return total;
}

/**
 * Returns the combined size of all sections of this dictionary:
 * subjects, predicates, objects, shared and, when present, graphs.
 *
 * @return the sum of {@code size()} over every available section
 */
@Override
public long size() {
	long total = subjects.size() + predicates.size() + objects.size() + shared.size();
	// The graph section is declared without an initializer, so it can still be
	// null here (presumably for dictionaries without named graphs - confirm).
	if (graphs != null) {
		total += graphs.size();
	}
	return total;
}

@Override
Expand All @@ -168,6 +177,11 @@ public long getNobjects() {
return objects.getNumberOfElements()+shared.getNumberOfElements();
}

/**
 * Returns the number of graphs in the dictionary.
 *
 * @return the element count of the graph section, or 0 when this
 *         dictionary has no graph section
 */
@Override
public long getNgraphs() {
	// The graph section is declared without an initializer; report zero graphs
	// instead of failing with a NullPointerException when it was never set.
	if (graphs == null) {
		return 0;
	}
	return graphs.getNumberOfElements();
}

@Override
public long getNshared() {
return shared.getNumberOfElements();
Expand All @@ -187,6 +201,11 @@ public DictionarySection getPredicates() {
public DictionarySection getObjects() {
return objects;
}

/**
 * Returns the section that stores the graph terms.
 * NOTE(review): the backing field is declared without an initializer, so this
 * may return null when no graph section was assigned - confirm with subclasses.
 */
@Override
public DictionarySection getGraphs() {
return graphs;
}

@Override
public DictionarySection getShared() {
Expand All @@ -203,6 +222,8 @@ private DictionarySectionPrivate getSection(long id, TripleComponentRole role) {
}
case PREDICATE:
return predicates;
case GRAPH:
return graphs;
case OBJECT:
if(id<=shared.getNumberOfElements()) {
return shared;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ public TempDictionarySection getObjects() {
public TempDictionarySection getShared() {
return shared;
}

/**
 * Graph sections are not provided by this dictionary implementation.
 *
 * @throws NotImplementedException always
 */
@Override
public TempDictionarySection getGraphs() {
throw new NotImplementedException();
}

protected long getGlobalId(long id, DictionarySectionRole position) {
switch (position) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,11 @@ public TempDictionarySection getShared() {
return shared;
}

/**
 * Graph sections are not provided by this dictionary implementation.
 *
 * @throws NotImplementedException always
 */
@Override
public TempDictionarySection getGraphs() {
throw new NotImplementedException();
}

/**
 * Intentionally empty: this implementation performs no work when processing starts.
 */
@Override
public void startProcessing() {
}
Expand Down
Loading