Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for named graphs #196

Merged
merged 32 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f69df74
Implemented quad dictionaries for .nq files
QuentinJanuel May 31, 2023
728446d
temp triples can handle quads
QuentinJanuel Jun 1, 2023
3d9e29a
making sure graphs don't affect normal hdt
QuentinJanuel Jun 5, 2023
9a7d09d
FourQuadSectionDictionary needs to implement FourSectionDictionary, n…
QuentinJanuel Jun 15, 2023
7a024f5
quad patterns support (not implemented yet)
QuentinJanuel Jun 15, 2023
82cb465
fixed quadstring
QuentinJanuel Jun 22, 2023
3439c49
save triples (could be optimized)
QuentinJanuel Jun 22, 2023
a4f5bfb
search wip
QuentinJanuel Jun 26, 2023
d672ee9
Fixed search for triples (non quad)
QuentinJanuel Jun 29, 2023
c26b85e
number of graphs is now dynamic
QuentinJanuel Jun 29, 2023
f58cfa6
Adapt code for large files (RoaringBitmap)
QuentinJanuel Jul 11, 2023
bcc095a
roaring bitmap 64
QuentinJanuel Jul 13, 2023
f45facb
accelerated wildcard pattern a bit
QuentinJanuel Jul 13, 2023
1e612c3
BitmapQuadsIterator
QuentinJanuel Jul 17, 2023
8d31b3d
fixed roaring bitmap
QuentinJanuel Jul 20, 2023
c8b0c1e
let quads iterator variables be longs
QuentinJanuel Jul 20, 2023
b1ba3a6
support for ???G, S??G, SP?G & SPOG
QuentinJanuel Jul 20, 2023
0f1685b
?POG & ??OG (ZGFOQ)
QuentinJanuel Jul 28, 2023
5ccc86b
removed forgotten log
QuentinJanuel Jul 28, 2023
257aa30
?PO? & ??O? (ZFOQ)
QuentinJanuel Jul 28, 2023
76ee129
?P?G (YGFOQ)
QuentinJanuel Jul 31, 2023
41abd02
?P?? (YFOQ)
QuentinJanuel Aug 4, 2023
cbfeedc
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12416…
QuentinJanuel Aug 28, 2023
b9002a6
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
fef55fb
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
5233663
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
59bba7c
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12955…
QuentinJanuel Aug 28, 2023
7ae491c
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12958…
QuentinJanuel Aug 28, 2023
eacd5bb
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12958…
QuentinJanuel Aug 28, 2023
f61bc58
address https://github.com/rdfhdt/hdt-java/pull/196#discussion_r12959…
QuentinJanuel Aug 31, 2023
c675a01
cat tests for quads
QuentinJanuel Sep 4, 2023
3f4ba41
test quads iterators
QuentinJanuel Sep 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
making sure graphs don't affect normal hdt
  • Loading branch information
QuentinJanuel committed Jun 5, 2023
commit 3d9e29a401ee5cfd299ce89a892aabe15f62a488
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,10 @@ public interface Dictionary extends Closeable {
* @return type
*/
String getType();

/**
* Returns whether the dictionary supports graphs.
*
* @return true if it supports graphs, false otherwise
*/
boolean supportGraphs();
}
7 changes: 5 additions & 2 deletions hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/RDF2HDT.java
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,9 @@ public void execute() throws ParserException, IOException {
}
}

if (notation == RDFNotation.NQUAD) {
boolean isQuad = notation == RDFNotation.NQUAD;

if (isQuad) {
spec.set(HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD);
spec.set(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION);
}
Expand Down Expand Up @@ -256,7 +258,8 @@ public void execute() throws ParserException, IOException {
colorTool.logValue("Different subjects .... ", "" + hdt.getDictionary().getNsubjects());
colorTool.logValue("Different predicates .. ", "" + hdt.getDictionary().getNpredicates());
colorTool.logValue("Different objects ..... ", "" + hdt.getDictionary().getNobjects());
colorTool.logValue("Different graphs ...... ", "" + hdt.getDictionary().getNgraphs());
if (isQuad)
colorTool.logValue("Different graphs ...... ", "" + hdt.getDictionary().getNgraphs());
colorTool.logValue("Common Subject/Object . ", "" + hdt.getDictionary().getNshared());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ public interface TempDictionary extends Closeable {

TempDictionarySection getGraphs();

/**
* Returns whether the dictionary supports graphs.
*
* @return true if it supports graphs, false otherwise
*/
boolean supportGraphs();

/**
* To be executed at the start of the processing
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,24 @@ public long stringToId(CharSequence str, TripleComponentRole position) {

/**
 * Returns the total number of entries held by this dictionary: subjects,
 * predicates, objects and shared entries, plus the graph entries when
 * {@link #supportGraphs()} is true.
 *
 * @return the summed element count of every section in use
 */
@Override
public long getNumberOfElements() {
    // The shared section must stay in the sum for plain triple dictionaries too;
    // only the graph section is optional.
    long total = subjects.getNumberOfElements()
            + predicates.getNumberOfElements()
            + objects.getNumberOfElements()
            + shared.getNumberOfElements();
    if (this.supportGraphs()) {
        total += graphs.getNumberOfElements();
    }
    return total;
}

/**
 * Returns the summed {@code size()} of the sections of this dictionary:
 * subjects, predicates, objects and shared, plus graphs when
 * {@link #supportGraphs()} is true.
 *
 * @return the summed size of every section in use
 */
@Override
public long size() {
    // The shared section must stay in the sum for plain triple dictionaries too;
    // only the graph section is optional.
    long total = subjects.size()
            + predicates.size()
            + objects.size()
            + shared.size();
    if (this.supportGraphs()) {
        total += graphs.size();
    }
    return total;
}

@Override
Expand All @@ -179,6 +191,8 @@ public long getNobjects() {

/**
 * Returns the number of entries in the graph section.
 *
 * @return the graph section's element count, or 0 when there is no graph section
 */
@Override
public long getNgraphs() {
    return graphs != null ? graphs.getNumberOfElements() : 0;
}

Expand Down Expand Up @@ -261,4 +275,9 @@ public long getNAllObjects() {
public void loadAsync(TempDictionary other, ProgressListener listener) throws InterruptedException {
throw new NotImplementedException();
}

/**
 * Reports that this dictionary does not support graphs.
 *
 * @return always {@code false}
 */
@Override
public boolean supportGraphs() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -214,5 +214,10 @@ public long stringToId(CharSequence str, TripleComponentRole position) {
default:
throw new IllegalArgumentException();
}
}
}

/**
 * Reports that this dictionary does not support graphs.
 *
 * @return always {@code false}
 */
@Override
public boolean supportGraphs() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -252,4 +252,9 @@ public interface NodeConsumer {
/**
 * Callback that receives an id together with a header value.
 */
private interface NodeConsumerMethod {
void consume(long id, long header);
}

/**
 * Reports that this dictionary does not support graphs.
 *
 * @return always {@code false}
 */
@Override
public boolean supportGraphs() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,9 @@ public void close() throws IOException {
graphs
);
}

/**
 * Reports that this dictionary supports graphs.
 *
 * @return always {@code true}
 */
@Override
public boolean supportGraphs() {
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,9 @@ public void endProcessing() {
public void close() throws IOException {
// Do nothing.
}

/**
 * Reports that this dictionary supports graphs.
 *
 * @return always {@code true}
 */
@Override
public boolean supportGraphs() {
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -317,4 +317,9 @@ public AbstractMap.SimpleEntry<Long,Long> getDataTypeRange(CharSequence dataType
}
return new AbstractMap.SimpleEntry<>(0L,0L);
}

/**
 * Reports that this dictionary does not support graphs.
 *
 * @return always {@code false}
 */
@Override
public boolean supportGraphs() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ public long insert(CharSequence str, TripleComponentRole position) {
// The methods below forward straight to the wrapped delegate without adding any behaviour.
@Override public void reorganize(TempTriples triples) { delegate.reorganize(triples); }
@Override public void clear() { delegate.clear(); }
@Override public void close() throws IOException { delegate.close(); }
// Graph support is whatever the wrapped delegate reports.
@Override public boolean supportGraphs() { return delegate.supportGraphs(); }
}
Original file line number Diff line number Diff line change
Expand Up @@ -223,5 +223,10 @@ public long stringToId(CharSequence str, TripleComponentRole position) {
default:
throw new IllegalArgumentException();
}
}
}

/**
 * Reports that this dictionary does not support graphs.
 *
 * @return always {@code false}
 */
@Override
public boolean supportGraphs() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,15 @@ public TripleAppender(TempDictionary dict, TempTriples triples, ProgressListener

@Override
public void processTriple(TripleString triple, long pos) {
triples.insert(
dict.insert(triple.getSubject(), TripleComponentRole.SUBJECT),
dict.insert(triple.getPredicate(), TripleComponentRole.PREDICATE),
dict.insert(triple.getObject(), TripleComponentRole.OBJECT),
dict.insert(triple.getGraph(), TripleComponentRole.GRAPH)
);
long s = dict.insert(triple.getSubject(), TripleComponentRole.SUBJECT);
long p = dict.insert(triple.getPredicate(), TripleComponentRole.PREDICATE);
long o = dict.insert(triple.getObject(), TripleComponentRole.OBJECT);
if (dict.supportGraphs()) {
long g = dict.insert(triple.getGraph(), TripleComponentRole.GRAPH);
triples.insert(s, p, o, g);
} else {
triples.insert(s, p, o);
}
num++;
size+=triple.getSubject().length()+triple.getPredicate().length()+triple.getObject().length()+triple.getGraph().length()+4; // Spaces and final dot
ListenerUtil.notifyCond(listener, "Loaded "+num+" triples", num, 0, 100);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
import org.rdfhdt.hdt.hdt.HDTVocabulary;
import org.rdfhdt.hdt.options.ControlInfo;
import org.rdfhdt.hdt.options.HDTOptions;
import org.rdfhdt.hdt.options.HDTOptionsKeys;
import org.rdfhdt.hdt.options.HDTSpecification;
import org.rdfhdt.hdt.triples.impl.BitmapQuadTriples;
import org.rdfhdt.hdt.triples.impl.BitmapTriples;
import org.rdfhdt.hdt.triples.impl.TriplesList;

Expand Down Expand Up @@ -69,8 +71,13 @@ static public TempTriples createTempTriples(HDTOptions spec) {
*/
static public TriplesPrivate createTriples(HDTOptions spec) throws IOException {
String type = spec.get("triples.format");

boolean isQuad = spec.get(
HDTOptionsKeys.DICTIONARY_TYPE_KEY,
""
).equals(HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION);
if(type==null) {
if (isQuad)
return new BitmapQuadTriples(spec);
return new BitmapTriples(spec);
} else if(HDTVocabulary.TRIPLES_TYPE_TRIPLESLIST.equals(type)) {
return new TriplesList(spec);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
* File: $HeadURL: https://hdt-java.googlecode.com/svn/trunk/hdt-java/src/org/rdfhdt/hdt/triples/impl/BitmapTriples.java $
* Revision: $Rev: 203 $
* Last modified: $Date: 2013-05-24 10:48:53 +0100 (vie, 24 may 2013) $
* Last modified by: $Author: mario.arias $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 3.0 of the License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Contacting the authors:
* Mario Arias: [email protected]
* Javier D. Fernandez: [email protected]
* Miguel A. Martinez-Prieto: [email protected]
* Alejandro Andres: [email protected]
*/

package org.rdfhdt.hdt.triples.impl;

import org.rdfhdt.hdt.compact.bitmap.*;
import org.rdfhdt.hdt.compact.sequence.Sequence;
import org.rdfhdt.hdt.enums.TripleComponentOrder;
import org.rdfhdt.hdt.hdt.HDTVocabulary;
import org.rdfhdt.hdt.options.*;

import java.io.*;

/**
 * {@link BitmapTriples} variant that identifies itself as the bitmap quad
 * triples type. Everything except {@link #getType()} is inherited unchanged
 * from {@link BitmapTriples}.
 *
 * @author mario.arias
 */
public class BitmapQuadTriples extends BitmapTriples {
    /**
     * Creates an instance through the no-argument {@link BitmapTriples} constructor.
     *
     * @throws IOException if the parent constructor throws it
     */
    public BitmapQuadTriples() throws IOException {
        super();
    }

    /**
     * Creates an instance from the given options.
     *
     * @param spec options passed unchanged to {@link BitmapTriples}
     * @throws IOException if the parent constructor throws it
     */
    public BitmapQuadTriples(HDTOptions spec) throws IOException {
        super(spec);
        // Deliberately silent: library code must not write debug traces to stdout.
    }

    /**
     * Creates an instance over already built sequences and bitmaps.
     *
     * @param spec options passed unchanged to {@link BitmapTriples}
     * @param seqY passed unchanged to {@link BitmapTriples}
     * @param seqZ passed unchanged to {@link BitmapTriples}
     * @param bitY passed unchanged to {@link BitmapTriples}
     * @param bitZ passed unchanged to {@link BitmapTriples}
     * @param order passed unchanged to {@link BitmapTriples}
     * @throws IOException if the parent constructor throws it
     */
    public BitmapQuadTriples(HDTOptions spec, Sequence seqY, Sequence seqZ, Bitmap bitY, Bitmap bitZ, TripleComponentOrder order) throws IOException {
        super(spec, seqY, seqZ, bitY, bitZ, order);
    }

    /**
     * Returns the type identifier of this triples implementation.
     *
     * @return {@link HDTVocabulary#TRIPLES_TYPE_BITMAP_QUAD}
     * @see hdt.triples.Triples#getType()
     */
    @Override
    public String getType() {
        return HDTVocabulary.TRIPLES_TYPE_BITMAP_QUAD;
    }

    // TODO(review): the original note listed "load search save getNumberOfElements, size,
    // mapFromFiles" — presumably the inherited methods that still need quad-aware
    // overrides; confirm before relying on them for quads.
}

Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
*
*/
public class BitmapTriples implements TriplesPrivate {
private static final Logger log = LoggerFactory.getLogger(BitmapTriples.class);
protected static final Logger log = LoggerFactory.getLogger(BitmapTriples.class);

protected TripleComponentOrder order;

Expand All @@ -95,7 +95,7 @@ public class BitmapTriples implements TriplesPrivate {
boolean diskSubIndex;
CreateOnUsePath diskSequenceLocation;

private boolean isClosed;
protected boolean isClosed;

public BitmapTriples() throws IOException {
this(new HDTSpecification());
Expand Down Expand Up @@ -138,7 +138,7 @@ public BitmapTriples(HDTOptions spec, Sequence seqY, Sequence seqZ, Bitmap bitY,
isClosed=false;
}

private void loadDiskSequence(HDTOptions spec) throws IOException {
protected void loadDiskSequence(HDTOptions spec) throws IOException {
diskSequence = spec != null && spec.getBoolean(HDTOptionsKeys.BITMAPTRIPLES_SEQUENCE_DISK, false);
diskSubIndex = spec != null && spec.getBoolean(HDTOptionsKeys.BITMAPTRIPLES_SEQUENCE_DISK_SUBINDEX, false);

Expand Down