Skip to content

Commit

Permalink
add disk HDT indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
ate47 committed Nov 22, 2022
1 parent 969c049 commit ce35ba6
Show file tree
Hide file tree
Showing 29 changed files with 893 additions and 253 deletions.
5 changes: 5 additions & 0 deletions hdt-java-cli/bin/hdtVerify.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@echo off

rem Launcher for the HDTVerify command-line tool.
rem Run javaenv.bat from the directory this script lives in.
rem NOTE(review): JAVACMD, JAVAOPTIONS and JAVACP are presumably defined by javaenv.bat - confirm there.
call "%~dp0\javaenv.bat"

rem Start the JVM on org.rdfhdt.hdt.tools.HDTVerify, forwarding all command-line arguments.
"%JAVACMD%" -XX:NewRatio=1 -XX:SurvivorRatio=9 %JAVAOPTIONS% -classpath %JAVACP% org.rdfhdt.hdt.tools.HDTVerify %*
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;
import java.util.Arrays;

/**
Expand All @@ -51,6 +52,14 @@ public Bitmap375Disk(String location, long nbits) {
super(location, nbits);
}

/**
 * Creates a bitmap for the given path by delegating to the superclass
 * constructor that takes only a location.
 *
 * @param location path passed unchanged to the superclass constructor
 */
public Bitmap375Disk(Path location) {
super(location);
}

/**
 * Creates a bitmap for the given path and bit count by delegating to the
 * matching superclass constructor.
 *
 * @param location path passed unchanged to the superclass constructor
 * @param nbits    number of bits passed unchanged to the superclass constructor
 */
public Bitmap375Disk(Path location, long nbits) {
super(location, nbits);
}

public void dump() {
int count = (int) numWords(this.numbits);
for(int i=0;i<count;i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;

/**
* Version of Bitmap64 whose words are stored on disk
Expand All @@ -54,7 +55,16 @@ public Bitmap64Disk(String location) {

public Bitmap64Disk(String location, long nbits) {
this.numbits = 0;
this.words = new LongArrayDisk(location,numWords(nbits));
this.words = new LongArrayDisk(location, numWords(nbits));
}

/**
 * Creates a bitmap at the given path, using the constant {@code W} as the
 * bit count handed to {@link #Bitmap64Disk(Path, long)}.
 *
 * @param location path forwarded to the two-argument constructor
 */
public Bitmap64Disk(Path location) {
this(location, W);
}

public Bitmap64Disk(Path location, long nbits) {
this.numbits = 0;
this.words = new LongArrayDisk(location, numWords(nbits));
}

/**
Expand All @@ -79,8 +89,10 @@ protected static int lastWordNumBits(long numbits) {
return (int) ((numbits-1) % W)+1; // +1 To have output in the range 1-64, -1 to compensate.
}

protected final void ensureSize(long wordsRequired) {
words.resize(Math.max(words.getSize()*2, wordsRequired));
protected final void ensureSize(long wordsRequired) throws IOException {
if (words.length() < wordsRequired) {
words.resize(Math.max(words.getSize() * 2, wordsRequired));
}
}

public void trim(long numbits) {
Expand All @@ -90,7 +102,11 @@ public void trim(long numbits) {
public void trimToSize() {
int wordNum = (int) numWords(numbits);
if(wordNum!=words.length()) {
words.resize(wordNum);
try {
words.resize(wordNum);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}

Expand Down Expand Up @@ -177,7 +193,11 @@ public void set(long bitIndex, boolean value) {
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

long wordIndex = wordIndex(bitIndex);
ensureSize(wordIndex+1);
try {
ensureSize(wordIndex+1);
} catch (IOException e) {
throw new RuntimeException(e);
}

if(value) {
words.set(wordIndex, words.get(wordIndex) | (1L << bitIndex));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,6 @@ public interface DynamicSequence extends Sequence, LongArray {
/**
 * Returns the length of this sequence, which is whatever
 * {@code getNumberOfElements()} reports.
 *
 * @return the value of {@code getNumberOfElements()}
 */
default long length() {
return getNumberOfElements();
}

/**
 * Resizes this sequence.
 *
 * @param size requested new size.
 *             NOTE(review): implementations seen alongside this interface
 *             treat it as the new number of entries; confirm for others.
 */
void resize(long size);
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,16 @@ public SequenceInt32(int capacity) {
data = new int[capacity];
numelements = 0;
}


/**
 * Resizes this sequence so that it holds exactly {@code numentries} elements.
 * The backing array is reallocated through {@code resizeArray} and the
 * element count is set to the new size.
 *
 * @param numentries new number of elements, between 0 and
 *                   {@link Integer#MAX_VALUE} inclusive
 * @throws IllegalArgumentException if {@code numentries} is negative or
 *                                  larger than {@link Integer#MAX_VALUE}
 */
@Override
public void resize(long numentries) {
    // Reject negative sizes here instead of letting the array allocation
    // fail later with a NegativeArraySizeException from a private helper.
    if (numentries < 0) {
        throw new IllegalArgumentException("Can't resize to a negative size: " + numentries);
    }
    if (numentries > (long) Integer.MAX_VALUE) {
        throw new IllegalArgumentException("Can't resize to size bigger than int value");
    }
    // Safe narrowing: the range was validated above.
    final int newSize = (int) numentries;
    resizeArray(newSize);
    this.numelements = newSize;
}

private void resizeArray(int size) {
int [] newData = new int[size];
System.arraycopy(data, 0, newData, 0, Math.min(newData.length, data.length));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,16 @@ public SequenceInt64(long capacity) {
data = new long[(int)capacity];
numelements = 0;
}


/**
 * Resizes this sequence so that it holds exactly {@code numentries} elements.
 * The backing array is reallocated through {@code resizeArray} and the
 * element count is set to the new size.
 *
 * @param numentries new number of elements, between 0 and
 *                   {@link Integer#MAX_VALUE} inclusive
 * @throws IllegalArgumentException if {@code numentries} is negative or
 *                                  larger than {@link Integer#MAX_VALUE}
 */
@Override
public void resize(long numentries) {
    // Reject negative sizes here instead of letting the array allocation
    // fail later with a NegativeArraySizeException from a private helper.
    if (numentries < 0) {
        throw new IllegalArgumentException("Can't resize to a negative size: " + numentries);
    }
    if (numentries > (long) Integer.MAX_VALUE) {
        throw new IllegalArgumentException("Can't resize to size bigger than int value");
    }
    // Safe narrowing: the range was validated above.
    resizeArray((int) numentries);
    this.numelements = numentries;
}

private void resizeArray(int size) {
long [] newData = new long[size];
System.arraycopy(data, 0, newData, 0, Math.min(newData.length, data.length));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,8 @@ public void aggressiveTrimToSize() {
public void trimToSize() {
// Resize the backing array to the word count that numWordsFor reports for
// the current numbits and numentries; the result is narrowed to int
// because resizeArray is called with an int here.
resizeArray((int)numWordsFor(numbits, numentries));
}


@Override
public void resize(long numentries) {
this.numentries = numentries;
resizeArray((int)numWordsFor(numbits, numentries));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,11 @@ private static void setField(LongLargeArray data, int bitsField, long index, lon
private void resizeArray(long size) {
//data = Arrays.copyOf(data, size);
if(size > 0) {
LongLargeArray a = new LongLargeArray(size);
LargeArrayUtils.arraycopy(data, 0, a, 0, Math.min(size, data.length()));
data = a;
if (data.length() != size) {
LongLargeArray a = new LongLargeArray(size);
LargeArrayUtils.arraycopy(data, 0, a, 0, Math.min(size, data.length()));
data = a;
}
}else{
this.numentries = 0;
}
Expand Down Expand Up @@ -295,7 +297,8 @@ public void aggressiveTrimToSize() {
public void trimToSize() {
// Resize the backing array to the word count that numWordsFor reports for
// the current numbits and numentries.
resizeArray(numWordsFor(numbits, numentries));
}


@Override
public void resize(long numentries) {
this.numentries = numentries;
resizeArray(numWordsFor(numbits, numentries));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Iterator;

Expand All @@ -53,16 +54,31 @@ public SequenceLog64BigDisk(String location, int numbits) {
}

/**
 * Creates a sequence at the given location, delegating to the four-argument
 * String constructor with {@code initialize} set to {@code false}.
 *
 * @param location file location, as a string
 * @param numbits  number of bits per entry
 * @param capacity capacity forwarded to the four-argument constructor
 */
public SequenceLog64BigDisk(String location, int numbits, long capacity) {
this(location, numbits, capacity, false);
}

/**
 * Creates a sequence from a string location by converting it with
 * {@link Path#of(String, String...)} and delegating to the Path-based
 * four-argument constructor.
 *
 * @param location   file location, as a string
 * @param numbits    number of bits per entry
 * @param capacity   capacity forwarded to the Path-based constructor
 * @param initialize flag forwarded unchanged to the Path-based constructor
 */
public SequenceLog64BigDisk(String location, int numbits, long capacity, boolean initialize) {
this(Path.of(location), numbits, capacity, initialize);
}
/**
 * Creates a sequence at the given path, using the constant {@code W} as the
 * number of bits per entry.
 *
 * @param location path forwarded to {@link #SequenceLog64BigDisk(Path, int)}
 */
public SequenceLog64BigDisk(Path location) {
this(location, W);
}

/**
 * Creates a sequence at the given path with a capacity of 0.
 *
 * @param location path forwarded to the three-argument constructor
 * @param numbits  number of bits per entry
 */
public SequenceLog64BigDisk(Path location, int numbits) {
this(location, numbits, 0);
}

/**
 * Creates a sequence at the given path, delegating to the four-argument
 * constructor with {@code initialize} set to {@code false}.
 *
 * @param location path forwarded to the four-argument constructor
 * @param numbits  number of bits per entry
 * @param capacity capacity forwarded to the four-argument constructor
 */
public SequenceLog64BigDisk(Path location, int numbits, long capacity) {
this(location, numbits, capacity, false);
}

public SequenceLog64BigDisk(Path location, int numbits, long capacity, boolean initialize) {
this.numentries = 0;
this.numbits = numbits;
this.maxvalue = BitUtil.maxVal(numbits);
long size = numWordsFor(numbits, capacity);
data = new LongArrayDisk(location, Math.max(size,1));
}

public SequenceLog64BigDisk(String location, int numbits, long capacity, boolean initialize) {
this(location, numbits, capacity);
if(initialize) {
if (initialize) {
numentries = capacity;
}
}
Expand Down Expand Up @@ -132,7 +148,7 @@ private static void setField(LongArrayDisk data, int bitsField, long index, long
}
}

private void resizeArray(long size) {
private void resizeArray(long size) throws IOException {
data.resize(size);
}

Expand Down Expand Up @@ -179,7 +195,11 @@ public void append(long value) {

long neededSize = numWordsFor(numbits, numentries+1);
if(data.length()<neededSize) {
resizeArray(data.length()*2);
try {
resizeArray(data.length()*2);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
this.set(numentries, value);
numentries++;
Expand Down Expand Up @@ -208,20 +228,32 @@ public void aggressiveTrimToSize() {
long totalSize = numWordsFor(numbits, numentries);

if (totalSize!=data.length()){
resizeArray(totalSize);
try {
resizeArray(totalSize);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}

}

@Override
public void trimToSize() {
resizeArray(numWordsFor(numbits, numentries));
try {
resizeArray(numWordsFor(numbits, numentries));
} catch (IOException e) {
throw new RuntimeException(e);
}
}

public void resize(long numentries) {
this.numentries = numentries;
resizeArray(numWordsFor(numbits, numentries));
try {
resizeArray(numWordsFor(numbits, numentries));
} catch (IOException e) {
throw new RuntimeException(e);
}
}

/* (non-Javadoc)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
import org.rdfhdt.hdt.options.HDTOptions;
import org.rdfhdt.hdt.options.HDTSpecification;

import java.io.IOException;

/**
* Factory that creates HDT objects
*
Expand All @@ -45,7 +47,7 @@ private HDTFactory() {}
*
* @return HDT
*/
public static HDT createHDT() {
public static HDT createHDT() throws IOException {
return new HDTImpl(new HDTSpecification());
}

Expand All @@ -54,7 +56,7 @@ public static HDT createHDT() {
*
* @return HDT
*/
public static HDT createHDT(HDTOptions spec) {
public static HDT createHDT(HDTOptions spec) throws IOException {
return new HDTImpl(spec);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public HDT doLoadHDT(InputStream hdtFile, ProgressListener listener, HDTOptions
public HDT doLoadIndexedHDT(String hdtFileName, ProgressListener listener, HDTOptions spec) throws IOException {
HDTPrivate hdt = new HDTImpl(spec);
hdt.loadFromHDT(hdtFileName, listener);
hdt.loadOrCreateIndex(listener);
hdt.loadOrCreateIndex(listener, spec);
return hdt;
}

Expand All @@ -102,21 +102,21 @@ public HDT doLoadIndexedHDT(String hdtFileName, ProgressListener listener, HDTOp
public HDT doMapIndexedHDT(String hdtFileName, ProgressListener listener, HDTOptions spec) throws IOException {
HDTPrivate hdt = new HDTImpl(spec);
hdt.mapFromHDT(new File(hdtFileName), 0, listener);
hdt.loadOrCreateIndex(listener);
hdt.loadOrCreateIndex(listener, spec);
return hdt;
}

@Override
public HDT doLoadIndexedHDT(InputStream hdtFile, ProgressListener listener, HDTOptions spec) throws IOException {
HDTPrivate hdt = new HDTImpl(spec);
hdt.loadFromHDT(hdtFile, listener);
hdt.loadOrCreateIndex(listener);
hdt.loadOrCreateIndex(listener, spec);
return hdt;
}

@Override
public HDT doIndexedHDT(HDT hdt, ProgressListener listener) throws IOException {
((HDTPrivate) hdt).loadOrCreateIndex(listener);
((HDTPrivate)hdt).loadOrCreateIndex(listener, new HDTSpecification());
return hdt;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.io.InputStream;

import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.options.HDTOptions;

/**
* HDT operations that are used internally by the implementation.
Expand Down Expand Up @@ -36,7 +37,7 @@ public interface HDTPrivate extends HDT {
*
* @param listener A listener to be notified of the progress.
*/
void loadOrCreateIndex(ProgressListener listener) throws IOException;
void loadOrCreateIndex(ProgressListener listener, HDTOptions disk) throws IOException;

void populateHeaderStructure(String baseUri);
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public class HDTImpl extends HDTBase<HeaderPrivate, DictionaryPrivate, TriplesPr
private boolean isMapped;
private boolean isClosed=false;

public HDTImpl(HDTOptions spec) {
public HDTImpl(HDTOptions spec) throws IOException {
super(spec);

header = HeaderFactory.createHeader(this.spec);
Expand Down Expand Up @@ -369,7 +369,7 @@ public void loadFromModifiableHDT(TempHDT modHdt, ProgressListener listener) {
* @see hdt.hdt.HDT#generateIndex(hdt.listener.ProgressListener)
*/
@Override
public void loadOrCreateIndex(ProgressListener listener) throws IOException {
public void loadOrCreateIndex(ProgressListener listener, HDTOptions spec) throws IOException {
if(triples.getNumberOfElements()==0) {
// We need no index.
return;
Expand Down Expand Up @@ -402,7 +402,7 @@ public void loadOrCreateIndex(ProgressListener listener) throws IOException {

// GENERATE
StopWatch st = new StopWatch();
triples.generateIndex(listener);
triples.generateIndex(listener, spec);

// SAVE
if(this.hdtFileName!=null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public void mapFromHDT(File f, long offset, ProgressListener listener) {
}

@Override
public void loadOrCreateIndex(ProgressListener listener) {
public void loadOrCreateIndex(ProgressListener listener, HDTOptions disk) {
throw new NotImplementedException();
}

Expand Down
Loading

0 comments on commit ce35ba6

Please sign in to comment.