Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add disk HDT indexing #178

Merged
merged 1 commit into from
Nov 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
add disk HDT indexing
  • Loading branch information
ate47 committed Nov 22, 2022
commit ce35ba66fd4ffb3bae72550a9e0fc6764e40cbc9
5 changes: 5 additions & 0 deletions hdt-java-cli/bin/hdtVerify.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@echo off
rem Windows launcher for the org.rdfhdt.hdt.tools.HDTVerify command-line tool.

rem Run the javaenv.bat script that sits in the same directory as this script.
rem NOTE(review): javaenv.bat presumably defines JAVACMD, JAVAOPTIONS and JAVACP - confirm.
call "%~dp0\javaenv.bat"

rem Start the JVM with the heap-generation flags below and forward every command-line argument to HDTVerify.
"%JAVACMD%" -XX:NewRatio=1 -XX:SurvivorRatio=9 %JAVAOPTIONS% -classpath %JAVACP% org.rdfhdt.hdt.tools.HDTVerify %*
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;
import java.util.Arrays;

/**
Expand All @@ -51,6 +52,14 @@ public Bitmap375Disk(String location, long nbits) {
super(location, nbits);
}

/**
 * Creates the bitmap from a {@link Path} location by delegating to the
 * superclass constructor that takes only a location.
 *
 * @param location path handed unchanged to the superclass constructor
 */
public Bitmap375Disk(Path location) {
super(location);
}

/**
 * Creates the bitmap from a {@link Path} location and a bit count by
 * delegating to the matching superclass constructor.
 *
 * @param location path handed unchanged to the superclass constructor
 * @param nbits bit count handed unchanged to the superclass constructor
 */
public Bitmap375Disk(Path location, long nbits) {
super(location, nbits);
}

public void dump() {
int count = (int) numWords(this.numbits);
for(int i=0;i<count;i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;

/**
* Version of Bitmap64 whose words are stored on disk (in a LongArrayDisk)
Expand All @@ -54,7 +55,16 @@ public Bitmap64Disk(String location) {

public Bitmap64Disk(String location, long nbits) {
this.numbits = 0;
this.words = new LongArrayDisk(location,numWords(nbits));
this.words = new LongArrayDisk(location, numWords(nbits));
}

/**
 * Creates a bitmap at the given path using {@code W} as the initial bit count,
 * by delegating to {@link #Bitmap64Disk(Path, long)}.
 * NOTE(review): W is presumably the number of bits per word (64) - confirm against its declaration.
 *
 * @param location path passed on to the two-argument constructor
 */
public Bitmap64Disk(Path location) {
this(location, W);
}

public Bitmap64Disk(Path location, long nbits) {
this.numbits = 0;
this.words = new LongArrayDisk(location, numWords(nbits));
}

/**
Expand All @@ -79,8 +89,10 @@ protected static int lastWordNumBits(long numbits) {
return (int) ((numbits-1) % W)+1; // +1 To have output in the range 1-64, -1 to compensate.
}

protected final void ensureSize(long wordsRequired) {
words.resize(Math.max(words.getSize()*2, wordsRequired));
protected final void ensureSize(long wordsRequired) throws IOException {
if (words.length() < wordsRequired) {
words.resize(Math.max(words.getSize() * 2, wordsRequired));
}
}

public void trim(long numbits) {
Expand All @@ -90,7 +102,11 @@ public void trim(long numbits) {
public void trimToSize() {
int wordNum = (int) numWords(numbits);
if(wordNum!=words.length()) {
words.resize(wordNum);
try {
words.resize(wordNum);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}

Expand Down Expand Up @@ -177,7 +193,11 @@ public void set(long bitIndex, boolean value) {
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

long wordIndex = wordIndex(bitIndex);
ensureSize(wordIndex+1);
try {
ensureSize(wordIndex+1);
} catch (IOException e) {
throw new RuntimeException(e);
}

if(value) {
words.set(wordIndex, words.get(wordIndex) | (1L << bitIndex));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,6 @@ public interface DynamicSequence extends Sequence, LongArray {
default long length() {
// The length of a dynamic sequence is reported as its number of elements.
return getNumberOfElements();
}

/**
 * Resizes this sequence to {@code size} entries.
 * NOTE(review): the implementations touched alongside this declaration set their
 * entry count to the requested size and resize their backing storage - confirm
 * that every implementation follows the same contract.
 *
 * @param size the new number of entries
 */
void resize(long size);
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,16 @@ public SequenceInt32(int capacity) {
data = new int[capacity];
numelements = 0;
}


/**
 * Resizes this sequence so that it holds exactly {@code numentries} entries.
 * The backing array is reallocated to that length through {@code resizeArray}
 * and the element count is set to the same value.
 *
 * @param numentries new number of entries, between 0 and {@link Integer#MAX_VALUE}
 * @throws IllegalArgumentException if {@code numentries} is negative or larger
 *         than {@link Integer#MAX_VALUE}
 */
@Override
public void resize(long numentries) {
    // Reject negatives before narrowing: the (int) cast keeps only the low 32 bits,
    // so a value such as -4294967296L would otherwise become 0 and silently
    // empty the sequence instead of failing.
    if (numentries < 0) {
        throw new IllegalArgumentException("Can't resize to a negative size: " + numentries);
    }
    if (numentries > (long) Integer.MAX_VALUE) {
        throw new IllegalArgumentException("Can't resize to size bigger than int value");
    }
    final int newSize = (int) numentries;
    resizeArray(newSize);
    this.numelements = newSize;
}

private void resizeArray(int size) {
int [] newData = new int[size];
System.arraycopy(data, 0, newData, 0, Math.min(newData.length, data.length));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,16 @@ public SequenceInt64(long capacity) {
data = new long[(int)capacity];
numelements = 0;
}


/**
 * Resizes this sequence so that it holds exactly {@code numentries} entries.
 * The backing array is reallocated to that length through {@code resizeArray}
 * and the element count is set to the same value.
 *
 * @param numentries new number of entries, between 0 and {@link Integer#MAX_VALUE}
 * @throws IllegalArgumentException if {@code numentries} is negative or larger
 *         than {@link Integer#MAX_VALUE}
 */
@Override
public void resize(long numentries) {
    // Reject negatives before narrowing: the (int) cast keeps only the low 32 bits,
    // so a negative long could resize the array to an unrelated length while the
    // element count below is set to the negative value.
    if (numentries < 0) {
        throw new IllegalArgumentException("Can't resize to a negative size: " + numentries);
    }
    if (numentries > (long) Integer.MAX_VALUE) {
        throw new IllegalArgumentException("Can't resize to size bigger than int value");
    }
    resizeArray((int) numentries);
    this.numelements = numentries;
}

private void resizeArray(int size) {
long [] newData = new long[size];
System.arraycopy(data, 0, newData, 0, Math.min(newData.length, data.length));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,8 @@ public void aggressiveTrimToSize() {
public void trimToSize() {
    // Number of words that numWordsFor reports for the current entry count and bit width.
    final long wordsNeeded = numWordsFor(numbits, numentries);
    // The backing array is int-indexed, hence the narrowing cast.
    resizeArray((int) wordsNeeded);
}


@Override
public void resize(long numentries) {
this.numentries = numentries;
resizeArray((int)numWordsFor(numbits, numentries));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,11 @@ private static void setField(LongLargeArray data, int bitsField, long index, lon
private void resizeArray(long size) {
//data = Arrays.copyOf(data, size);
if(size > 0) {
LongLargeArray a = new LongLargeArray(size);
LargeArrayUtils.arraycopy(data, 0, a, 0, Math.min(size, data.length()));
data = a;
if (data.length() != size) {
LongLargeArray a = new LongLargeArray(size);
LargeArrayUtils.arraycopy(data, 0, a, 0, Math.min(size, data.length()));
data = a;
}
}else{
this.numentries = 0;
}
Expand Down Expand Up @@ -295,7 +297,8 @@ public void aggressiveTrimToSize() {
public void trimToSize() {
    // Number of words that numWordsFor reports for the current entry count and bit width.
    final long wordsNeeded = numWordsFor(numbits, numentries);
    resizeArray(wordsNeeded);
}


@Override
public void resize(long numentries) {
this.numentries = numentries;
resizeArray(numWordsFor(numbits, numentries));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Iterator;

Expand All @@ -53,16 +54,31 @@ public SequenceLog64BigDisk(String location, int numbits) {
}

/**
 * Creates a sequence at a string location, delegating to the four-argument
 * String constructor with {@code initialize = false}.
 *
 * @param location file location as a string
 * @param numbits number of bits per entry
 * @param capacity number of entries to allocate space for
 */
public SequenceLog64BigDisk(String location, int numbits, long capacity) {
this(location, numbits, capacity, false);
}

/**
 * Creates a sequence at a string location by converting it with
 * {@link Path#of(String, String...)} and delegating to the {@link Path}-based constructor.
 *
 * @param location file location as a string
 * @param numbits number of bits per entry
 * @param capacity number of entries to allocate space for
 * @param initialize passed on unchanged to the {@link Path}-based constructor
 */
public SequenceLog64BigDisk(String location, int numbits, long capacity, boolean initialize) {
this(Path.of(location), numbits, capacity, initialize);
}
/**
 * Creates a sequence at the given path using {@code W} as the number of bits
 * per entry, by delegating to {@link #SequenceLog64BigDisk(Path, int)}.
 * NOTE(review): W is presumably the full word width (64 bits) - confirm against its declaration.
 *
 * @param location path of the backing file
 */
public SequenceLog64BigDisk(Path location) {
this(location, W);
}

/**
 * Creates a sequence at the given path with a capacity of zero entries,
 * by delegating to {@link #SequenceLog64BigDisk(Path, int, long)}.
 *
 * @param location path of the backing file
 * @param numbits number of bits per entry
 */
public SequenceLog64BigDisk(Path location, int numbits) {
this(location, numbits, 0);
}

/**
 * Creates a sequence at the given path, delegating to the four-argument
 * constructor with {@code initialize = false}, so the entry count is not
 * preset to {@code capacity}.
 *
 * @param location path of the backing file
 * @param numbits number of bits per entry
 * @param capacity number of entries to allocate space for
 */
public SequenceLog64BigDisk(Path location, int numbits, long capacity) {
this(location, numbits, capacity, false);
}

public SequenceLog64BigDisk(Path location, int numbits, long capacity, boolean initialize) {
this.numentries = 0;
this.numbits = numbits;
this.maxvalue = BitUtil.maxVal(numbits);
long size = numWordsFor(numbits, capacity);
data = new LongArrayDisk(location, Math.max(size,1));
}

public SequenceLog64BigDisk(String location, int numbits, long capacity, boolean initialize) {
this(location, numbits, capacity);
if(initialize) {
if (initialize) {
numentries = capacity;
}
}
Expand Down Expand Up @@ -132,7 +148,7 @@ private static void setField(LongArrayDisk data, int bitsField, long index, long
}
}

private void resizeArray(long size) {
private void resizeArray(long size) throws IOException {
data.resize(size);
}

Expand Down Expand Up @@ -179,7 +195,11 @@ public void append(long value) {

long neededSize = numWordsFor(numbits, numentries+1);
if(data.length()<neededSize) {
resizeArray(data.length()*2);
try {
resizeArray(data.length()*2);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
this.set(numentries, value);
numentries++;
Expand Down Expand Up @@ -208,20 +228,32 @@ public void aggressiveTrimToSize() {
long totalSize = numWordsFor(numbits, numentries);

if (totalSize!=data.length()){
resizeArray(totalSize);
try {
resizeArray(totalSize);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}

}

@Override
public void trimToSize() {
resizeArray(numWordsFor(numbits, numentries));
try {
resizeArray(numWordsFor(numbits, numentries));
} catch (IOException e) {
throw new RuntimeException(e);
}
}

public void resize(long numentries) {
this.numentries = numentries;
resizeArray(numWordsFor(numbits, numentries));
try {
resizeArray(numWordsFor(numbits, numentries));
} catch (IOException e) {
throw new RuntimeException(e);
}
}

/* (non-Javadoc)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
import org.rdfhdt.hdt.options.HDTOptions;
import org.rdfhdt.hdt.options.HDTSpecification;

import java.io.IOException;

/**
* Factory that creates HDT objects
*
Expand All @@ -45,7 +47,7 @@ private HDTFactory() {}
*
* @return HDT
*/
public static HDT createHDT() {
public static HDT createHDT() throws IOException {
return new HDTImpl(new HDTSpecification());
}

Expand All @@ -54,7 +56,7 @@ public static HDT createHDT() {
*
* @return HDT
*/
public static HDT createHDT(HDTOptions spec) {
public static HDT createHDT(HDTOptions spec) throws IOException {
return new HDTImpl(spec);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public HDT doLoadHDT(InputStream hdtFile, ProgressListener listener, HDTOptions
public HDT doLoadIndexedHDT(String hdtFileName, ProgressListener listener, HDTOptions spec) throws IOException {
HDTPrivate hdt = new HDTImpl(spec);
hdt.loadFromHDT(hdtFileName, listener);
hdt.loadOrCreateIndex(listener);
hdt.loadOrCreateIndex(listener, spec);
return hdt;
}

Expand All @@ -102,21 +102,21 @@ public HDT doLoadIndexedHDT(String hdtFileName, ProgressListener listener, HDTOp
public HDT doMapIndexedHDT(String hdtFileName, ProgressListener listener, HDTOptions spec) throws IOException {
HDTPrivate hdt = new HDTImpl(spec);
hdt.mapFromHDT(new File(hdtFileName), 0, listener);
hdt.loadOrCreateIndex(listener);
hdt.loadOrCreateIndex(listener, spec);
return hdt;
}

@Override
public HDT doLoadIndexedHDT(InputStream hdtFile, ProgressListener listener, HDTOptions spec) throws IOException {
HDTPrivate hdt = new HDTImpl(spec);
hdt.loadFromHDT(hdtFile, listener);
hdt.loadOrCreateIndex(listener);
hdt.loadOrCreateIndex(listener, spec);
return hdt;
}

@Override
public HDT doIndexedHDT(HDT hdt, ProgressListener listener) throws IOException {
((HDTPrivate) hdt).loadOrCreateIndex(listener);
((HDTPrivate)hdt).loadOrCreateIndex(listener, new HDTSpecification());
return hdt;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.io.InputStream;

import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.options.HDTOptions;

/**
* HDT operations that are used internally by the implementation.
Expand Down Expand Up @@ -36,7 +37,7 @@ public interface HDTPrivate extends HDT {
*
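* @param listener A listener to be notified of the progress.
* @param disk Options used when the index has to be created (HDTImpl forwards them to the triples' generateIndex).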
*/
void loadOrCreateIndex(ProgressListener listener) throws IOException;
void loadOrCreateIndex(ProgressListener listener, HDTOptions disk) throws IOException;

void populateHeaderStructure(String baseUri);
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public class HDTImpl extends HDTBase<HeaderPrivate, DictionaryPrivate, TriplesPr
private boolean isMapped;
private boolean isClosed=false;

public HDTImpl(HDTOptions spec) {
public HDTImpl(HDTOptions spec) throws IOException {
super(spec);

header = HeaderFactory.createHeader(this.spec);
Expand Down Expand Up @@ -369,7 +369,7 @@ public void loadFromModifiableHDT(TempHDT modHdt, ProgressListener listener) {
* @see hdt.hdt.HDT#generateIndex(hdt.listener.ProgressListener)
*/
@Override
public void loadOrCreateIndex(ProgressListener listener) throws IOException {
public void loadOrCreateIndex(ProgressListener listener, HDTOptions spec) throws IOException {
if(triples.getNumberOfElements()==0) {
// We need no index.
return;
Expand Down Expand Up @@ -402,7 +402,7 @@ public void loadOrCreateIndex(ProgressListener listener) throws IOException {

// GENERATE
StopWatch st = new StopWatch();
triples.generateIndex(listener);
triples.generateIndex(listener, spec);

// SAVE
if(this.hdtFileName!=null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public void mapFromHDT(File f, long offset, ProgressListener listener) {
}

@Override
public void loadOrCreateIndex(ProgressListener listener) {
public void loadOrCreateIndex(ProgressListener listener, HDTOptions disk) {
throw new NotImplementedException();
}

Expand Down
Loading