Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update JLargeArrays and use SequenceLog64Big instead of SequenceLog64 #174

Merged
merged 2 commits into from
Sep 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions hdt-java-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@
<artifactId>jena-arq</artifactId>
</dependency>
<dependency>
<groupId>pl.edu.icm</groupId>
<groupId>org.visnow</groupId>
<artifactId>JLargeArrays</artifactId>
<version>1.7-20220624.150242-7</version>
</dependency>
<dependency>
<groupId>pl.pragmatists</groupId>
Expand All @@ -63,7 +64,12 @@
<version>1.6</version>
</dependency>
</dependencies>

<repositories>
<repository>
<id>gitlab-maven</id>
<url>https://gitlab.com/api/v4/projects/375779/packages/maven</url>
</repository>
</repositories>
<build>
<plugins>
<plugin>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public boolean access(long bitIndex) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

int wordIndex = wordIndex(bitIndex);
long wordIndex = wordIndex(bitIndex);
if(wordIndex>=words.length()) {
return false;
}
Expand Down Expand Up @@ -415,4 +415,4 @@ public void load(InputStream input, ProgressListener listener) throws IOExceptio
super.load(input, listener);
updateIndex();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import org.rdfhdt.hdt.compact.integer.VByte;
import org.rdfhdt.hdt.exceptions.NotImplementedException;
import org.rdfhdt.hdt.hdt.HDTVocabulary;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.util.BitUtil;
import org.rdfhdt.hdt.util.crc.CRC32;
Expand All @@ -37,7 +38,7 @@
/**
* Version of Bitmap64 which is backed up on disk
*/
public class Bitmap64Disk implements Closeable {
public class Bitmap64Disk implements Closeable, ModifiableBitmap {

// Constants
protected final static int LOGW = 6;
Expand All @@ -59,7 +60,7 @@ public Bitmap64Disk(String location, long nbits) {
/**
* Given a bit index, return word index containing it.
*/
protected static int wordIndex(long bitIndex) {
protected static long wordIndex(long bitIndex) {
    // A word stores 2^LOGW bits, so the index of the word holding bitIndex is
    // bitIndex shifted right by LOGW. The result is deliberately kept as a long:
    // narrowing it to int would silently wrap once the word index reaches 2^31,
    // which is exactly the range the long return type is meant to support.
    return bitIndex >>> LOGW;
}

Expand All @@ -78,7 +79,7 @@ protected static int lastWordNumBits(long numbits) {
return (int) ((numbits-1) % W)+1; // +1 To have output in the range 1-64, -1 to compensate.
}

protected final void ensureSize(int wordsRequired) {
// Asks the backing store to resize to max(2 * words.getSize(), wordsRequired).
// NOTE(review): resize is called unconditionally, even when the current size may
// already cover wordsRequired -- confirm that words.resize handles this cheaply,
// and that getSize() is expressed in the same unit (words) as wordsRequired.
protected final void ensureSize(long wordsRequired) {
words.resize(Math.max(words.getSize()*2, wordsRequired));
}

Expand All @@ -97,14 +98,73 @@ public boolean access(long bitIndex) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

int wordIndex = wordIndex(bitIndex);
long wordIndex = wordIndex(bitIndex);
if(wordIndex>=words.length()) {
return false;
}

return (words.get(wordIndex) & (1L << bitIndex)) != 0;
}

/**
 * Not implemented for this disk-backed bitmap.
 *
 * @throws NotImplementedException always
 */
@Override
public long rank1(long pos) {
throw new NotImplementedException();
}

/**
 * Not implemented for this disk-backed bitmap.
 *
 * @throws NotImplementedException always
 */
@Override
public long rank0(long pos) {
throw new NotImplementedException();
}

/**
 * Finds the first bit set to one at or after the given position.
 *
 * @param fromIndex bit position to start searching from (inclusive)
 * @return position of the first set bit found, or -1 when the start position
 *         lies beyond the stored words or no set bit follows it
 * @throws IndexOutOfBoundsException if {@code fromIndex} is negative
 */
@Override
public long selectNext1(long fromIndex) {
    if (fromIndex < 0) {
        throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
    }

    long current = wordIndex(fromIndex);
    if (current >= words.length()) {
        return -1;
    }

    // Drop the bits below fromIndex in the first word. Java only uses the low
    // six bits of a long shift count, so this masks by the in-word offset.
    long bits = words.get(current) & (~0L << fromIndex);

    // Skip over empty words until one with a set bit is found or storage ends.
    while (bits == 0) {
        current++;
        if (current == words.length()) {
            return -1;
        }
        bits = words.get(current);
    }
    return (current * W) + Long.numberOfTrailingZeros(bits);
}

/**
 * Not implemented for this disk-backed bitmap.
 *
 * @throws NotImplementedException always
 */
@Override
public long select0(long n) {
throw new NotImplementedException();
}

/**
 * Not implemented for this disk-backed bitmap.
 *
 * @throws NotImplementedException always
 */
@Override
public long select1(long n) {
throw new NotImplementedException();
}

/**
 * Counts the bits set to one by summing the population count of every word
 * from the first one up to the word that contains position {@code numbits},
 * clamped to the last stored word.
 *
 * @return number of set bits, or 0 when no words are stored
 */
@Override
public long countOnes() {
    if (words.length() == 0) {
        return 0;
    }

    // Last word to inspect; clamp so we never read past the stored words.
    long lastWord = wordIndex(numbits);
    if (lastWord >= words.length()) {
        lastWord = words.length() - 1;
    }

    long ones = 0;
    // The counter must be a long: with an int counter and a long bound, the
    // counter would overflow (negative index / endless loop) on bitmaps with
    // more than Integer.MAX_VALUE words.
    for (long i = 0; i <= lastWord; i++) {
        ones += Long.bitCount(words.get(i));
    }
    return ones;
}

/**
 * Returns the number of bits that are not set, computed as the total bit
 * capacity of the stored words (64 bits per word) minus {@link #countOnes()}.
 *
 * NOTE(review): the total is based on the number of stored words, not on
 * {@code numbits}, so any bits in the words beyond {@code numbits} are counted
 * as zeros -- confirm this is the intended contract.
 */
@Override
public long countZeros() {
return words.length() * 64L - countOnes();
}

/* (non-Javadoc)
* @see hdt.compact.bitmap.ModifiableBitmap#append(boolean)
*/
Expand All @@ -116,7 +176,7 @@ public void set(long bitIndex, boolean value) {
if (bitIndex < 0)
throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

int wordIndex = wordIndex(bitIndex);
long wordIndex = wordIndex(bitIndex);
ensureSize(wordIndex+1);

if(value) {
Expand All @@ -128,27 +188,13 @@ public void set(long bitIndex, boolean value) {
this.numbits = Math.max(this.numbits, bitIndex+1);
}

public long selectPrev1(long start) {
throw new NotImplementedException();
@Override
public String getType() {
return HDTVocabulary.BITMAP_TYPE_PLAIN;
}

public long selectNext1(long fromIndex) {
if (fromIndex < 0)
throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);

int wordIndex = wordIndex(fromIndex);
if (wordIndex >= words.length())
return -1;

long word = words.get(wordIndex) & (~0L << fromIndex);

while (true) {
if (word != 0)
return ((long)wordIndex * W) + Long.numberOfTrailingZeros(word);
if (++wordIndex == words.length())
return -1;
word = words.get(wordIndex);
}
public long selectPrev1(long start) {
throw new NotImplementedException();
}

public long getWord(int word) {
Expand Down Expand Up @@ -211,4 +257,4 @@ public long getRealSizeBytes() {
public void close() throws IOException {
words.close();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,13 @@ public void set(long position, long value) {
@Override
public void append(long value) {
assert value>=0 && value<=Integer.MAX_VALUE;

if(data.length<numelements+1) {
resizeArray(data.length*2);

long neededSize = numelements+1L;
if (neededSize > Integer.MAX_VALUE - 5) {
throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize);
}
if(data.length < neededSize) {
resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L));
}
data[numelements++] = (int) value;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,19 +94,23 @@ public long get(long position) {
@Override
public void set(long position, long value) {
assert position>=0 && position<=Integer.MAX_VALUE;
assert value>=0 && value<=Long.MAX_VALUE;
assert value>=0;

data[(int)position] = value;
numelements = (int) Math.max(numelements, position+1);
}

@Override
public void append(long value) {
assert value>=0 && value<=Long.MAX_VALUE;
assert value>=0;
assert numelements<Long.MAX_VALUE;

if(data.length<numelements+1) {
resizeArray(data.length*2);

long neededSize = numelements+1L;
if (neededSize > Integer.MAX_VALUE - 5) {
throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize);
}
if(data.length < neededSize) {
resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L));
}
data[(int)numelements++] = value;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,11 @@ public void append(long value) {
}

long neededSize = numWordsFor(numbits, numentries+1);
if(data.length<neededSize) {
resizeArray(data.length*2);
if (neededSize > Integer.MAX_VALUE - 5) {
throw new IllegalArgumentException("Needed size exceeds the maximum size of this data structure " + neededSize);
}
if(data.length < neededSize) {
resizeArray((int) Math.min(Integer.MAX_VALUE - 5L, data.length*2L));
}

this.set((int)numentries, value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@
import org.rdfhdt.hdt.util.crc.CRCInputStream;
import org.rdfhdt.hdt.util.crc.CRCOutputStream;
import org.rdfhdt.hdt.util.io.IOUtil;

import pl.edu.icm.jlargearrays.LongLargeArray;
import pl.edu.icm.jlargearrays.LargeArrayUtils;
import org.visnow.jlargearrays.LargeArrayUtils;
import org.visnow.jlargearrays.LongLargeArray;

/**
* @author mario.arias,Lyudmila Balakireva
Expand All @@ -55,8 +54,8 @@
public class SequenceLog64Big implements DynamicSequence {
private static final byte W = 64;
private static final int INDEX = 1073741824;
LongLargeArray data;

LongLargeArray data;
private int numbits;
private long numentries=0;
private long maxvalue;
Expand All @@ -77,7 +76,7 @@ public SequenceLog64Big(int numbits, long capacity) {
long size = numWordsFor(numbits, capacity);
LongLargeArray.setMaxSizeOf32bitArray(SequenceLog64Big.INDEX);

data = new LongLargeArray(Math.max((int)size,1));
data = new LongLargeArray(Math.max(size,1));
}

public SequenceLog64Big(int numbits, long capacity, boolean initialize) {
Expand Down Expand Up @@ -158,11 +157,7 @@ private void resizeArray(long size) {
//data = Arrays.copyOf(data, size);
if(size > 0) {
LongLargeArray a = new LongLargeArray(size);
if (size < data.length()) {
LargeArrayUtils.arraycopy(data, 0, a, 0, size);
} else {
LargeArrayUtils.arraycopy(data, 0, a, 0, data.length());
}
LargeArrayUtils.arraycopy(data, 0, a, 0, Math.min(size, data.length()));
data = a;
}else{
this.numentries = 0;
Expand All @@ -182,7 +177,7 @@ public void add(Iterator<Long> elements) {
// Count and calculate number of bits needed per element.
while(elements.hasNext()) {
long val = elements.next();
max = val>max ? val : max;
max = Math.max(val, max);
numentries++;
}

Expand All @@ -208,7 +203,7 @@ public void addIntegers(ArrayList<Integer> elements) {
// Count and calculate number of bits needed per element.
for (int i=0;i<elements.size();i++){
long val = elements.get(i).longValue();
max = val>max ? val : max;
max = Math.max(val, max);
numentries++;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ public void diffBit(String location, HDT hdt, Bitmap deleteBitmap, ProgressListe
log.debug("Generating Triples...");
il.notifyProgress(40, "Generating Triples...");
// map the triples based on the new dictionary
BitmapTriplesIteratorMapDiff mapIter = new BitmapTriplesIteratorMapDiff(hdt, deleteBitmap, diff, iter.getCount() + 1);
BitmapTriplesIteratorMapDiff mapIter = new BitmapTriplesIteratorMapDiff(hdt, deleteBitmap, diff);

BitmapTriples triples = new BitmapTriples(spec);
triples.load(mapIter, listener);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ public void load(IteratorTripleID it, ProgressListener listener) {

long number = it.estimatedNumResults();

SequenceLog64 vectorY = new SequenceLog64(BitUtil.log2(number), number);
SequenceLog64 vectorZ = new SequenceLog64(BitUtil.log2(number), number);
DynamicSequence vectorY = new SequenceLog64Big(BitUtil.log2(number), number);
DynamicSequence vectorZ = new SequenceLog64Big(BitUtil.log2(number), number);

ModifiableBitmap bitY = new Bitmap375(number);
ModifiableBitmap bitZ = new Bitmap375(number);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ public class BitmapTriplesIteratorMapDiff implements IteratorTripleID {
TripleIDComparator tripleIDComparator = new TripleIDComparator(TripleComponentOrder.SPO);
Bitmap bitArrayDisk;

public BitmapTriplesIteratorMapDiff(HDT hdtOriginal, Bitmap deleteBitmap, DictionaryDiff dictionaryDiff, long countTriples) {
public BitmapTriplesIteratorMapDiff(HDT hdtOriginal, Bitmap deleteBitmap, DictionaryDiff dictionaryDiff) {
this.subjMapping = dictionaryDiff.getAllMappings().get("subject");
this.objMapping = dictionaryDiff.getAllMappings().get("object");
this.predMapping = dictionaryDiff.getAllMappings().get("predicate");
this.sharedMapping = dictionaryDiff.getAllMappings().get("shared");
this.dictionaryDiff = dictionaryDiff;
this.countTriples = countTriples;
this.countTriples = Math.max(0, hdtOriginal.getTriples().getNumberOfElements() - deleteBitmap.countOnes());
this.triples = hdtOriginal.getTriples();
this.bitArrayDisk = deleteBitmap;
list = getTripleID(0).listIterator();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.util.string.ByteStringUtil;
import pl.edu.icm.jlargearrays.LargeArrayUtils;
import org.visnow.jlargearrays.LargeArrayUtils;

import java.io.*;
import java.net.URL;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.rdfhdt.hdt.compact.sequence;

import org.junit.Test;
import org.visnow.jlargearrays.LargeArray;
import org.visnow.jlargearrays.LongLargeArray;

/**
 * Smoke test for allocating a {@link LongLargeArray} whose length exceeds the
 * configured maximum size of a 32-bit array.
 */
public class LargeArrayTest {

    /**
     * Lowers the 32-bit array limit to 100, allocates an array two elements
     * longer than that limit, and restores the previous limit afterwards.
     * The test passes as long as the allocation does not throw.
     */
    @Test
    public void allocationTest() {
        final int previousLimit = LargeArray.getMaxSizeOf32bitArray();
        try {
            LargeArray.setMaxSizeOf32bitArray(100);
            final long aboveLimit = LargeArray.getMaxSizeOf32bitArray() + 2L;
            new LongLargeArray(aboveLimit);
        } finally {
            // The limit is static state; always put it back for other tests.
            LargeArray.setMaxSizeOf32bitArray(previousLimit);
        }
    }
}