Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix IndexOutOfBound for loaded HDT BigByteBuffer of size > 2^31 #161

Merged
merged 2 commits into from
May 10, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix IndexOutOfBound for loaded HDT BigByteBuffer of size > 2^31
  • Loading branch information
ate47 committed May 10, 2022
commit 4e0517bac9e599b4eb85499d27449d2f6643eb41
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,7 @@ public static int strcmp(CharSequence str, byte [] buff2, int off2) {
return a-b;
}
if(a==0) {
if(b==0) {
return 0;
} else {
return -1;
}
return 0;
}
}

Expand All @@ -164,7 +160,7 @@ public static int strcmp(CharSequence str, byte [] buff2, int off2) {
public static int strcmp(CharSequence str, BigByteBuffer buff2, long off2) {
byte [] buff1;
int off1;
int len1;
long len1;
long len2=buff2.size();

if(str instanceof CompactString) {
Expand All @@ -183,7 +179,7 @@ public static int strcmp(CharSequence str, BigByteBuffer buff2, long off2) {
throw new NotImplementedException();
}

int n = Math.min(len1-off1, (int) (len2-off2));
int n = (int) Math.min(len1-off1, len2-off2);

int p1 = off1;
long p2 = off2;
Expand All @@ -194,11 +190,7 @@ public static int strcmp(CharSequence str, BigByteBuffer buff2, long off2) {
return a-b;
}
if(a==0) {
if(b==0) {
return 0;
} else {
return -1;
}
return 0;
}
}

Expand Down Expand Up @@ -235,7 +227,7 @@ public static int strcmp(CharSequence str, ByteBuffer buffer, int offset) {

// Compare
int i=0;
int n = Math.min(len, buffer.capacity()-offset);
long n = Math.min(len, buffer.capacity()-offset);
while(i<n) {
int v1 = buf[i] & 0xFF;
int v2 = buffer.get(offset+i) & 0xFF;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import org.junit.rules.TemporaryFolder;
import org.rdfhdt.hdt.compact.bitmap.BitmapFactory;
import org.rdfhdt.hdt.compact.bitmap.ModifiableBitmap;
import org.rdfhdt.hdt.util.string.ByteStringUtil;
import org.rdfhdt.hdt.util.string.CompactString;
import org.rdfhdt.hdt.util.string.ReplazableString;

import java.io.File;
import java.io.FileOutputStream;
Expand Down Expand Up @@ -40,6 +43,7 @@ public void prepare() {
// save the size if we want to update it
oldSize = BigByteBuffer.maxBufferSize;
}

@After
public void complete() {
BigByteBuffer.maxBufferSize = oldSize;
Expand All @@ -54,6 +58,7 @@ public void capacityBuffer() {
Assert.assertEquals(8, buffer.getBuffers().size());
Assert.assertEquals(size, buffer.size());
}

@Test
@Ignore("large, should be run with at least 3G or ram -Xmx3G")
public void capacityBufferLarge() {
Expand All @@ -76,6 +81,7 @@ public void get() {
supplier.reset();
supplier.generate(size / 10, size, e -> Assert.assertEquals(e.value, buffer.get(e.index)));
}

@Test
public void getArr() {
int size = 10000;
Expand Down Expand Up @@ -137,6 +143,7 @@ public void readFileTest() throws IOException {
assertArrayEquals(real, i, test, 0, test.length - i);
}
}

@Test
public void writeFileTest() throws IOException {
int size = BigByteBuffer.BUFFER_SIZE * 10;
Expand Down Expand Up @@ -170,6 +177,60 @@ public void writeFileTest() throws IOException {
}
}

/**
 * Wraps one piece of text in three different {@link CharSequence} implementations.
 *
 * @param value the text to wrap
 * @return a three-element array holding, in order: {@code value} itself, a
 *         {@link ReplazableString} filled with the bytes of {@code value} encoded with
 *         {@link ByteStringUtil#STRING_ENCODING}, and a {@link CompactString} built from
 *         {@code value}
 */
private CharSequence[] charSequences(String value) {
    byte[] encoded = value.getBytes(ByteStringUtil.STRING_ENCODING);

    // Constructed with the encoded length, then filled with every encoded byte.
    ReplazableString replazable = new ReplazableString(encoded.length);
    replazable.append(encoded, 0, encoded.length);

    return new CharSequence[] { value, replazable, new CompactString(value) };
}

/**
 * Reduces a comparison result to its sign so results from different comparators can be
 * checked against each other.
 *
 * @param v a raw comparison result
 * @return {@code -1} if {@code v} is negative, {@code 0} if it is zero, {@code 1} if it is
 *         positive
 */
private int normalizeCompare(int v) {
    return Integer.signum(v);
}

/**
 * Exercises {@code ByteStringUtil.strcmp(CharSequence, BigByteBuffer, long)} and
 * {@code ByteStringUtil.strlen(BigByteBuffer, long)} on strings stored at a non-zero offset
 * inside a {@link BigByteBuffer}.
 *
 * <p>Each string is written after one or two filler bytes, followed by a 0 byte and further
 * filler bytes. Every {@link CharSequence} flavour produced by {@code charSequences} is then
 * compared against both buffers: against its own text the result must be exactly 0, against
 * the other text the sign must match {@link String#compareTo(String)}.
 */
@Test
public void strcmpTest() {
    String s1 = "aaaégbbbccc";
    String s2 = "aaaéeccc";

    CharSequence[] css1 = charSequences(s1);
    CharSequence[] css2 = charSequences(s2);

    byte[] bs1 = s1.getBytes(ByteStringUtil.STRING_ENCODING);
    byte[] bs2 = s2.getBytes(ByteStringUtil.STRING_ENCODING);

    // Cap the per-buffer size at about half the longer encoded string; presumably this makes
    // each string span more than one internal buffer (confirm against BigByteBuffer.allocate).
    BigByteBuffer.maxBufferSize = Math.max(bs1.length, bs2.length) / 2 + 1;

    // Layout of buffer1: [13][s1 bytes][0][42][32]
    BigByteBuffer buffer1 = BigByteBuffer.allocate(bs1.length + 4);
    buffer1.set(0, (byte) 13);
    buffer1.set(1, bs1, 0, bs1.length);
    buffer1.set(bs1.length + 1, (byte) 0);
    buffer1.set(bs1.length + 2, (byte) 42);
    buffer1.set(bs1.length + 3, (byte) 32);

    // Layout of buffer2: [27][34][s2 bytes][0][67]
    BigByteBuffer buffer2 = BigByteBuffer.allocate(bs2.length + 5);
    buffer2.set(0, (byte) 27);
    buffer2.set(1, (byte) 34);
    buffer2.set(2, bs2, 0, bs2.length);
    buffer2.set(bs2.length + 2, (byte) 0);
    buffer2.set(bs2.length + 3, (byte) 67);

    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the right value as "expected".
    Assert.assertEquals(bs1.length, ByteStringUtil.strlen(buffer1, 1));
    Assert.assertEquals(bs2.length, ByteStringUtil.strlen(buffer2, 2));

    // Reference ordering taken from String.compareTo, reduced to its sign.
    int expectedS1VsS2 = normalizeCompare(s1.compareTo(s2));
    int expectedS2VsS1 = normalizeCompare(s2.compareTo(s1));

    // Each check depends on a single CharSequence, so one pass per array covers every case.
    for (CharSequence cs1 : css1) {
        Assert.assertEquals(0, ByteStringUtil.strcmp(cs1, buffer1, 1));
        Assert.assertEquals(expectedS1VsS2, normalizeCompare(ByteStringUtil.strcmp(cs1, buffer2, 2)));
    }
    for (CharSequence cs2 : css2) {
        Assert.assertEquals(0, ByteStringUtil.strcmp(cs2, buffer2, 2));
        Assert.assertEquals(expectedS2VsS1, normalizeCompare(ByteStringUtil.strcmp(cs2, buffer1, 1)));
    }
}


private static class Entry {
long index;
Expand Down