Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDTCatTree + HDTGenDisk #179

Merged
merged 9 commits into from
Nov 21, 2022
Prev Previous commit
Next Next commit
Add genDisk with Multi Section Dictionaries (MSC), progress bar for the rdf2hdt logs and check MSC with hdtVerify
  • Loading branch information
ate47 committed Nov 16, 2022
commit e7b2cdd935f1b29aaff819e1b3d05ae7363f155f
6 changes: 3 additions & 3 deletions hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,11 @@ default RDFFluxStop getFluxStop(String key, RDFFluxStop defaultValue) {
* @return long or defaultValue if the value isn't defined
*/
default long getInt(String key, LongSupplier defaultValue) {
long l = getInt(key);
if (l == 0) {
String l = get(key);
if (l == null) {
return defaultValue.getAsLong();
}
return l;
return Long.parseLong(l);
}

/**
Expand Down
156 changes: 127 additions & 29 deletions hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HDTVerify.java
Original file line number Diff line number Diff line change
@@ -1,28 +1,59 @@
package org.rdfhdt.hdt.tools;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.internal.Lists;
import org.rdfhdt.hdt.dictionary.DictionarySection;
import org.rdfhdt.hdt.dictionary.impl.MultipleBaseDictionary;
import org.rdfhdt.hdt.hdt.HDT;
import org.rdfhdt.hdt.hdt.HDTManager;
import org.rdfhdt.hdt.util.listener.ColorTool;
import org.rdfhdt.hdt.util.string.ByteString;
import org.rdfhdt.hdt.util.string.CharSequenceComparator;
import org.rdfhdt.hdt.util.string.CompactString;
import org.rdfhdt.hdt.util.string.ReplazableString;

import java.util.Comparator;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class HDTVerify {

/**
 * Private constructor: within this file the only instantiation is in
 * {@link #main(String[])}, which passes the new instance to JCommander.
 */
private HDTVerify() {
}

private static void print(byte[] arr) {
/** Positional command line arguments; {@code exec()} uses the first one as the HDT file to verify. */
@Parameter(description = "<input HDT>")
public List<String> parameters = Lists.newArrayList();

/** When set, the section check skips the {@code String} comparison and only compares entries as byte strings. */
@Parameter(names = "-unicode", description = "Ignore UNICODE order")
public boolean unicode;

/** Passed as first argument to the {@code ColorTool} created in {@code main}. */
@Parameter(names = "-color", description = "Print using color (if available)")
public boolean color;

/** When set, the compared values are dumped via {@code print} if the two comparison signs disagree. */
@Parameter(names = "-binary", description = "Print binaries of the string in case of signum error")
public boolean binary;

/**
 * Passed as second argument to the {@code ColorTool} created in {@code main}.
 * NOTE(review): the help text mentions "conversion" although this tool verifies an HDT - confirm wording.
 */
@Parameter(names = "-quiet", description = "Do not show progress of the conversion")
public boolean quiet;

/**
 * Selects {@code HDTManager.loadHDT} instead of {@code HDTManager.mapHDT} in {@code loadOrMap}.
 * NOTE(review): the help text reads "large a HDT"; probably meant "a large HDT".
 */
@Parameter(names = "-load", description = "Load the HDT in memory for faster results (might be impossible for large a HDT)")
public boolean load;

/** Output helper used for all log/warn/error messages; assigned in {@code main} after argument parsing. */
public ColorTool colorTool;

/**
 * Opens the HDT file, either through {@code HDTManager.loadHDT} when the
 * {@code load} option is set or through {@code HDTManager.mapHDT} otherwise.
 *
 * @param file path of the HDT file
 * @return the opened HDT
 * @throws IOException propagated from the HDTManager call
 */
private HDT loadOrMap(String file) throws IOException {
    if (load) {
        return HDTManager.loadHDT(file);
    }
    return HDTManager.mapHDT(file);
}

/**
 * Writes the given bytes to standard output as two-digit upper-case
 * hexadecimal values, each followed by a space, and terminates the line.
 *
 * @param arr bytes to dump
 */
private void print(byte[] arr) {
    StringBuilder hex = new StringBuilder(arr.length * 3);
    for (int i = 0; i < arr.length; i++) {
        hex.append(String.format("%02X ", arr[i]));
    }
    System.out.println(hex);
}

private static void print(CharSequence seq) {
private void print(CharSequence seq) {
if (seq instanceof CompactString) {
CompactString cs1 = (CompactString) seq;
print(cs1.getData());
Expand All @@ -34,52 +65,119 @@ private static void print(CharSequence seq) {
}
}

public static void checkDictionarySectionOrder(Iterator<? extends CharSequence> it) {
public boolean checkDictionarySectionOrder(Iterator<? extends CharSequence> it) {
ReplazableString prev = new ReplazableString();
String lastStr = "";
boolean error = false;
while (it.hasNext()) {
ByteString charSeq = ByteString.of(it.next());
String str = charSeq.toString();

int cmp = prev.compareTo(charSeq);

if (cmp >= 0) {
System.out.println("ERRA: " + prev + " / " + charSeq);
error = true;
if (cmp == 0) {
colorTool.error("Duplicated(bs)", prev + " == " + charSeq);
} else {
colorTool.error("Bad order(bs)", prev + " > " + charSeq);
}
}

int cmp2 = lastStr.compareTo(str);
if (!unicode) {
int cmp2 = lastStr.compareTo(str);

if (cmp2 >= 0) {
System.out.println("ERRB: " + lastStr + " / " + str);
}
if (cmp2 >= 0) {
error = true;
if (cmp == 0) {
colorTool.error("Duplicated(str)", lastStr + " == " + str);
} else {
colorTool.error("Bad order(str)", lastStr + " > " + str);
}
}

if (Math.signum(cmp) != Math.signum(cmp2)) {
System.out.println("Not equal: " + cmp + " / " + cmp2);
print(prev);
print(charSeq);
print(lastStr);
print(str);
if (Math.signum(cmp) != Math.signum(cmp2)) {
error = true;
colorTool.error("Not equal", cmp + " != " + cmp2 + " for " + lastStr + " / " + str);
if (binary) {
print(prev);
print(charSeq);
print(lastStr);
print(str);
}
}

lastStr = str;
}

prev.replace(charSeq);
lastStr = str;
}
if (error) {
colorTool.warn("Not valid section");
} else {
colorTool.log("valid section");
}
return error;
}

/**
 * Opens the HDT named by the first positional parameter and checks the order
 * of its subject, predicate, object and shared dictionary sections, in that order.
 * For a {@code MultipleBaseDictionary} every object section returned by
 * {@code getAllObjects()} is checked separately.
 * <p>
 * On success the number of checked elements is logged; on failure an error is
 * logged and the JVM exits with status -1.
 *
 * @throws Throwable anything raised while opening or reading the HDT
 */
public void exec() throws Throwable {
    try (HDT hdt = loadOrMap(parameters.get(0))) {
        long count = 0;

        colorTool.log("Checking subject entries");
        boolean error = checkDictionarySectionOrder(hdt.getDictionary().getSubjects().getSortedEntries());
        count += hdt.getDictionary().getSubjects().getNumberOfElements();

        colorTool.log("Checking predicate entries");
        error |= checkDictionarySectionOrder(hdt.getDictionary().getPredicates().getSortedEntries());
        count += hdt.getDictionary().getPredicates().getNumberOfElements();

        colorTool.log("Checking object entries");
        if (hdt.getDictionary() instanceof MultipleBaseDictionary) {
            // Multi-section dictionary: walk every object section on its own.
            Map<? extends CharSequence, DictionarySection> objectSections = hdt.getDictionary().getAllObjects();
            for (Map.Entry<? extends CharSequence, DictionarySection> e : objectSections.entrySet()) {
                colorTool.log("Checking object section " + e.getKey());
                error |= checkDictionarySectionOrder(e.getValue().getSortedEntries());
                count += e.getValue().getNumberOfElements();
            }
        } else {
            error |= checkDictionarySectionOrder(hdt.getDictionary().getObjects().getSortedEntries());
            count += hdt.getDictionary().getObjects().getNumberOfElements();
        }

        colorTool.log("Checking shared entries");
        error |= checkDictionarySectionOrder(hdt.getDictionary().getShared().getSortedEntries());
        count += hdt.getDictionary().getShared().getNumberOfElements();

        if (!error) {
            colorTool.log(count + " element(s) parsed");
            colorTool.log(colorTool.color(0, 5, 0) + "This HDT is valid", true);
        } else {
            colorTool.error("This HDT isn't valid", true);
            System.exit(-1);
        }
    }
}

public static void main(String[] args) throws Throwable {
if (args.length < 1) {
System.out.println("hdtVerify <file.hdt>");
HDTVerify verify = new HDTVerify();
JCommander com = new JCommander(verify);
com.parse(args);
verify.colorTool = new ColorTool(verify.color, verify.quiet);
com.setProgramName("hdtVerify");
if (verify.parameters.size() < 1) {
com.usage();
System.exit(-1);
}
try (HDT hdt = HDTManager.mapHDT(args[0], null)) {
System.out.println("Checking subject entries");
checkDictionarySectionOrder(hdt.getDictionary().getSubjects().getSortedEntries());
System.out.println("Checking predicate entries");
checkDictionarySectionOrder(hdt.getDictionary().getPredicates().getSortedEntries());
System.out.println("Checking object entries");
checkDictionarySectionOrder(hdt.getDictionary().getObjects().getSortedEntries());
System.out.println("Checking shared entries");
checkDictionarySectionOrder(hdt.getDictionary().getShared().getSortedEntries());
}
verify.exec();
}
}
Loading