Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

K-HDTCat #181

Merged
merged 2 commits into from
Dec 1, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Implement k-HDTCat
  • Loading branch information
ate47 committed Nov 30, 2022
commit d39af554524755cdd0f216ffddc9e8eb2458b3e6
14 changes: 14 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.List;

import org.rdfhdt.hdt.compact.bitmap.Bitmap;
import org.rdfhdt.hdt.enums.CompressionType;
Expand Down Expand Up @@ -461,6 +462,18 @@ public static TripleWriter getHDTWriter(String outFile, String baseURI, HDTOptio
public static HDT catHDT(String location, String hdtFileName1, String hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException {
    // delegate to the registered manager implementation
    HDTManager manager = HDTManager.getInstance();
    return manager.doHDTCat(location, hdtFileName1, hdtFileName2, hdtFormat, listener);
}

/**
 * Build a single HDT out of several HDT files by joining their triples.
 * The work is delegated to the {@link HDTManager} instance returned by
 * {@link #getInstance()}.
 *
 * @param hdtFileNames names of the HDT files to join
 * @param hdtFormat    parameters to tune the generated HDT
 * @param listener     listener notified of the loading progress, can be null if no notification is needed
 * @return the resulting HDT
 * @throws IOException when a file cannot be found
 */
public static HDT catHDT(List<String> hdtFileNames, HDTOptions hdtFormat, ProgressListener listener) throws IOException {
    HDTManager manager = HDTManager.getInstance();
    return manager.doHDTCat(hdtFileNames, hdtFormat, listener);
}
/**
* Create a new HDT by removing from hdt1 the triples of hdt2.
* @param hdtFileName1 First hdt file name
Expand Down Expand Up @@ -561,6 +574,7 @@ public static HDT catTree(RDFFluxStop fluxStop, HDTSupplier supplier, Iterator<T
protected abstract TripleWriter doGetHDTWriter(OutputStream out, String baseURI, HDTOptions hdtFormat) throws IOException;
protected abstract TripleWriter doGetHDTWriter(String outFile, String baseURI, HDTOptions hdtFormat) throws IOException;
protected abstract HDT doHDTCat(String location, String hdtFileName1, String hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException;
protected abstract HDT doHDTCat(List<String> hdtFileNames, HDTOptions hdtFormat, ProgressListener listener) throws IOException;
protected abstract HDT doHDTDiff(String hdtFileName1, String hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException;
protected abstract HDT doHDTDiffBit(String location, String hdtFileName, Bitmap deleteBitmap, HDTOptions hdtFormat, ProgressListener listener) throws IOException;
protected abstract HDT doHDTCatTree(RDFFluxStop fluxStop, HDTSupplier supplier, String filename, String baseURI, RDFNotation rdfNotation, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException;
Expand Down
20 changes: 20 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@

package org.rdfhdt.hdt.options;

import org.rdfhdt.hdt.exceptions.NotImplementedException;
import org.rdfhdt.hdt.rdf.RDFFluxStop;
import org.rdfhdt.hdt.util.Profiler;

import java.util.Objects;
import java.util.Set;
import java.util.function.DoubleSupplier;
import java.util.function.LongSupplier;
import java.util.function.Supplier;
Expand All @@ -55,6 +57,10 @@ public interface HDTOptions {
*/
String get(String key);

/**
 * Get the keys of these options.
 * NOTE(review): presumably the keys currently defined in the options; the
 * exact contract isn't specified here, confirm against the implementations.
 *
 * @return set of keys
 * @throws NotImplementedException always in this default implementation,
 *                                 implementations have to override it to support it
 */
default Set<Object> getKeys() {
throw new NotImplementedException();
}

/**
* get a value
*
Expand Down Expand Up @@ -86,6 +92,20 @@ default String get(String key, Supplier<String> defaultValue) {
default boolean getBoolean(String key) {
    // a missing (null) value never matches "true", so it yields false
    String value = get(key);
    return "true".equalsIgnoreCase(value);
}
/**
 * get a boolean, falling back to a default value when the key isn't defined
 *
 * @param key          key
 * @param defaultValue value returned when no value is defined for the key
 * @return defaultValue if the value isn't defined, otherwise true if the
 *         value is "true" (ignoring case) and false for any other value
 */
default boolean getBoolean(String key, boolean defaultValue) {
    String value = get(key);
    // only an undefined value falls back to the default, a defined value
    // that isn't "true" is read as false
    return value == null ? defaultValue : "true".equalsIgnoreCase(value);
}

/**
* get a double
Expand Down
16 changes: 16 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptionsKeys.java
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,22 @@ public class HDTOptionsKeys {
@Value(key = DICTIONARY_TYPE_KEY, desc = "Multi section dictionary")
public static final String DICTIONARY_TYPE_VALUE_MULTI_OBJECTS = "dictionaryMultiObj";

/**
 * Key for the location (path) of the HDTCat temp files
 */
@Key(type = Key.Type.PATH, desc = "Location of the HDTCat temp files")
public static final String HDTCAT_LOCATION = "hdtcat.location";
/**
 * Key for the location (path) of the HDT produced by HDTCat after the loading.
 * NOTE(review): presumably the path where the resulting HDT ends up once the
 * cat is done, confirm against the HDTCat implementation.
 */
@Key(type = Key.Type.PATH, desc = "Location of the HDTCat hdt after the loading")
public static final String HDTCAT_FUTURE_LOCATION = "hdtcat.location.future";
/**
 * Key for a boolean telling whether the HDTCat temp files directory is deleted
 * after HDTCat; according to the option description it defaults to true
 */
@Key(type = Key.Type.BOOLEAN, desc = "Delete the HDTCat temp files directory after HDTCat, default to true")
public static final String HDTCAT_DELETE_LOCATION = "hdtcat.deleteLocation";

// use tree-map to have a better order
private static final Map<String, Option> OPTION_MAP = new TreeMap<>();

Expand Down
11 changes: 10 additions & 1 deletion hdt-api/src/main/java/org/rdfhdt/hdt/triples/TripleID.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* TripleID holds a triple using Long IDs
*
*/
public final class TripleID implements Comparable<TripleID>, Serializable {
public final class TripleID implements Comparable<TripleID>, Serializable, Cloneable {
private static final long serialVersionUID = -4685524566493494912L;

private long subject;
Expand Down Expand Up @@ -255,6 +255,15 @@ public boolean equals(Object o) {
return !( subject!=other.subject || predicate!=other.predicate || object!=other.object );
}

/**
 * Create a copy of this triple using {@link Object#clone()}.
 *
 * @return the cloned TripleID
 */
@Override
public TripleID clone() {
    final TripleID copy;
    try {
        copy = (TripleID) super.clone();
    } catch (CloneNotSupportedException impossible) {
        // the class is declared Cloneable, so this isn't expected to happen
        throw new AssertionError(impossible);
    }
    return copy;
}

@Override
public int hashCode() {
return (int) (subject * 13 + predicate * 17 + object * 31);
Expand Down
1 change: 1 addition & 0 deletions hdt-java-cli/bin/javaenv.bat
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
set JAVAOPTIONS=-Xmx1G
set JAVACMD=java
set RDFHDT_COLOR=false

set JAVACP="%~dp0\..\target;%~dp0\..\target\classes;%~dp0\..\target\dependency\*.jar;.

Expand Down
6 changes: 6 additions & 0 deletions hdt-java-cli/bin/javaenv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ else
JAVA="$JAVA_HOME/bin/java -server"
fi

# Set HDT Color options, set to true to allow color
if [ "$RDFHDT_COLOR" = "" ] ; then
export RDFHDT_COLOR="false"
fi

# Set Java options
if [ "$JAVA_OPTIONS" = "" ] ; then
JAVA_OPTIONS="-Xmx1g"
fi

98 changes: 66 additions & 32 deletions hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HDTCat.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,34 @@
import com.beust.jcommander.internal.Lists;

import org.apache.commons.io.FileUtils;
import org.rdfhdt.hdt.exceptions.ParserException;
import org.rdfhdt.hdt.hdt.HDT;
import org.rdfhdt.hdt.hdt.HDTManager;
import org.rdfhdt.hdt.hdt.HDTVersion;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.options.HDTOptions;
import org.rdfhdt.hdt.options.HDTOptionsKeys;
import org.rdfhdt.hdt.options.HDTSpecification;
import org.rdfhdt.hdt.util.StopWatch;
import org.rdfhdt.hdt.util.listener.ColorTool;
import org.rdfhdt.hdt.util.listener.MultiThreadListenerConsole;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;

/**
* @author Dennis Diefenbach
*
*/
public class HDTCat implements ProgressListener {

public String hdtInput1;
public String hdtInput2;
public String hdtOutput;
private ColorTool colorTool;

@Parameter(description = "<input HDT1> <input HDT2> <output HDT>")
@Parameter(description = "<input HDTs>+ <output HDT>")
public List<String> parameters = Lists.newArrayList();

@Parameter(names = "-options", description = "HDT Conversion options (override those of config file)")
Expand All @@ -57,6 +60,9 @@ public class HDTCat implements ProgressListener {
@Parameter(names = "-config", description = "Conversion config file")
public String configFile;

@Parameter(names = "-kcat", description = "Use KCat algorithm, default if the count of input HDTs isn't 2")
public boolean kcat;

@Parameter(names = "-index", description = "Generate also external indices to solve all queries")
public boolean generateIndex;

Expand All @@ -66,8 +72,19 @@ public class HDTCat implements ProgressListener {
@Parameter(names = "-quiet", description = "Do not show progress of the conversion")
public boolean quiet;

public void execute() throws IOException {
@Parameter(names = "-color", description = "Print using color (if available)")
public boolean color;

/**
 * Cat the input HDTs given in the command line parameters, the last
 * parameter being the output HDT.
 *
 * @param location location of the temp files, only passed to the 2 inputs cat
 * @param spec     HDT options
 * @param listener progress listener, can be null
 * @return the cat HDT
 * @throws IOException if the cat fails with an I/O error
 */
private HDT cat(String location, HDTOptions spec, ProgressListener listener) throws IOException {
    if (!kcat) {
        // classic cat of the first two inputs
        String firstInput = parameters.get(0);
        String secondInput = parameters.get(1);
        return HDTManager.catHDT(location, firstInput, secondInput, spec, listener);
    }
    // k-cat: every parameter except the last one (the output) is an input
    List<String> inputs = parameters.subList(0, parameters.size() - 1);
    return HDTManager.catHDT(inputs, spec, listener);
}


public void execute() throws IOException {
HDTSpecification spec;
if(configFile!=null) {
spec = new HDTSpecification(configFile);
Expand All @@ -78,35 +95,50 @@ public void execute() throws IOException {
spec.setOptions(options);
}

String hdtOutput = parameters.get(parameters.size() - 1);
File file = new File(hdtOutput);
File theDir = new File(file.getAbsolutePath()+"_tmp");

String locationOpt = spec.get(HDTOptionsKeys.HDTCAT_LOCATION);

if (locationOpt == null) {
locationOpt = file.getAbsolutePath()+"_tmp";
spec.set(HDTOptionsKeys.HDTCAT_LOCATION, locationOpt);
}

File theDir = new File(locationOpt);
Files.createDirectories(theDir.toPath());
String location = theDir.getAbsolutePath()+"/";

try (HDT hdt = HDTManager.catHDT(location,hdtInput1, hdtInput2 , spec,this)) {
ProgressListener listenerConsole =
!quiet ? (kcat ? new MultiThreadListenerConsole(color) : this)
: null;
StopWatch startCat = new StopWatch();
try (HDT hdt = cat(location, spec, listenerConsole)) {
colorTool.logValue("Files cat in .......... ", startCat.stopAndShow(), true);
assert hdt != null;
// Show Basic stats
if(!quiet){
System.out.println("Total Triples: "+hdt.getTriples().getNumberOfElements());
System.out.println("Different subjects: "+hdt.getDictionary().getNsubjects());
System.out.println("Different predicates: "+hdt.getDictionary().getNpredicates());
System.out.println("Different objects: "+hdt.getDictionary().getNobjects());
System.out.println("Common Subject/Object:"+hdt.getDictionary().getNshared());
colorTool.logValue("Total Triples ......... ", "" + hdt.getTriples().getNumberOfElements());
colorTool.logValue("Different subjects .... ", "" + hdt.getDictionary().getNsubjects());
colorTool.logValue("Different predicates .. ", "" + hdt.getDictionary().getNpredicates());
colorTool.logValue("Different objects ..... ", "" + hdt.getDictionary().getNobjects());
colorTool.logValue("Common Subject/Object . ", "" + hdt.getDictionary().getNshared());
}

// Dump to HDT file
StopWatch sw = new StopWatch();
hdt.saveToHDT(hdtOutput, this);
System.out.println("HDT saved to file in: "+sw.stopAndShow());
Files.delete(Paths.get(location+"dictionary"));
Files.delete(Paths.get(location+"triples"));
colorTool.logValue("HDT saved to file in .. ", sw.stopAndShow());
Files.deleteIfExists(Path.of(location + "dictionary"));
Files.deleteIfExists(Path.of(location+"triples"));
FileUtils.deleteDirectory(theDir);


// Generate index and dump it to .hdt.index file
sw.reset();
if(generateIndex) {
if (generateIndex) {
HDTManager.indexedHDT(hdt,this);
System.out.println("Index generated and saved in: "+sw.stopAndShow());
colorTool.logValue("Index generated and saved in ", sw.stopAndShow());
}
}

Expand All @@ -124,29 +156,31 @@ public void notifyProgress(float level, String message) {
}
}

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Throwable {
HDTCat hdtCat = new HDTCat();
System.out.println("Welcome to hdtCat!");
System.out.println("This tool was developed by Dennis Diefenbach and Jośe M. Giḿenez-Garćıa");
JCommander com = new JCommander(hdtCat, args);
JCommander com = new JCommander(hdtCat);
com.parse(args);
com.setProgramName("hdtCat");
hdtCat.colorTool = new ColorTool(hdtCat.color, hdtCat.quiet);

if(hdtCat.parameters.size()==3) {
hdtCat.hdtInput1 = hdtCat.parameters.get(0);
hdtCat.hdtInput2 = hdtCat.parameters.get(1);
hdtCat.hdtOutput = hdtCat.parameters.get(2);
} else if (showVersion){
System.out.println(HDTVersion.get_version_string("."));
hdtCat.colorTool.log("Welcome to hdtCat!");
hdtCat.colorTool.log("This tool was developed by Dennis Diefenbach and Jośe M. Giḿenez-Garćıa");

if (showVersion) {
hdtCat.colorTool.log(HDTVersion.get_version_string("."));
System.exit(0);
}
else{
} else if (hdtCat.parameters.size() > 3) {
// force k-cat if we have more than 2 HDTs to cat
hdtCat.kcat = true;
} else if (hdtCat.parameters.size() < 3) {
com.usage();
System.exit(1);
}

System.out.println("Cat "+ hdtCat.hdtInput1+" and "+ hdtCat.hdtInput2+" to "+ hdtCat.hdtOutput);

hdtCat.colorTool.log("Cat " + hdtCat.parameters.stream()
.limit(hdtCat.parameters.size() - 1)
.collect(Collectors.joining(", "))
+ " to " + hdtCat.parameters.get(hdtCat.parameters.size() - 1));
hdtCat.execute();
}
}
Loading