Skip to content

Commit

Permalink
Merge pull request #181 from ate47/dev_k_hdtcat
Browse files Browse the repository at this point in the history
K-HDTCat
  • Loading branch information
D063520 authored Dec 1, 2022
2 parents aa1ff09 + eac943b commit d8d4b2a
Show file tree
Hide file tree
Showing 44 changed files with 3,247 additions and 305 deletions.
14 changes: 14 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.List;

import org.rdfhdt.hdt.compact.bitmap.Bitmap;
import org.rdfhdt.hdt.enums.CompressionType;
Expand Down Expand Up @@ -461,6 +462,18 @@ public static TripleWriter getHDTWriter(String outFile, String baseURI, HDTOptio
public static HDT catHDT(String location, String hdtFileName1, String hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException {
    // Resolve the concrete manager first, then hand the two-file cat over to its hook.
    final HDTManager manager = HDTManager.getInstance();
    return manager.doHDTCat(location, hdtFileName1, hdtFileName2, hdtFormat, listener);
}

/**
 * Build a single HDT out of several HDT files by joining their triples.
 *
 * @param hdtFileNames names of the HDT files to join
 * @param hdtFormat    parameters used to tune the generated HDT
 * @param listener     listener notified of the loading progress, may be null when no
 *                     notification is needed
 * @return the HDT returned by the manager implementation
 * @throws IOException when the file cannot be found
 */
public static HDT catHDT(List<String> hdtFileNames, HDTOptions hdtFormat, ProgressListener listener) throws IOException {
    // Same delegation scheme as the two-file variant: the instance hook does the work.
    final HDTManager manager = HDTManager.getInstance();
    return manager.doHDTCat(hdtFileNames, hdtFormat, listener);
}
/**
* Create a new HDT by removing from hdt1 the triples of hdt2.
* @param hdtFileName1 First hdt file name
Expand Down Expand Up @@ -561,6 +574,7 @@ public static HDT catTree(RDFFluxStop fluxStop, HDTSupplier supplier, Iterator<T
// Hooks implemented by the concrete manager. In the visible code the static catHDT overloads
// forward to the doHDTCat hooks through getInstance(); the other hooks presumably follow the
// same pattern (confirm against their static counterparts).
protected abstract TripleWriter doGetHDTWriter(OutputStream out, String baseURI, HDTOptions hdtFormat) throws IOException;
protected abstract TripleWriter doGetHDTWriter(String outFile, String baseURI, HDTOptions hdtFormat) throws IOException;
// Backs catHDT(location, hdtFileName1, hdtFileName2, hdtFormat, listener): cat of two HDT files.
protected abstract HDT doHDTCat(String location, String hdtFileName1, String hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException;
// Backs catHDT(hdtFileNames, hdtFormat, listener): cat of a list of HDT files.
protected abstract HDT doHDTCat(List<String> hdtFileNames, HDTOptions hdtFormat, ProgressListener listener) throws IOException;
protected abstract HDT doHDTDiff(String hdtFileName1, String hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException;
protected abstract HDT doHDTDiffBit(String location, String hdtFileName, Bitmap deleteBitmap, HDTOptions hdtFormat, ProgressListener listener) throws IOException;
protected abstract HDT doHDTCatTree(RDFFluxStop fluxStop, HDTSupplier supplier, String filename, String baseURI, RDFNotation rdfNotation, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException;
Expand Down
20 changes: 20 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@

package org.rdfhdt.hdt.options;

import org.rdfhdt.hdt.exceptions.NotImplementedException;
import org.rdfhdt.hdt.rdf.RDFFluxStop;
import org.rdfhdt.hdt.util.Profiler;

import java.util.Objects;
import java.util.Set;
import java.util.function.DoubleSupplier;
import java.util.function.LongSupplier;
import java.util.function.Supplier;
Expand All @@ -55,6 +57,10 @@ public interface HDTOptions {
*/
String get(String key);

/**
 * get the keys of these options
 * <p>
 * This default implementation always throws; an implementation able to list its keys has to
 * override it.
 *
 * @return the keys (presumably the names of the options that are set -- confirm against the
 *         overriding implementations); the default implementation never returns
 * @throws NotImplementedException always, unless the method is overridden
 */
default Set<Object> getKeys() {
throw new NotImplementedException();
}

/**
* get a value
*
Expand Down Expand Up @@ -86,6 +92,20 @@ default String get(String key, Supplier<String> defaultValue) {
default boolean getBoolean(String key) {
    // A null value is read as false: "true".equalsIgnoreCase(null) is false.
    final String raw = get(key);
    return "true".equalsIgnoreCase(raw);
}
/**
 * get a boolean, falling back to a default when the key has no value
 *
 * @param key key
 * @param defaultValue value returned when {@link #get(String)} yields null for the key
 * @return defaultValue if the value isn't defined, otherwise true only if the value is
 *         "true" (ignoring case)
 */
default boolean getBoolean(String key, boolean defaultValue) {
    final String raw = get(key);
    return raw == null ? defaultValue : "true".equalsIgnoreCase(raw);
}

/**
* get a double
Expand Down
22 changes: 22 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptionsKeys.java
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ public class HDTOptionsKeys {
*/
@Key(type = Key.Type.DOUBLE, desc = "Memory fault factor for HDTCat tree method split")
public static final String LOADER_CATTREE_MEMORY_FAULT_FACTOR = "loader.cattree.memoryFaultFactor";
/**
* Key for the number of HDTs merged at the same time (k) by the {@link org.rdfhdt.hdt.hdt.HDTManager} catTree method.
* Defaults to 2, in which case the default HDTCat implementation is used instead of K-HDTCat.
*/
@Key(type = Key.Type.NUMBER, desc = "Number of HDT to merge at the same time with K-HDTCat, by default it use the default HDTCat implementation")
public static final String LOADER_CATTREE_KCAT = "loader.cattree.kcat";

/**
* Key for the hdt supplier type, default to memory
Expand Down Expand Up @@ -271,6 +277,22 @@ public class HDTOptionsKeys {
@Value(key = DICTIONARY_TYPE_KEY, desc = "Multi section dictionary")
public static final String DICTIONARY_TYPE_VALUE_MULTI_OBJECTS = "dictionaryMultiObj";

/**
* Location of the HDTCat temp files
*/
@Key(type = Key.Type.PATH, desc = "Location of the HDTCat temp files")
public static final String HDTCAT_LOCATION = "hdtcat.location";
/**
* Location of the HDTCat hdt after the loading
*/
@Key(type = Key.Type.PATH, desc = "Location of the HDTCat hdt after the loading")
public static final String HDTCAT_FUTURE_LOCATION = "hdtcat.location.future";
/**
* Delete the HDTCat temp files directory after HDTCat
*/
@Key(type = Key.Type.BOOLEAN, desc = "Delete the HDTCat temp files directory after HDTCat, default to true")
public static final String HDTCAT_DELETE_LOCATION = "hdtcat.deleteLocation";

// use tree-map to have a better order
private static final Map<String, Option> OPTION_MAP = new TreeMap<>();

Expand Down
11 changes: 10 additions & 1 deletion hdt-api/src/main/java/org/rdfhdt/hdt/triples/TripleID.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* TripleID holds a triple using Long IDs
*
*/
public final class TripleID implements Comparable<TripleID>, Serializable {
public final class TripleID implements Comparable<TripleID>, Serializable, Cloneable {
private static final long serialVersionUID = -4685524566493494912L;

private long subject;
Expand Down Expand Up @@ -255,6 +255,15 @@ public boolean equals(Object o) {
return !( subject!=other.subject || predicate!=other.predicate || object!=other.object );
}

/**
 * Copy this triple through {@link Object#clone()}.
 *
 * @return the copy, typed as a TripleID
 * @throws AssertionError if the clone is refused, which is not expected since the class
 *                        implements Cloneable
 */
@Override
public TripleID clone() {
    final Object copy;
    try {
        copy = super.clone();
    } catch (CloneNotSupportedException e) {
        // keep the cause so an unexpected refusal can be diagnosed
        throw new AssertionError(e);
    }
    return (TripleID) copy;
}

@Override
public int hashCode() {
return (int) (subject * 13 + predicate * 17 + object * 31);
Expand Down
1 change: 1 addition & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/util/Profiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ public void writeToDisk(Path outputPath) throws IOException {
/**
 * Get the main section, creating it on the first call.
 * <p>
 * When the section is created, maxSize is raised (never lowered) to
 * {@code name.length() + deep * 2}.
 *
 * @return the main section
 */
public Section getMainSection() {
    if (this.mainSection != null) {
        // already created by a previous call
        return this.mainSection;
    }
    this.mainSection = new Section(name);
    maxSize = Math.max(name.length() + deep * 2, maxSize);
    return this.mainSection;
}
Expand Down
5 changes: 5 additions & 0 deletions hdt-java-cli/bin/hdtCat.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@echo off
rem Launcher of the HDTCat command line tool (org.rdfhdt.hdt.tools.HDTCat) for Windows.

rem Load JAVACMD and JAVAOPTIONS from the environment script located next to this file.
call "%~dp0\javaenv.bat"

rem The classpath is quoted so that an installation path containing spaces (or characters such
rem as ^& or parentheses) reaches java as one single argument instead of being split by cmd.
"%JAVACMD%" %JAVAOPTIONS% -classpath "%~dp0\..\lib\*" org.rdfhdt.hdt.tools.HDTCat %*
1 change: 1 addition & 0 deletions hdt-java-cli/bin/javaenv.bat
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
set JAVAOPTIONS=-Xmx1G
set JAVACMD=java
set RDFHDT_COLOR=false

set JAVACP="%~dp0\..\target;%~dp0\..\target\classes;%~dp0\..\target\dependency\*.jar;.

Expand Down
6 changes: 6 additions & 0 deletions hdt-java-cli/bin/javaenv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ else
JAVA="$JAVA_HOME/bin/java -server"
fi

# Set HDT Color options, set to true to allow color
if [ "$RDFHDT_COLOR" = "" ] ; then
export RDFHDT_COLOR="false"
fi

# Set Java options
if [ "$JAVA_OPTIONS" = "" ] ; then
JAVA_OPTIONS="-Xmx1g"
fi

98 changes: 66 additions & 32 deletions hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HDTCat.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,34 @@
import com.beust.jcommander.internal.Lists;

import org.apache.commons.io.FileUtils;
import org.rdfhdt.hdt.exceptions.ParserException;
import org.rdfhdt.hdt.hdt.HDT;
import org.rdfhdt.hdt.hdt.HDTManager;
import org.rdfhdt.hdt.hdt.HDTVersion;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.options.HDTOptions;
import org.rdfhdt.hdt.options.HDTOptionsKeys;
import org.rdfhdt.hdt.options.HDTSpecification;
import org.rdfhdt.hdt.util.StopWatch;
import org.rdfhdt.hdt.util.listener.ColorTool;
import org.rdfhdt.hdt.util.listener.MultiThreadListenerConsole;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;

/**
* @author Dennis Diefenbach
*
*/
public class HDTCat implements ProgressListener {

public String hdtInput1;
public String hdtInput2;
public String hdtOutput;
private ColorTool colorTool;

@Parameter(description = "<input HDT1> <input HDT2> <output HDT>")
@Parameter(description = "<input HDTs>+ <output HDT>")
public List<String> parameters = Lists.newArrayList();

@Parameter(names = "-options", description = "HDT Conversion options (override those of config file)")
Expand All @@ -57,6 +60,9 @@ public class HDTCat implements ProgressListener {
@Parameter(names = "-config", description = "Conversion config file")
public String configFile;

@Parameter(names = "-kcat", description = "Use KCat algorithm, default if the count of input HDTs isn't 2")
public boolean kcat;

@Parameter(names = "-index", description = "Generate also external indices to solve all queries")
public boolean generateIndex;

Expand All @@ -66,8 +72,19 @@ public class HDTCat implements ProgressListener {
@Parameter(names = "-quiet", description = "Do not show progress of the conversion")
public boolean quiet;

public void execute() throws IOException {
@Parameter(names = "-color", description = "Print using color (if available)")
public boolean color;

/**
 * Run the cat over the input HDTs given in {@link #parameters}.
 * <p>
 * Without {@link #kcat}, the first two parameters are joined with the two-file
 * HDTManager.catHDT; with it, every parameter except the last one (the output file) is passed
 * to the list based HDTManager.catHDT.
 *
 * @param location working location, only passed to the two-file variant
 * @param spec     options of the cat
 * @param listener progress listener handed to the HDTManager
 * @return the HDT returned by the HDTManager
 * @throws IOException if the HDTManager call fails
 */
private HDT cat(String location, HDTOptions spec, ProgressListener listener) throws IOException {
    if (!kcat) {
        final String firstInput = parameters.get(0);
        final String secondInput = parameters.get(1);
        return HDTManager.catHDT(location, firstInput, secondInput, spec, listener);
    }
    // the last parameter is the output file, everything before it is an input
    final List<String> inputs = parameters.subList(0, parameters.size() - 1);
    return HDTManager.catHDT(inputs, spec, listener);
}


public void execute() throws IOException {
HDTSpecification spec;
if(configFile!=null) {
spec = new HDTSpecification(configFile);
Expand All @@ -78,35 +95,50 @@ public void execute() throws IOException {
spec.setOptions(options);
}

String hdtOutput = parameters.get(parameters.size() - 1);
File file = new File(hdtOutput);
File theDir = new File(file.getAbsolutePath()+"_tmp");

String locationOpt = spec.get(HDTOptionsKeys.HDTCAT_LOCATION);

if (locationOpt == null) {
locationOpt = file.getAbsolutePath()+"_tmp";
spec.set(HDTOptionsKeys.HDTCAT_LOCATION, locationOpt);
}

File theDir = new File(locationOpt);
Files.createDirectories(theDir.toPath());
String location = theDir.getAbsolutePath()+"/";

try (HDT hdt = HDTManager.catHDT(location,hdtInput1, hdtInput2 , spec,this)) {
ProgressListener listenerConsole =
!quiet ? (kcat ? new MultiThreadListenerConsole(color) : this)
: null;
StopWatch startCat = new StopWatch();
try (HDT hdt = cat(location, spec, listenerConsole)) {
colorTool.logValue("Files cat in .......... ", startCat.stopAndShow(), true);
assert hdt != null;
// Show Basic stats
if(!quiet){
System.out.println("Total Triples: "+hdt.getTriples().getNumberOfElements());
System.out.println("Different subjects: "+hdt.getDictionary().getNsubjects());
System.out.println("Different predicates: "+hdt.getDictionary().getNpredicates());
System.out.println("Different objects: "+hdt.getDictionary().getNobjects());
System.out.println("Common Subject/Object:"+hdt.getDictionary().getNshared());
colorTool.logValue("Total Triples ......... ", "" + hdt.getTriples().getNumberOfElements());
colorTool.logValue("Different subjects .... ", "" + hdt.getDictionary().getNsubjects());
colorTool.logValue("Different predicates .. ", "" + hdt.getDictionary().getNpredicates());
colorTool.logValue("Different objects ..... ", "" + hdt.getDictionary().getNobjects());
colorTool.logValue("Common Subject/Object . ", "" + hdt.getDictionary().getNshared());
}

// Dump to HDT file
StopWatch sw = new StopWatch();
hdt.saveToHDT(hdtOutput, this);
System.out.println("HDT saved to file in: "+sw.stopAndShow());
Files.delete(Paths.get(location+"dictionary"));
Files.delete(Paths.get(location+"triples"));
colorTool.logValue("HDT saved to file in .. ", sw.stopAndShow());
Files.deleteIfExists(Path.of(location + "dictionary"));
Files.deleteIfExists(Path.of(location+"triples"));
FileUtils.deleteDirectory(theDir);


// Generate index and dump it to .hdt.index file
sw.reset();
if(generateIndex) {
if (generateIndex) {
HDTManager.indexedHDT(hdt,this);
System.out.println("Index generated and saved in: "+sw.stopAndShow());
colorTool.logValue("Index generated and saved in ", sw.stopAndShow());
}
}

Expand All @@ -124,29 +156,31 @@ public void notifyProgress(float level, String message) {
}
}

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Throwable {
HDTCat hdtCat = new HDTCat();
System.out.println("Welcome to hdtCat!");
System.out.println("This tool was developed by Dennis Diefenbach and Jośe M. Giḿenez-Garćıa");
JCommander com = new JCommander(hdtCat, args);
JCommander com = new JCommander(hdtCat);
com.parse(args);
com.setProgramName("hdtCat");
hdtCat.colorTool = new ColorTool(hdtCat.color, hdtCat.quiet);

if(hdtCat.parameters.size()==3) {
hdtCat.hdtInput1 = hdtCat.parameters.get(0);
hdtCat.hdtInput2 = hdtCat.parameters.get(1);
hdtCat.hdtOutput = hdtCat.parameters.get(2);
} else if (showVersion){
System.out.println(HDTVersion.get_version_string("."));
hdtCat.colorTool.log("Welcome to hdtCat!");
hdtCat.colorTool.log("This tool was developed by Dennis Diefenbach and Jośe M. Giḿenez-Garćıa");

if (showVersion) {
hdtCat.colorTool.log(HDTVersion.get_version_string("."));
System.exit(0);
}
else{
} else if (hdtCat.parameters.size() > 3) {
// force k-cat if we have more than 2 HDTs to cat
hdtCat.kcat = true;
} else if (hdtCat.parameters.size() < 3) {
com.usage();
System.exit(1);
}

System.out.println("Cat "+ hdtCat.hdtInput1+" and "+ hdtCat.hdtInput2+" to "+ hdtCat.hdtOutput);

hdtCat.colorTool.log("Cat " + hdtCat.parameters.stream()
.limit(hdtCat.parameters.size() - 1)
.collect(Collectors.joining(", "))
+ " to " + hdtCat.parameters.get(hdtCat.parameters.size() - 1));
hdtCat.execute();
}
}
Loading

0 comments on commit d8d4b2a

Please sign in to comment.