Skip to content

Commit

Permalink
Merge pull request #186 from ate47/dev_async_cattree
Browse files Browse the repository at this point in the history
Async CatTree and dir parser
  • Loading branch information
D063520 authored Jan 16, 2023
2 parents 1298c4b + 8335366 commit c2abc85
Show file tree
Hide file tree
Showing 48 changed files with 2,779 additions and 1,066 deletions.
8 changes: 6 additions & 2 deletions hdt-api/src/main/java/org/rdfhdt/hdt/enums/RDFNotation.java
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,12 @@ public static RDFNotation guess(String fileName) throws IllegalArgumentException

throw new IllegalArgumentException("Could not guess the format for "+fileName);
}

/**
 * Guesses the RDF notation of a file by delegating to {@link #guess(String)}.
 *
 * <p>The absolute path of the file (not only its name) is forwarded, which is the same
 * form the {@code guess(Path)} overload forwards.
 *
 * @param fileName the file to inspect
 * @return the notation returned by {@link #guess(String)} for the absolute path
 * @throws IllegalArgumentException propagated from {@link #guess(String)}
 */
public static RDFNotation guess(File fileName) throws IllegalArgumentException {
    // An older "return guess(fileName.getName());" used to precede this statement, which
    // made it unreachable (a compile error). Only the absolute-path variant is kept.
    return guess(fileName.getAbsolutePath());
}

/**
 * Guesses the RDF notation of a path by delegating to {@link #guess(String)}.
 *
 * @param fileName path to inspect, converted to its absolute string form first
 * @return the notation returned by {@link #guess(String)} for that string
 * @throws IllegalArgumentException propagated from {@link #guess(String)}
 */
public static RDFNotation guess(Path fileName) throws IllegalArgumentException {
    final String absoluteLocation = fileName.toAbsolutePath().toString();
    return guess(absoluteLocation);
}
}
54 changes: 54 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import org.rdfhdt.hdt.rdf.RDFFluxStop;
import org.rdfhdt.hdt.util.Profiler;

import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.DoubleSupplier;
Expand All @@ -42,6 +44,58 @@
* @author mario.arias
*/
public interface HDTOptions {
/**
 * Shared options instance that never holds a value: {@code get} always returns
 * {@code null}, {@code clear} does nothing and {@code set} is rejected with a
 * {@link NotImplementedException}, so it can't be used to set values.
 */
HDTOptions EMPTY = new HDTOptions() {
    @Override
    public String get(String key) {
        // nothing is ever stored, every key is absent
        return null;
    }

    @Override
    public void set(String key, String value) {
        // this instance can't be written to
        throw new NotImplementedException("set");
    }

    @Override
    public void clear() {
        // nothing to remove
    }
};

/**
 * Creates modifiable options that start without any entry.
 *
 * @return the options built by {@link #of(Map)} from an empty map
 */
static HDTOptions of() {
    final Map<String, String> noEntries = Map.of();
    return of(noEntries);
}

/**
 * Creates modifiable options initialised with the entries of {@code data}.
 * The entries are copied into a new map, so writes made through the returned
 * options don't touch {@code data}.
 *
 * @param data entries to start from
 * @return options backed by a private copy of {@code data}
 */
static HDTOptions of(Map<String, String> data) {
    final Map<String, String> store = new HashMap<>(data);
    return new HDTOptions() {
        @Override
        public String get(String key) {
            return store.get(key);
        }

        @Override
        public void set(String key, String value) {
            store.put(key, value);
        }

        @Override
        public void clear() {
            store.clear();
        }
    };
}


/**
Expand Down
29 changes: 29 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptionsKeys.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ public class HDTOptionsKeys {
*/
@Key(type = Key.Type.PATH, desc = "Path of the CatTree generation")
public static final String LOADER_CATTREE_LOCATION_KEY = "loader.cattree.location";
/**
 * Key to enable the async version of the {@link org.rdfhdt.hdt.hdt.HDTManager} catTree methods, which runs
 * the k-HDTCAT algorithm. Boolean value, default to false
 */
@Key(type = Key.Type.BOOLEAN, desc = "Use async version")
public static final String LOADER_CATTREE_ASYNC_KEY = "loader.cattree.async";
/**
* Same as {@link #LOADER_TYPE_KEY} for loader in the CATTREE method
*/
Expand Down Expand Up @@ -202,11 +208,27 @@ public class HDTOptionsKeys {
*/
@Key(type = Key.Type.PATH, desc = "Profiler output file")
public static final String PROFILER_OUTPUT_KEY = "profiler.output";
/**
 * Key for enabling the async profiler (if implemented), a second profiler for async bi tasks, default to
 * false. Boolean value
 */
@Key(type = Key.Type.BOOLEAN, desc = "Run a second profiler for async bi tasks")
public static final String PROFILER_ASYNC_KEY = "profiler.async";
/**
 * Key for the async profiler output (if implemented), the counterpart of {@link #PROFILER_OUTPUT_KEY} for
 * the profiler enabled with {@link #PROFILER_ASYNC_KEY}. File value
 */
@Key(type = Key.Type.PATH, desc = "Profiler output file")
public static final String PROFILER_ASYNC_OUTPUT_KEY = "profiler.async.output";
/**
 * Key for enabling the canonical NTriple file simple parser, default to false. Boolean value
 */
@Key(type = Key.Type.BOOLEAN, desc = "Use the canonical NT file parser, removing checks")
public static final String NT_SIMPLE_PARSER_KEY = "parser.ntSimpleParser";
/**
 * Key for setting the maximum number of files loaded at the same time by the directory parser: 1 for no
 * async parsing, 0 to use the number of processors. Number value, default to 1
 */
@Key(type = Key.Type.NUMBER, desc = "Use async dir parser")
public static final String ASYNC_DIR_PARSER_KEY = "parser.dir.async";
/**
* Key for setting the triple order. see {@link org.rdfhdt.hdt.enums.TripleComponentOrder}'s names to have the values
* default to {@link org.rdfhdt.hdt.enums.TripleComponentOrder#SPO}
Expand Down Expand Up @@ -293,6 +315,13 @@ public class HDTOptionsKeys {
@Key(type = Key.Type.BOOLEAN, desc = "Delete the HDTCat temp files directory after HDTCat, default to true")
public static final String HDTCAT_DELETE_LOCATION = "hdtcat.deleteLocation";

/**
 * Key for using the disk implementation to generate the hdt sub-index. Boolean value
 */
@Key(type = Key.Type.BOOLEAN, desc = "Use disk implementation to generate the hdt sub-index")
public static final String BITMAPTRIPLES_SEQUENCE_DISK = "bitmaptriples.sequence.disk";
/**
 * Key for using the disk 375 subindex implementation to generate the hdt sub-index. Boolean value
 */
@Key(type = Key.Type.BOOLEAN, desc = "Use disk 375 subindex implementation to generate the hdt sub-index")
public static final String BITMAPTRIPLES_SEQUENCE_DISK_SUBINDEX = "bitmaptriples.sequence.disk.subindex";

/**
 * Key for the disk location used with the {@link #BITMAPTRIPLES_SEQUENCE_DISK} option. Path value: a
 * location is not a flag, so the key is typed as a path like the other location/output keys of this class
 */
@Key(type = Key.Type.PATH, desc = "Disk location for the " + BITMAPTRIPLES_SEQUENCE_DISK + " option")
public static final String BITMAPTRIPLES_SEQUENCE_DISK_LOCATION = "bitmaptriples.sequence.disk.location";
// use tree-map to have a better order
private static final Map<String, Option> OPTION_MAP = new TreeMap<>();

Expand Down
11 changes: 11 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/rdf/RDFParserCallback.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,17 @@ public interface RDFParserCallback {
/**
 * Callback receiving a triple together with a position value.
 */
@FunctionalInterface
interface RDFCallback {
    /**
     * Handles one triple.
     *
     * @param triple the triple
     * @param pos    position value passed along with the triple
     */
    void processTriple(TripleString triple, long pos);

    /**
     * @return a callback forwarding every triple to this one while holding this
     * callback's monitor, so calls made through it are executed one at a time
     */
    default RDFCallback async() {
        final RDFCallback target = this;
        return (triple, pos) -> {
            synchronized (target) {
                target.processTriple(triple, pos);
            }
        };
    }
}

void doParse(String fileName, String baseUri, RDFNotation notation, boolean keepBNode, RDFCallback callback) throws ParserException;
Expand Down
48 changes: 41 additions & 7 deletions hdt-api/src/main/java/org/rdfhdt/hdt/util/Profiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,24 @@ private static byte[] readBuffer(InputStream input, int length) throws IOExcepti
* @return profiler
*/
public static Profiler createOrLoadSubSection(String name, HDTOptions options, boolean setId) {
    // same as the four-argument overload, with its async flag turned off
    final boolean useAsyncProfiler = false;
    return createOrLoadSubSection(name, options, setId, useAsyncProfiler);
}

/**
* create or load a profiler from the options into a subsection
*
* @param name name
* @param options options
* @param setId set the id after loading (if required)
* @param async use async profiler
* @return profiler
*/
public static Profiler createOrLoadSubSection(String name, HDTOptions options, boolean setId, boolean async) {
// no options, we can't create
if (options == null) {
return new Profiler(name, null);
return new Profiler(name, null, async);
}
String profiler = options.get(HDTOptionsKeys.PROFILER_KEY);
String profiler = options.get(async ? HDTOptionsKeys.PROFILER_ASYNC_KEY : HDTOptionsKeys.PROFILER_KEY);
if (profiler != null && profiler.length() != 0 && profiler.charAt(0) == '!') {
Profiler prof = getProfilerById(Long.parseLong(profiler.substring(1)));
if (prof != null) {
Expand All @@ -125,9 +138,9 @@ public static Profiler createOrLoadSubSection(String name, HDTOptions options, b
}
}
// no id, not an id
Profiler prof = new Profiler(name, options);
Profiler prof = new Profiler(name, options, async);
if (setId) {
options.set(HDTOptionsKeys.PROFILER_KEY, prof);
options.set(async ? HDTOptionsKeys.PROFILER_ASYNC_KEY : HDTOptionsKeys.PROFILER_KEY, prof);
}
return prof;
}
Expand All @@ -147,7 +160,17 @@ public static Profiler createOrLoadSubSection(String name, HDTOptions options, b
* @param name the profiler name
*/
public Profiler(String name) {
    // Only one explicit constructor call is allowed and it must be the first statement.
    // The older "this(name, null);" that preceded this call is dropped; both end up in
    // the three-argument constructor with a null spec and async = false.
    this(name, false);
}

/**
 * create a disabled profiler, delegates to the three-argument constructor with a null spec
 *
 * @param name the profiler name
 * @param async async profiler flag, forwarded unchanged
 */
public Profiler(String name, boolean async) {
// NOTE(review): "disabled" comes from the original documentation; the null-spec branch isn't visible here, confirm
this(name, null, async);
}

/**
Expand All @@ -157,13 +180,24 @@ public Profiler(String name) {
* @param spec spec (nullable)
*/
public Profiler(String name, HDTOptions spec) {
// non-async variant: with async = false, the PROFILER_KEY / PROFILER_OUTPUT_KEY options are read from a non-null spec
this(name, spec, false);
}

/**
* create a profiler from specifications
*
* @param name profiler name
* @param spec spec (nullable)
* @param async async profiler
*/
public Profiler(String name, HDTOptions spec, boolean async) {
this.id = PROFILER_IDS.incrementAndGet();
PROFILER.put(this.id, this);
this.name = Objects.requireNonNull(name, "name can't be null!");
if (spec != null) {
String b = spec.get(HDTOptionsKeys.PROFILER_KEY);
String b = spec.get(async ? HDTOptionsKeys.PROFILER_ASYNC_KEY : HDTOptionsKeys.PROFILER_KEY);
disabled = b == null || b.length() == 0 || !(b.charAt(0) == '!' || "true".equalsIgnoreCase(b));
String profilerOutputLocation = spec.get(HDTOptionsKeys.PROFILER_OUTPUT_KEY);
String profilerOutputLocation = spec.get(async ? HDTOptionsKeys.PROFILER_ASYNC_OUTPUT_KEY : HDTOptionsKeys.PROFILER_OUTPUT_KEY);
if (profilerOutputLocation != null && !profilerOutputLocation.isEmpty()) {
outputPath = Path.of(profilerOutputLocation);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;

/**
* Implements an index on top of the Bitmap64 to solve select and rank queries more efficiently.
Expand All @@ -43,7 +44,9 @@
* select1 -&gt; O(log log n)
*
* @author mario.arias
* @deprecated Use {@link Bitmap375Big#memory(long, Path)} instead
*/
@Deprecated
public class Bitmap375 extends Bitmap64 implements ModifiableBitmap {
// Constants
private static final int BLOCKS_PER_SUPER = 4;
Expand Down
Loading

0 comments on commit c2abc85

Please sign in to comment.