Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Async CatTree and dir parser #186

Merged
merged 3 commits into from
Jan 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions hdt-api/src/main/java/org/rdfhdt/hdt/enums/RDFNotation.java
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,12 @@ public static RDFNotation guess(String fileName) throws IllegalArgumentException

throw new IllegalArgumentException("Could not guess the format for "+fileName);
}

public static RDFNotation guess(File fileName) throws IllegalArgumentException {
return guess(fileName.getName());
return guess(fileName.getAbsolutePath());
}

/**
 * Guess the RDF notation of a path by delegating to {@link #guess(String)} with
 * the textual form of the absolute path.
 *
 * @param fileName path of the file to inspect
 * @return the guessed notation
 * @throws IllegalArgumentException if no notation can be guessed for the path
 */
public static RDFNotation guess(Path fileName) throws IllegalArgumentException {
    final String absoluteLocation = fileName.toAbsolutePath().toString();
    return guess(absoluteLocation);
}
}
54 changes: 54 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import org.rdfhdt.hdt.rdf.RDFFluxStop;
import org.rdfhdt.hdt.util.Profiler;

import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.DoubleSupplier;
Expand All @@ -42,6 +44,58 @@
* @author mario.arias
*/
public interface HDTOptions {
/**
* Shared empty options: {@link #get(String)} always returns null and {@link #clear()} does nothing.
* This instance can't be used to set values, {@link #set(String, String)} always throws a
* NotImplementedException.
*/
HDTOptions EMPTY = new HDTOptions() {
@Override
public void clear() {
// already empty
}

@Override
public String get(String key) {
// no value for key
return null;
}

@Override
public void set(String key, String value) {
// read-only instance, refuse any write
throw new NotImplementedException("set");
}
};

/**
 * Create a new set of options without any entry, the returned options can be modified.
 *
 * @return empty, modifiable options
 */
static HDTOptions of() {
    final Map<String, String> noEntries = Map.of();
    return of(noEntries);
}

/**
 * Create modifiable options pre-filled with the entries of a map. The entries are copied
 * into a new {@link HashMap}, so the returned options never write to {@code data}.
 *
 * @param data initial key/value entries
 * @return modifiable options backed by the copy
 */
static HDTOptions of(Map<String, String> data) {
    final Map<String, String> entries = new HashMap<>(data);
    return new HDTOptions() {
        @Override
        public String get(String key) {
            return entries.get(key);
        }

        @Override
        public void set(String key, String value) {
            entries.put(key, value);
        }

        @Override
        public void clear() {
            entries.clear();
        }
    };
}


/**
Expand Down
29 changes: 29 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptionsKeys.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ public class HDTOptionsKeys {
*/
@Key(type = Key.Type.PATH, desc = "Path of the CatTree generation")
public static final String LOADER_CATTREE_LOCATION_KEY = "loader.cattree.location";
/**
* Key to use the async version of the {@link org.rdfhdt.hdt.hdt.HDTManager} catTree methods, which will run the
* k-HDTCAT algorithm. Boolean value, defaults to false
*/
@Key(type = Key.Type.BOOLEAN, desc = "Use async version")
public static final String LOADER_CATTREE_ASYNC_KEY = "loader.cattree.async";
/**
* Same as {@link #LOADER_TYPE_KEY} for loader in the CATTREE method
*/
Expand Down Expand Up @@ -202,11 +208,27 @@ public class HDTOptionsKeys {
*/
@Key(type = Key.Type.PATH, desc = "Profiler output file")
public static final String PROFILER_OUTPUT_KEY = "profiler.output";
/**
* Key for enabling the async profiler (if implemented), a second profiler for async bi tasks. Boolean value,
* defaults to false
*/
@Key(type = Key.Type.BOOLEAN, desc = "Run a second profiler for async bi tasks")
public static final String PROFILER_ASYNC_KEY = "profiler.async";
/**
* Key for the async profiler output (if implemented), the async counterpart of {@link #PROFILER_OUTPUT_KEY}.
* File value
*/
@Key(type = Key.Type.PATH, desc = "Profiler output file")
public static final String PROFILER_ASYNC_OUTPUT_KEY = "profiler.async.output";
/**
* Key for enabling the simple parser for canonical NTriple files, which removes checks. Boolean value, defaults
* to false
*/
@Key(type = Key.Type.BOOLEAN, desc = "Use the canonical NT file parser, removing checks")
public static final String NT_SIMPLE_PARSER_KEY = "parser.ntSimpleParser";
/**
* Key for setting the maximum number of files loaded with the directory parser: 1 for no async parsing, 0 to use
* the number of processors. Number value, defaults to 1
*/
@Key(type = Key.Type.NUMBER, desc = "Use async dir parser")
public static final String ASYNC_DIR_PARSER_KEY = "parser.dir.async";
/**
* Key for setting the triple order. see {@link org.rdfhdt.hdt.enums.TripleComponentOrder}'s names to have the values
* default to {@link org.rdfhdt.hdt.enums.TripleComponentOrder#SPO}
Expand Down Expand Up @@ -293,6 +315,13 @@ public class HDTOptionsKeys {
@Key(type = Key.Type.BOOLEAN, desc = "Delete the HDTCat temp files directory after HDTCat, default to true")
public static final String HDTCAT_DELETE_LOCATION = "hdtcat.deleteLocation";

/**
* Key for using the disk implementation to generate the hdt sub-index. Boolean value
*/
@Key(type = Key.Type.BOOLEAN, desc = "Use disk implementation to generate the hdt sub-index")
public static final String BITMAPTRIPLES_SEQUENCE_DISK = "bitmaptriples.sequence.disk";
/**
* Key for using the disk 375 subindex implementation to generate the hdt sub-index. Boolean value
*/
@Key(type = Key.Type.BOOLEAN, desc = "Use disk 375 subindex implementation to generate the hdt sub-index")
public static final String BITMAPTRIPLES_SEQUENCE_DISK_SUBINDEX = "bitmaptriples.sequence.disk.subindex";

/**
 * Key for the disk location used by the {@link #BITMAPTRIPLES_SEQUENCE_DISK} option. Path value
 */
// the value is a location, so it is declared as a PATH like the other location keys, not as a BOOLEAN
@Key(type = Key.Type.PATH, desc = "Disk location for the " + BITMAPTRIPLES_SEQUENCE_DISK + " option")
public static final String BITMAPTRIPLES_SEQUENCE_DISK_LOCATION = "bitmaptriples.sequence.disk.location";
// use tree-map to have a better order
private static final Map<String, Option> OPTION_MAP = new TreeMap<>();

Expand Down
11 changes: 11 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/rdf/RDFParserCallback.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,17 @@ public interface RDFParserCallback {
@FunctionalInterface
interface RDFCallback {
void processTriple(TripleString triple, long pos);

/**
 * Wrap this callback into a callback forwarding every triple to
 * {@link #processTriple(TripleString, long)} while holding the monitor of this
 * callback, so calls made through the wrapper are executed one at a time.
 *
 * @return an async version of this callback
 */
default RDFCallback async() {
    final RDFCallback delegate = this;
    return (triple, pos) -> {
        // same lock object as the wrapped callback itself
        synchronized (delegate) {
            delegate.processTriple(triple, pos);
        }
    };
}
}

void doParse(String fileName, String baseUri, RDFNotation notation, boolean keepBNode, RDFCallback callback) throws ParserException;
Expand Down
48 changes: 41 additions & 7 deletions hdt-api/src/main/java/org/rdfhdt/hdt/util/Profiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,24 @@ private static byte[] readBuffer(InputStream input, int length) throws IOExcepti
* @return profiler
*/
public static Profiler createOrLoadSubSection(String name, HDTOptions options, boolean setId) {
return createOrLoadSubSection(name, options, setId, false);
}

/**
* create or load a profiler from the options into a subsection
*
* @param name name
* @param options options
* @param setId set the id after loading (if required)
* @param async use async profiler
* @return profiler
*/
public static Profiler createOrLoadSubSection(String name, HDTOptions options, boolean setId, boolean async) {
// no options, we can't create
if (options == null) {
return new Profiler(name, null);
return new Profiler(name, null, async);
}
String profiler = options.get(HDTOptionsKeys.PROFILER_KEY);
String profiler = options.get(async ? HDTOptionsKeys.PROFILER_ASYNC_KEY : HDTOptionsKeys.PROFILER_KEY);
if (profiler != null && profiler.length() != 0 && profiler.charAt(0) == '!') {
Profiler prof = getProfilerById(Long.parseLong(profiler.substring(1)));
if (prof != null) {
Expand All @@ -125,9 +138,9 @@ public static Profiler createOrLoadSubSection(String name, HDTOptions options, b
}
}
// no id, not an id
Profiler prof = new Profiler(name, options);
Profiler prof = new Profiler(name, options, async);
if (setId) {
options.set(HDTOptionsKeys.PROFILER_KEY, prof);
options.set(async ? HDTOptionsKeys.PROFILER_ASYNC_KEY : HDTOptionsKeys.PROFILER_KEY, prof);
}
return prof;
}
Expand All @@ -147,7 +160,17 @@ public static Profiler createOrLoadSubSection(String name, HDTOptions options, b
* @param name the profiler name
*/
public Profiler(String name) {
this(name, null);
this(name, false);
}

/**
* create a disabled profiler, delegates to {@link #Profiler(String, HDTOptions, boolean)} with a null spec
*
* @param name the profiler name
* @param async async profiler
*/
public Profiler(String name, boolean async) {
// no spec to read the profiler options from
this(name, null, async);
}

/**
Expand All @@ -157,13 +180,24 @@ public Profiler(String name) {
* @param spec spec (nullable)
*/
public Profiler(String name, HDTOptions spec) {
this(name, spec, false);
}

/**
* create a profiler from specifications
*
* @param name profiler name
* @param spec spec (nullable)
* @param async async profiler
*/
public Profiler(String name, HDTOptions spec, boolean async) {
this.id = PROFILER_IDS.incrementAndGet();
PROFILER.put(this.id, this);
this.name = Objects.requireNonNull(name, "name can't be null!");
if (spec != null) {
String b = spec.get(HDTOptionsKeys.PROFILER_KEY);
String b = spec.get(async ? HDTOptionsKeys.PROFILER_ASYNC_KEY : HDTOptionsKeys.PROFILER_KEY);
disabled = b == null || b.length() == 0 || !(b.charAt(0) == '!' || "true".equalsIgnoreCase(b));
String profilerOutputLocation = spec.get(HDTOptionsKeys.PROFILER_OUTPUT_KEY);
String profilerOutputLocation = spec.get(async ? HDTOptionsKeys.PROFILER_ASYNC_OUTPUT_KEY : HDTOptionsKeys.PROFILER_OUTPUT_KEY);
if (profilerOutputLocation != null && !profilerOutputLocation.isEmpty()) {
outputPath = Path.of(profilerOutputLocation);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;

/**
* Implements an index on top of the Bitmap64 to solve select and rank queries more efficiently.
Expand All @@ -43,7 +44,9 @@
* select1 -&gt; O(log log n)
*
* @author mario.arias
* @deprecated Use {@link Bitmap375Big#memory(long, Path)} instead
*/
@Deprecated
public class Bitmap375 extends Bitmap64 implements ModifiableBitmap {
// Constants
private static final int BLOCKS_PER_SUPER = 4;
Expand Down
Loading