-
Notifications
You must be signed in to change notification settings - Fork 70
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement HDTCatTree to create an HDT with low resources using HDTCat
- Loading branch information
Showing
18 changed files
with
1,328 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package org.rdfhdt.hdt.hdt; | ||
|
||
import org.rdfhdt.hdt.exceptions.ParserException; | ||
import org.rdfhdt.hdt.listener.ProgressListener; | ||
import org.rdfhdt.hdt.options.HDTOptions; | ||
import org.rdfhdt.hdt.triples.TripleString; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Path; | ||
import java.util.Iterator; | ||
|
||
/** | ||
* Interface describing an HDT generator method | ||
* | ||
* @author Antoine Willerval | ||
*/ | ||
@FunctionalInterface | ||
public interface HDTSupplier { | ||
/** | ||
* @return implementation using in-memory hdt | ||
*/ | ||
static HDTSupplier memory() { | ||
return (iterator, baseURI, hdtFormat, listener, location) -> { | ||
try (HDT hdt = HDTManager.generateHDT(iterator, baseURI, hdtFormat, listener)) { | ||
hdt.saveToHDT(location.toAbsolutePath().toString(), listener); | ||
} | ||
}; | ||
} | ||
|
||
/** | ||
* Generate the HDT | ||
* | ||
* @param iterator the iterator to create the hdt | ||
* @param baseURI the base URI (useless, but asked by some methods) | ||
* @param hdtFormat the HDT options to create the HDT | ||
* @param listener listener | ||
* @param location where to write the HDT | ||
* @throws IOException io exception while creating the HDT | ||
* @throws ParserException parser exception while retrieving the triples | ||
*/ | ||
void doGenerateHDT(Iterator<TripleString> iterator, String baseURI, HDTOptions hdtFormat, ProgressListener listener, Path location) throws IOException, ParserException; | ||
} |
102 changes: 102 additions & 0 deletions
102
hdt-api/src/main/java/org/rdfhdt/hdt/rdf/RDFFluxStop.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
package org.rdfhdt.hdt.rdf; | ||
|
||
import org.rdfhdt.hdt.triples.TripleString; | ||
|
||
import java.io.IOException; | ||
import java.nio.charset.StandardCharsets; | ||
|
||
/** | ||
* Rdf flux stopper descriptor | ||
* @author Antoine Willerval | ||
*/ | ||
public interface RDFFluxStop { | ||
/** | ||
* @return basic implementation without any limit | ||
*/ | ||
static RDFFluxStop noLimit() { | ||
return new RDFFluxStop() { | ||
@Override | ||
public boolean canHandle(TripleString ts) { | ||
return true; | ||
} | ||
|
||
@Override | ||
public void restart() { | ||
// nothing | ||
} | ||
}; | ||
} | ||
|
||
/** | ||
* implementation of flux stop stopping after a maximum triple count | ||
* | ||
* @param maxTriple maximum count | ||
* @return FluxStop | ||
*/ | ||
static RDFFluxStop countLimit(long maxTriple) { | ||
if (maxTriple <= 0) { | ||
throw new IllegalArgumentException("Can't have a limit of 0 or a negative value!"); | ||
} | ||
return new RDFFluxStop() { | ||
long current = 0; | ||
|
||
@Override | ||
public boolean canHandle(TripleString ts) { | ||
return current++ < maxTriple; | ||
} | ||
|
||
@Override | ||
public void restart() { | ||
current = 0; | ||
} | ||
}; | ||
} | ||
|
||
/** | ||
* implementation of flux stop stopping after a maximum NTriple size | ||
* | ||
* @param maxSize maximum size | ||
* @return FluxStop | ||
*/ | ||
static RDFFluxStop sizeLimit(long maxSize) { | ||
if (maxSize <= 0) { | ||
throw new IllegalArgumentException("Can't have a limit of 0 or a negative value!"); | ||
} | ||
return new RDFFluxStop() { | ||
long size = 0; | ||
|
||
@Override | ||
public boolean canHandle(TripleString ts) { | ||
long tsSize; | ||
try { | ||
tsSize = ts.asNtriple().toString().getBytes(StandardCharsets.UTF_8).length; | ||
} catch (IOException e) { | ||
throw new RuntimeException("Can't estimate the size of the triple " + ts, e); | ||
} | ||
try { | ||
return size < maxSize; | ||
} finally { | ||
size += tsSize; | ||
} | ||
} | ||
|
||
@Override | ||
public void restart() { | ||
size = 0; | ||
} | ||
}; | ||
} | ||
|
||
/** | ||
* should we stop the flux after this triple or not? | ||
* | ||
* @param ts the triple | ||
* @return true if the flux can handle this triple, false otherwise | ||
*/ | ||
boolean canHandle(TripleString ts); | ||
|
||
/** | ||
* restart the flux stop | ||
*/ | ||
void restart(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.