Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDTCatTree + HDTGenDisk #179

Merged
merged 9 commits into from
Nov 21, 2022
52 changes: 25 additions & 27 deletions hdt-api/src/main/java/org/rdfhdt/hdt/dictionary/Dictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,12 @@
*/


import java.io.Closeable;
import java.util.HashMap;
import java.util.TreeMap;

import org.rdfhdt.hdt.enums.TripleComponentRole;
import org.rdfhdt.hdt.header.Header;

import java.io.Closeable;
import java.util.Map;


/**
* Interface that specifies the basic methods for any Dictionary implementation
Expand All @@ -53,7 +52,7 @@ public interface Dictionary extends Closeable {
* TriplePosition of the id in the dictionary
* @return String
*/
public CharSequence idToString(long id, TripleComponentRole position);
CharSequence idToString(long id, TripleComponentRole position);

/**
* Returns the id for a given string
Expand All @@ -64,11 +63,7 @@ public interface Dictionary extends Closeable {
* TriplePosition of the string in the dictionary
* @return long
*/
public long stringToId(CharSequence str, TripleComponentRole position);

/**
* Returns the number of elements in the dictionary
*/
long stringToId(CharSequence str, TripleComponentRole position);

/**
* Returns the data type of a given literal string
Expand All @@ -77,54 +72,57 @@ public interface Dictionary extends Closeable {
* The id to get the data type for
* @return String
*/
public String dataTypeOfId(long id);
CharSequence dataTypeOfId(long id);

public long getNumberOfElements();
/**
* Returns the number of elements in the dictionary
*/
long getNumberOfElements();

/**
* Return the combined size of the sections of the dictionary (in bytes)
*/
public long size();
long size();

/**
* Returns the number of subjects in the dictionary. Note: Includes shared.
*/
public long getNsubjects();
long getNsubjects();

/**
* Returns the number of predicates in the dictionary.
*/
public long getNpredicates();
long getNpredicates();

/**
* Returns the number of objects in the dictionary. Note: Includes shared
*/
public long getNobjects();
long getNobjects();

/**
* Returns the number of subjects/objects in the dictionary.
*/
public long getNAllObjects();
public long getNshared();
long getNAllObjects();
long getNshared();

public DictionarySection getSubjects();
DictionarySection getSubjects();

public DictionarySection getPredicates();
DictionarySection getPredicates();

public DictionarySection getObjects();
DictionarySection getObjects();

public TreeMap<String,DictionarySection> getAllObjects();
Map<? extends CharSequence, DictionarySection> getAllObjects();

public DictionarySection getShared();
DictionarySection getShared();

/**
* Fills the header with information from the dictionary
*/
public void populateHeader(Header header, String rootNode);
void populateHeader(Header header, String rootNode);

/**
* Returns the type of the dictionary (the way it is written onto file/held in memory)
* @return
* @return type
*/
public String getType();
}
String getType();
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public interface DictionarySection extends Closeable {
* @return
* the corresponding ID in the dictionary
*/
public long locate(CharSequence s);
long locate(CharSequence s);

/**
* Find the String associated to a given ID
Expand All @@ -55,7 +55,7 @@ public interface DictionarySection extends Closeable {
* @return
* the corresponding string
*/
public CharSequence extract(long pos);
CharSequence extract(long pos);

/**
* Size in bytes of the strings held in the dictionary section.
Expand All @@ -67,7 +67,7 @@ public interface DictionarySection extends Closeable {
* Number of entries in the dictionary section.
* @return long
*/
public long getNumberOfElements();
long getNumberOfElements();

/**
* Iterator over all entries in the dictionary, sorted lexicographically.
Expand Down
52 changes: 52 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/enums/CompressionType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package org.rdfhdt.hdt.enums;

/**
 * A compression type, identified by the file name extensions associated with it.
 *
 * @author Antoine Willerval
 */
public enum CompressionType {

    /**
     * gzip compression (.gz .tgz)
     */
    GZIP("gz", "tgz"),
    /**
     * bzip compression (.bz2 .bz)
     */
    BZIP("bz2", "bz"),
    /**
     * xz compression (.xz)
     */
    XZ("xz"),
    /**
     * no compression
     */
    NONE;

    /**
     * Try to guess the compression of a file from its name.
     * <p>
     * Only the text after the last {@code '.'} of the name is considered, and
     * the comparison with the known extensions is case-insensitive.
     *
     * @param fileName the file name to guess, may be {@code null}
     * @return the compression type, or {@link #NONE} if it can't be guessed
     *         (no extension, unknown extension or {@code null} name)
     */
    public static CompressionType guess(String fileName) {
        // A missing name cannot be guessed: answer NONE instead of failing with an NPE.
        if (fileName == null) {
            return NONE;
        }

        // Locale.ROOT keeps the case folding independent of the JVM default locale.
        // Fully qualified because this file has no import section.
        String str = fileName.toLowerCase(java.util.Locale.ROOT);

        int idx = str.lastIndexOf('.');
        if (idx != -1) {
            String ext = str.substring(idx + 1);
            for (CompressionType type : values()) {
                // NONE has no extension, so it never matches here.
                for (String typeExt : type.ext) {
                    if (typeExt.equals(ext)) {
                        return type;
                    }
                }
            }
        }
        return NONE;
    }

    /** Lower-case extensions (without the dot) associated with this type. */
    private final String[] ext;

    CompressionType(String... ext) {
        this.ext = ext;
    }
}
50 changes: 29 additions & 21 deletions hdt-api/src/main/java/org/rdfhdt/hdt/enums/RDFNotation.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,26 +110,34 @@ public static RDFNotation parse(String str) {
return NTRIPLES;
}
str = str.toLowerCase();
if(str.equals("ntriples")||str.equals("nt")) {
return NTRIPLES;
} else if(str.equals("n3")) {
return N3;
} else if(str.equals("nq")||str.equals("nquad")) {
return NQUAD;
} else if(str.equals("rdfxml")||str.equals("rdf-xml") || str.equals("owl")) {
return RDFXML;
} else if(str.equals("turtle")) {
return TURTLE;
} else if(str.equals("rar")) {
return RAR;
} else if(str.equals("tar")||str.equals("tgz")||str.equals("tbz")||str.equals("tbz2")) {
return TAR;
} else if(str.equals("zip")) {
return ZIP;
} else if(str.equals("list")) {
return LIST;
} else if(str.equals("hdt")) {
return HDT;
switch (str) {
case "ntriples":
case "nt":
return NTRIPLES;
case "n3":
return N3;
case "nq":
case "nquad":
return NQUAD;
case "rdfxml":
case "rdf-xml":
case "owl":
return RDFXML;
case "turtle":
return TURTLE;
case "rar":
return RAR;
case "tar":
case "tgz":
case "tbz":
case "tbz2":
return TAR;
case "zip":
return ZIP;
case "list":
return LIST;
case "hdt":
return HDT;
}
throw new IllegalArgumentException();
}
Expand All @@ -147,7 +155,7 @@ public static RDFNotation guess(String fileName) throws IllegalArgumentException

int idx = str.lastIndexOf('.');
if(idx!=-1) {
String ext = str.substring(idx+1, str.length());
String ext = str.substring(idx+1);
if(ext.equals("gz") || ext.equals("bz") || ext.equals("bz2")|| ext.equals("xz")) {
str = str.substring(0,idx);
}
Expand Down
Loading