Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDTCatTree + HDTGenDisk #179

Merged
merged 9 commits into from
Nov 21, 2022
Prev Previous commit
Next Next commit
Start MultipleDictionary implementation for disk generation, fix delta merger, cleanup code, better ByteString handle
  • Loading branch information
ate47 committed Nov 4, 2022
commit da187f8900a900fee31375e06b1345babd3b120b
52 changes: 25 additions & 27 deletions hdt-api/src/main/java/org/rdfhdt/hdt/dictionary/Dictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,12 @@
*/


import java.io.Closeable;
import java.util.HashMap;
import java.util.TreeMap;

import org.rdfhdt.hdt.enums.TripleComponentRole;
import org.rdfhdt.hdt.header.Header;

import java.io.Closeable;
import java.util.Map;


/**
* Interface that specifies the basic methods for any Dictionary implementation
Expand All @@ -53,7 +52,7 @@ public interface Dictionary extends Closeable {
* TriplePosition of the id in the dictionary
* @return String
*/
public CharSequence idToString(long id, TripleComponentRole position);
CharSequence idToString(long id, TripleComponentRole position);

/**
* Returns the id for a given string
Expand All @@ -64,11 +63,7 @@ public interface Dictionary extends Closeable {
* TriplePosition of the string in the dictionary
* @return long
*/
public long stringToId(CharSequence str, TripleComponentRole position);

/**
* Returns the number of elements in the dictionary
*/
long stringToId(CharSequence str, TripleComponentRole position);

/**
* Returns the data type of a given literal string
Expand All @@ -77,54 +72,57 @@ public interface Dictionary extends Closeable {
* The id to get the data type for
* @return String
*/
public String dataTypeOfId(long id);
CharSequence dataTypeOfId(long id);

public long getNumberOfElements();
/**
* Returns the number of elements in the dictionary
*/
long getNumberOfElements();

/**
* Return the combined size of the sections of the dictionary (in bytes)
*/
public long size();
long size();

/**
* Returns the number of subjects in the dictionary. Note: Includes shared.
*/
public long getNsubjects();
long getNsubjects();

/**
* Returns the number of predicates in the dictionary.
*/
public long getNpredicates();
long getNpredicates();

/**
* Returns the number of objects in the dictionary. Note: Includes shared
*/
public long getNobjects();
long getNobjects();

/**
* Returns the number of subjects/objects in the dictionary.
*/
public long getNAllObjects();
public long getNshared();
long getNAllObjects();
long getNshared();

public DictionarySection getSubjects();
DictionarySection getSubjects();

public DictionarySection getPredicates();
DictionarySection getPredicates();

public DictionarySection getObjects();
DictionarySection getObjects();

public TreeMap<String,DictionarySection> getAllObjects();
Map<? extends CharSequence, DictionarySection> getAllObjects();

public DictionarySection getShared();
DictionarySection getShared();

/**
* Fills the header with information from the dictionary
*/
public void populateHeader(Header header, String rootNode);
void populateHeader(Header header, String rootNode);

/**
* Returns the type of the dictionary (the way it is written onto file/held in memory)
* @return
* @return type
*/
public String getType();
}
String getType();
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public interface DictionarySection extends Closeable {
* @return
* the corresponding ID in the dictionary
*/
public long locate(CharSequence s);
long locate(CharSequence s);

/**
* Find the String associated to a given ID
Expand All @@ -55,7 +55,7 @@ public interface DictionarySection extends Closeable {
* @return
* the corresponding string
*/
public CharSequence extract(long pos);
CharSequence extract(long pos);

/**
* Size in bytes of the strings held in the dictionary section.
Expand All @@ -67,7 +67,7 @@ public interface DictionarySection extends Closeable {
* Number of entries in the dictionary section.
* @return long
*/
public long getNumberOfElements();
long getNumberOfElements();

/**
* Iterator over all entries in the dictionary, sorted lexicographically.
Expand Down
50 changes: 29 additions & 21 deletions hdt-api/src/main/java/org/rdfhdt/hdt/enums/RDFNotation.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,26 +110,34 @@ public static RDFNotation parse(String str) {
return NTRIPLES;
}
str = str.toLowerCase();
if(str.equals("ntriples")||str.equals("nt")) {
return NTRIPLES;
} else if(str.equals("n3")) {
return N3;
} else if(str.equals("nq")||str.equals("nquad")) {
return NQUAD;
} else if(str.equals("rdfxml")||str.equals("rdf-xml") || str.equals("owl")) {
return RDFXML;
} else if(str.equals("turtle")) {
return TURTLE;
} else if(str.equals("rar")) {
return RAR;
} else if(str.equals("tar")||str.equals("tgz")||str.equals("tbz")||str.equals("tbz2")) {
return TAR;
} else if(str.equals("zip")) {
return ZIP;
} else if(str.equals("list")) {
return LIST;
} else if(str.equals("hdt")) {
return HDT;
switch (str) {
case "ntriples":
case "nt":
return NTRIPLES;
case "n3":
return N3;
case "nq":
case "nquad":
return NQUAD;
case "rdfxml":
case "rdf-xml":
case "owl":
return RDFXML;
case "turtle":
return TURTLE;
case "rar":
return RAR;
case "tar":
case "tgz":
case "tbz":
case "tbz2":
return TAR;
case "zip":
return ZIP;
case "list":
return LIST;
case "hdt":
return HDT;
}
throw new IllegalArgumentException();
}
Expand All @@ -147,7 +155,7 @@ public static RDFNotation guess(String fileName) throws IllegalArgumentException

int idx = str.lastIndexOf('.');
if(idx!=-1) {
String ext = str.substring(idx+1, str.length());
String ext = str.substring(idx+1);
if(ext.equals("gz") || ext.equals("bz") || ext.equals("bz2")|| ext.equals("xz")) {
str = str.substring(0,idx);
}
Expand Down
4 changes: 2 additions & 2 deletions hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ public class HDTVersion {

public static String get_version_string(String delimiter) {
return "v" + HDT_VERSION + delimiter + INDEX_VERSION + delimiter + RELEASE_VERSION;
};
}

public static String get_index_suffix(String delimiter) {
return ".index.v" + HDT_VERSION + delimiter+INDEX_VERSION;
};
}
}
Loading