Skip to content

Commit

Permalink
Merge pull request #179 from ate47/hcat_tree_external
Browse files Browse the repository at this point in the history
HDTCatTree + HDTGenDisk
  • Loading branch information
D063520 authored Nov 21, 2022
2 parents 0531f87 + f0969fa commit 969c049
Show file tree
Hide file tree
Showing 231 changed files with 19,035 additions and 2,108 deletions.
52 changes: 25 additions & 27 deletions hdt-api/src/main/java/org/rdfhdt/hdt/dictionary/Dictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,12 @@
*/


import java.io.Closeable;
import java.util.HashMap;
import java.util.TreeMap;

import org.rdfhdt.hdt.enums.TripleComponentRole;
import org.rdfhdt.hdt.header.Header;

import java.io.Closeable;
import java.util.Map;


/**
* Interface that specifies the basic methods for any Dictionary implementation
Expand All @@ -53,7 +52,7 @@ public interface Dictionary extends Closeable {
* TriplePosition of the id in the dictionary
* @return CharSequence
*/
public CharSequence idToString(long id, TripleComponentRole position);
CharSequence idToString(long id, TripleComponentRole position);

/**
* Returns the id for a given string
Expand All @@ -64,11 +63,7 @@ public interface Dictionary extends Closeable {
* TriplePosition of the string in the dictionary
* @return long
*/
public long stringToId(CharSequence str, TripleComponentRole position);

/**
* Returns the number of elements in the dictionary
*/
long stringToId(CharSequence str, TripleComponentRole position);

/**
* Returns the data type of a given literal string
Expand All @@ -77,54 +72,57 @@ public interface Dictionary extends Closeable {
* The id to get the data type for
* @return String
*/
public String dataTypeOfId(long id);
CharSequence dataTypeOfId(long id);

public long getNumberOfElements();
/**
* Returns the number of elements in the dictionary
*/
long getNumberOfElements();

/**
* Return the combined size of the sections of the dictionary (in bytes)
*/
public long size();
long size();

/**
* Returns the number of subjects in the dictionary. Note: Includes shared.
*/
public long getNsubjects();
long getNsubjects();

/**
* Returns the number of predicates in the dictionary.
*/
public long getNpredicates();
long getNpredicates();

/**
* Returns the number of objects in the dictionary. Note: Includes shared
*/
public long getNobjects();
long getNobjects();

/**
* Returns the number of subjects/objects in the dictionary.
*/
public long getNAllObjects();
public long getNshared();
long getNAllObjects();
long getNshared();

public DictionarySection getSubjects();
DictionarySection getSubjects();

public DictionarySection getPredicates();
DictionarySection getPredicates();

public DictionarySection getObjects();
DictionarySection getObjects();

public TreeMap<String,DictionarySection> getAllObjects();
Map<? extends CharSequence, DictionarySection> getAllObjects();

public DictionarySection getShared();
DictionarySection getShared();

/**
* Fills the header with information from the dictionary
*/
public void populateHeader(Header header, String rootNode);
void populateHeader(Header header, String rootNode);

/**
* Returns the type of the dictionary (the way it is written onto file/held in memory)
* @return
* @return type
*/
public String getType();
}
String getType();
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public interface DictionarySection extends Closeable {
* @return
* the corresponding ID in the dictionary
*/
public long locate(CharSequence s);
long locate(CharSequence s);

/**
* Find the String associated to a given ID
Expand All @@ -55,7 +55,7 @@ public interface DictionarySection extends Closeable {
* @return
* the corresponding string
*/
public CharSequence extract(long pos);
CharSequence extract(long pos);

/**
* Size in bytes of the strings held in the dictionary section.
Expand All @@ -67,7 +67,7 @@ public interface DictionarySection extends Closeable {
* Number of entries in the dictionary section.
* @return long
*/
public long getNumberOfElements();
long getNumberOfElements();

/**
* Iterator over all entries in the dictionary, sorted lexicographically.
Expand Down
52 changes: 52 additions & 0 deletions hdt-api/src/main/java/org/rdfhdt/hdt/enums/CompressionType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package org.rdfhdt.hdt.enums;

/**
 * A compression type, recognised from the extension of a file name.
 * @author Antoine Willerval
 */
public enum CompressionType {

    /**
     * gzip compression (.gz .tgz)
     */
    GZIP("gz", "tgz"),
    /**
     * bzip compression (.bz2 .bz)
     */
    BZIP("bz2", "bz"),
    /**
     * xz compression (.xz)
     */
    XZ("xz"),
    /**
     * no compression
     */
    NONE;

    /**
     * Lower-case file extensions (without the leading dot) associated with
     * this type; empty for {@link #NONE}.
     */
    private final String[] ext;

    /**
     * @param ext the lower-case extensions (without the dot) identifying this type
     */
    CompressionType(String... ext) {
        this.ext = ext;
    }

    /**
     * try to guess a compression of a file with its name. Only the text after
     * the last '.' is considered and the comparison is case-insensitive.
     * @param fileName the file name to guess, may be null
     * @return the compression type or {@link #NONE} if it can't be guessed
     *         (null name, no extension or unknown extension)
     */
    public static CompressionType guess(String fileName) {
        if (fileName == null) {
            // nothing to inspect: honour the "NONE if it can't be guessed" contract
            return NONE;
        }

        // Locale.ROOT keeps the case mapping independent of the default locale
        String str = fileName.toLowerCase(java.util.Locale.ROOT);

        int idx = str.lastIndexOf('.');
        if (idx == -1) {
            return NONE;
        }

        String fileExt = str.substring(idx + 1);
        for (CompressionType type : values()) {
            for (String typeExt : type.ext) {
                if (typeExt.equals(fileExt)) {
                    return type;
                }
            }
        }
        return NONE;
    }
}
50 changes: 29 additions & 21 deletions hdt-api/src/main/java/org/rdfhdt/hdt/enums/RDFNotation.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,26 +110,34 @@ public static RDFNotation parse(String str) {
return NTRIPLES;
}
str = str.toLowerCase();
if(str.equals("ntriples")||str.equals("nt")) {
return NTRIPLES;
} else if(str.equals("n3")) {
return N3;
} else if(str.equals("nq")||str.equals("nquad")) {
return NQUAD;
} else if(str.equals("rdfxml")||str.equals("rdf-xml") || str.equals("owl")) {
return RDFXML;
} else if(str.equals("turtle")) {
return TURTLE;
} else if(str.equals("rar")) {
return RAR;
} else if(str.equals("tar")||str.equals("tgz")||str.equals("tbz")||str.equals("tbz2")) {
return TAR;
} else if(str.equals("zip")) {
return ZIP;
} else if(str.equals("list")) {
return LIST;
} else if(str.equals("hdt")) {
return HDT;
switch (str) {
case "ntriples":
case "nt":
return NTRIPLES;
case "n3":
return N3;
case "nq":
case "nquad":
return NQUAD;
case "rdfxml":
case "rdf-xml":
case "owl":
return RDFXML;
case "turtle":
return TURTLE;
case "rar":
return RAR;
case "tar":
case "tgz":
case "tbz":
case "tbz2":
return TAR;
case "zip":
return ZIP;
case "list":
return LIST;
case "hdt":
return HDT;
}
throw new IllegalArgumentException();
}
Expand All @@ -147,7 +155,7 @@ public static RDFNotation guess(String fileName) throws IllegalArgumentException

int idx = str.lastIndexOf('.');
if(idx!=-1) {
String ext = str.substring(idx+1, str.length());
String ext = str.substring(idx+1);
if(ext.equals("gz") || ext.equals("bz") || ext.equals("bz2")|| ext.equals("xz")) {
str = str.substring(0,idx);
}
Expand Down
Loading

0 comments on commit 969c049

Please sign in to comment.