Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Literals dictionary - splitting objects into subsections by datatype #139

Merged
merged 2 commits into from
Jan 13, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
New dictionary splitting objects into subsections per literal type
  • Loading branch information
Ali Haidar authored and Ali Haidar committed Jan 11, 2022
commit 4e619cfde10530035231ae4a2f6cbcd06403e325
34 changes: 16 additions & 18 deletions hdt-api/src/main/java/org/rdfhdt/hdt/dictionary/Dictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@


import java.io.Closeable;
import java.util.HashMap;
import java.util.TreeMap;

import org.rdfhdt.hdt.enums.TripleComponentRole;
import org.rdfhdt.hdt.header.Header;
Expand Down Expand Up @@ -65,45 +67,44 @@ public interface Dictionary extends Closeable {
public long stringToId(CharSequence str, TripleComponentRole position);

/**
* * Returns the number of elements in the dictionary
* Returns the number of elements in the dictionary
*/

/**
* Returns the data type of a given literal string
*
* @return long
* @param id
* The id to get the data type for
* @return String
*/
public String dataTypeOfId(long id);

public long getNumberOfElements();

/**
* Return the combined size of the sections of the dictionary (in bytes)
*
* @return long
*/
public long size();

/**
* Returns the number of subjects in the dictionary. Note: Includes shared.
*
* @return long
*/
public long getNsubjects();

/**
* Returns the number of predicates in the dictionary.
*
* @return long
*/
public long getNpredicates();

/**
* Returns the number of objects in the dictionary. Note: Includes shared
*
* @return long
*/
public long getNobjects();

/**
* Returns the number of subjects/objects in the dictionary.
*
* @return long
*/
public long getNAllObjects();
/**
* NOTE(review): presumably the number of entries in the shared section
* (see getShared()) -- confirm against the implementations. The Javadoc
* sitting directly above getNAllObjects() ("number of subjects/objects")
* reads as if it was written for this method; verify and re-attach it.
*
* @return long
*/
public long getNshared();

public DictionarySection getSubjects();
Expand All @@ -112,21 +113,18 @@ public interface Dictionary extends Closeable {

public DictionarySection getObjects();

public TreeMap<String,DictionarySection> getAllObjects();

public DictionarySection getShared();

/**
* Fills the header with information from the dictionary
* @param header
* the header to fill
* @param rootNode
* the rdf root node
*/
public void populateHeader(Header header, String rootNode);

/**
* Returns the type of the dictionary (the way it is written onto file/held in memory)
*
* @return String
* @return
*/
public String getType();
}
17 changes: 9 additions & 8 deletions hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,20 @@ public class HDTVersion {
// Version of the actual HDT file that is generated or read.
// Software must be backwards compatible with all HDT files with the same number.
public static final String HDT_VERSION = "1";

public static final String HDT_VERSION_2 = "2";

// Version of the accompanying .index file that is generated or read
// Software must be backwards compatible with all index files with the same index and HDT version number.
public static final String INDEX_VERSION = "1";

// Subreleases that are backwards compatible with both HDT and index file
public static final String RELEASE_VERSION ="2";

public static String get_version_string(String delimiter) {
return "v" + HDT_VERSION + delimiter + INDEX_VERSION + delimiter + RELEASE_VERSION;
};

public static String get_index_suffix(String delimiter) {
return ".index.v" + HDT_VERSION + delimiter+INDEX_VERSION;
};
/**
* Builds the full version label: "v" followed by HDT_VERSION, INDEX_VERSION
* and RELEASE_VERSION, with the given delimiter between the components
* (e.g. "v1.1.2" when the delimiter is ".").
*
* @param delimiter text inserted between the version components
* @return the assembled version string
*/
public static String get_version_string(String delimiter) {
return "v" + HDT_VERSION + delimiter + INDEX_VERSION + delimiter + RELEASE_VERSION;
};

/**
* Builds the file-name suffix for index files: ".index.v" followed by
* HDT_VERSION, the delimiter and INDEX_VERSION (e.g. ".index.v1-1" when the
* delimiter is "-"). RELEASE_VERSION is not part of the suffix.
*
* @param delimiter text inserted between the HDT and index version numbers
* @return the index file suffix
*/
public static String get_index_suffix(String delimiter) {
return ".index.v" + HDT_VERSION + delimiter+INDEX_VERSION;
};
}
15 changes: 9 additions & 6 deletions hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTVocabulary.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ public class HDTVocabulary {
// Base
public static final String HDT_BASE = "<http://purl.org/HDT/hdt#";
public static final String HDT_CONTAINER = HDT_BASE+"HDTv" + HDTVersion.HDT_VERSION+">";

public static final String HDT_CONTAINER_2 = HDT_BASE+"HDTv" + HDTVersion.HDT_VERSION_2+">";

public static final String HDT_HEADER = HDT_BASE+"header";
public static final String HDT_DICTIONARY_BASE = HDT_BASE+"dictionary";
public static final String HDT_DICTIONARY = HDT_DICTIONARY_BASE+">";
Expand All @@ -49,7 +50,7 @@ public class HDTVocabulary {
public static final String RDF_TYPE = RDF+"type>";
public static final String DUBLIN_CORE = "<http://purl.org/dc/terms/";
public static final String DUBLIN_CORE_ISSUED = DUBLIN_CORE+"issued>";

// VOID
public static final String VOID_BASE ="<http://rdfs.org/ns/void#";
public static final String VOID_DATASET = VOID_BASE + "Dataset>";
Expand Down Expand Up @@ -82,6 +83,8 @@ public class HDTVocabulary {
// Dictionary Types
public static final String DICTIONARY_TYPE_PLAIN = HDT_DICTIONARY_BASE+"Plain>";
public static final String DICTIONARY_TYPE_FOUR_SECTION = HDT_DICTIONARY_BASE+"Four>";
public static final String DICTIONARY_TYPE_MULT_SECTION = HDT_DICTIONARY_BASE+"Mult>";

public static final String DICTIONARY_TYPE_FOUR_PSFC_SECTION = HDT_DICTIONARY_BASE+"FourPsfc>";

// Triples
Expand All @@ -103,7 +106,7 @@ public class HDTVocabulary {
public static final String TRIPLES_TYPE_PLAIN = HDT_TRIPLES_BASE+"Plain>";
public static final String TRIPLES_TYPE_COMPACT = HDT_TRIPLES_BASE+"Compact>";
public static final String TRIPLES_TYPE_BITMAP = HDT_TRIPLES_BASE+"Bitmap>";

// Index type
public static final String INDEX_TYPE_FOQ = HDT_BASE+"indexFoQ>";

Expand All @@ -116,10 +119,10 @@ public class HDTVocabulary {

// Bitmaps
public static final String BITMAP_TYPE_PLAIN = HDT_BITMAP_BASE+"Plain>";
// Misc

// Misc
public static final String ORIGINAL_SIZE = HDT_BASE+"originalSize>";
public static final String HDT_SIZE = HDT_BASE+"hdtSize>";

private HDTVocabulary() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,7 @@

package org.rdfhdt.hdt.dictionary;

import org.rdfhdt.hdt.dictionary.impl.FourSectionDictionary;
import org.rdfhdt.hdt.dictionary.impl.FourSectionDictionaryBig;
import org.rdfhdt.hdt.dictionary.impl.HashDictionary;
import org.rdfhdt.hdt.dictionary.impl.PSFCFourSectionDictionary;
import org.rdfhdt.hdt.dictionary.impl.PSFCTempDictionary;
import org.rdfhdt.hdt.dictionary.impl.*;
import org.rdfhdt.hdt.exceptions.IllegalFormatException;
import org.rdfhdt.hdt.hdt.HDTVocabulary;
import org.rdfhdt.hdt.options.ControlInfo;
Expand All @@ -40,43 +36,46 @@

/**
* Factory that creates Dictionary objects
*
*
*/
public class DictionaryFactory {

public static final String MOD_DICT_IMPL_HASH = "hash";
public static final String MOD_DICT_IMPL_MULT_HASH = "multHash";
public static final String MOD_DICT_IMPL_HASH_PSFC = "hashPsfc";
public static final String DICTIONARY_TYPE_FOUR_SECTION_BIG ="dictionaryFourBig";

public static final String DICTIONARY_TYPE_MULTI_OBJECTS = "dictionaryMultiObj";
private DictionaryFactory() {}

/**
* Creates a default dictionary: a FourSectionDictionary built from a
* fresh HDTSpecification.
*
* @return Dictionary
*/
public static Dictionary createDefaultDictionary()
throws IllegalArgumentException {
return new FourSectionDictionary(new HDTSpecification());
}

/**
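* Creates a temporary dictionary; the implementation is selected by the
* "tempDictionary.impl" option (a plain HashDictionary when the option is
* unset, empty or "hash")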
*
*
* @return Dictionary
*/
public static TempDictionary createTempDictionary(HDTOptions spec) {
String name = spec.get("tempDictionary.impl");

// Implementations available in the Core
if(name==null || "".equals(name) || MOD_DICT_IMPL_HASH.equals(name)) {
return new HashDictionary(spec);
return new HashDictionary(spec,false);
} else if(MOD_DICT_IMPL_HASH_PSFC.equals(name)){
return new PSFCTempDictionary(new HashDictionary(spec));
return new PSFCTempDictionary(new HashDictionary(spec,false));
} else if(MOD_DICT_IMPL_MULT_HASH.equals(name)){
return new HashDictionary(spec,true);
}
throw new IllegalFormatException("Implementation of triples not found for "+name);
}

public static DictionaryPrivate createDictionary(HDTOptions spec) {
String name = spec.get("dictionary.type");
if(name==null || HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION.equals(name)) {
Expand All @@ -87,16 +86,20 @@ else if (HDTVocabulary.DICTIONARY_TYPE_FOUR_PSFC_SECTION.equals(name)){
}
else if (DICTIONARY_TYPE_FOUR_SECTION_BIG.equals(name)){
return new FourSectionDictionaryBig(spec);
}else if ((DICTIONARY_TYPE_MULTI_OBJECTS.equals(name))){
return new MultipleSectionDictionary(spec);
}
throw new IllegalFormatException("Implementation of dictionary not found for "+name);
}

/**
* Instantiates the dictionary implementation whose type matches the format
* stored in the given control information. Each implementation is built
* with a fresh HDTSpecification.
*
* @param ci control information whose format names the dictionary type
* @return a new dictionary of the matching implementation
* @throws IllegalFormatException if the format matches none of the known types
*/
public static DictionaryPrivate createDictionary(ControlInfo ci) {
	final String format = ci.getFormat();

	// One guard per supported on-disk dictionary type.
	if (HDTVocabulary.DICTIONARY_TYPE_FOUR_SECTION.equals(format)) {
		return new FourSectionDictionary(new HDTSpecification());
	}
	if (HDTVocabulary.DICTIONARY_TYPE_FOUR_PSFC_SECTION.equals(format)) {
		return new PSFCFourSectionDictionary(new HDTSpecification());
	}
	if (HDTVocabulary.DICTIONARY_TYPE_MULT_SECTION.equals(format)) {
		return new MultipleSectionDictionary(new HDTSpecification());
	}

	throw new IllegalFormatException("Implementation of dictionary not found for "+format);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
import org.rdfhdt.hdt.util.string.CompactString;
import org.rdfhdt.hdt.util.string.DelayedString;

import java.util.TreeMap;

/**
*
* This abstract class implements all general methods that are the same
Expand Down Expand Up @@ -218,5 +220,32 @@ public CharSequence idToString(long id, TripleComponentRole role) {
long localId = getLocalId(id, role);
return section.extract(localId);
}
/**
* Not applicable on this dictionary: prints a stack trace reporting the
* call and yields an empty string. No exception reaches the caller.
*
* @param id unused
* @return always the empty string
*/
@Override
public String dataTypeOfId(long id) {
	// The exception is created only to report where the call came from; it is never thrown.
	final IllegalAccessException notApplicable =
			new IllegalAccessException("Method is not applicable on this dictionary");
	notApplicable.printStackTrace();
	return "";
}
/**
* Not applicable on this dictionary: prints a stack trace reporting the
* call and yields {@code null}. No exception reaches the caller.
*
* @return always {@code null}
*/
@Override
public TreeMap<String, DictionarySection> getAllObjects() {
	// The exception is created only to report where the call came from; it is never thrown.
	final IllegalAccessException notApplicable =
			new IllegalAccessException("Method is not applicable on this dictionary");
	notApplicable.printStackTrace();
	return null;
}
/**
* Not applicable on this dictionary: prints a stack trace reporting the
* call and yields zero. No exception reaches the caller.
*
* @return always 0
*/
@Override
public long getNAllObjects() {
	// The exception is created only to report where the call came from; it is never thrown.
	final IllegalAccessException notApplicable =
			new IllegalAccessException("Method is not applicable on this dictionary");
	notApplicable.printStackTrace();
	return 0;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import org.rdfhdt.hdt.dictionary.impl.section.PFCOptimizedExtractor;
import org.rdfhdt.hdt.enums.TripleComponentRole;

public class DictionaryPFCOptimizedExtractor {
public class DictionaryPFCOptimizedExtractor implements OptimizedExtractor{
private final PFCOptimizedExtractor shared, subjects, predicates, objects;
private final long numshared;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@
package org.rdfhdt.hdt.dictionary.impl;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;

import org.rdfhdt.hdt.dictionary.TempDictionarySection;
import org.rdfhdt.hdt.dictionary.impl.section.HashDictionarySection;
import org.rdfhdt.hdt.enums.TripleComponentRole;
import org.rdfhdt.hdt.options.HDTOptions;
import org.rdfhdt.hdt.triples.TempTriples;
import org.rdfhdt.hdt.triples.TripleID;
import org.rdfhdt.hdt.util.StopWatch;

/**
Expand All @@ -43,13 +45,14 @@
*/
public class HashDictionary extends BaseTempDictionary {

public HashDictionary(HDTOptions spec) {
boolean isCustom = false;
public HashDictionary(HDTOptions spec,boolean isCustom) {
super(spec);

this.isCustom = isCustom;
// FIXME: Read types from spec
subjects = new HashDictionarySection();
predicates = new HashDictionarySection();
objects = new HashDictionarySection();
objects = new HashDictionarySection(isCustom);
shared = new HashDictionarySection();
}

Expand Down Expand Up @@ -105,7 +108,10 @@ public void reorganize(TempTriples triples) {
st.reset();
subjects.sort();
predicates.sort();
long startTime = System.currentTimeMillis();
objects.sort();
long endTime = System.currentTimeMillis();
//System.out.println("Time to sort temp objects:"+(endTime - startTime)+" ms");
shared.sort();
//System.out.println("Sections sorted in "+ st.stopAndShow());

Expand Down
Loading