Skip to content

Commit

Permalink
apache GH-2039: Case insensitive langtags (initial)
Browse files Browse the repository at this point in the history
  • Loading branch information
afs committed Dec 27, 2023
1 parent 7bf97bf commit 74a134d
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ protected Node createLiteral(String lexicalForm, String langTag, String datatype
RDFDatatype dType = TypeMapper.getInstance().getSafeTypeByName(datatypeURI);
n = NodeFactory.createLiteral(lexicalForm, dType);
} else if ( langTag != null && !langTag.isEmpty() )
n = NodeFactory.createLiteral(lexicalForm, langTag);
n = NodeFactory.createLiteralLang(lexicalForm, langTag);
else
n = NodeFactory.createLiteral(lexicalForm);
return n;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ private enum ValueMode { EAGER , LAZY }
/*package*/ LiteralLabel(String lex, String lang, RDFDatatype dtype) {
this.lexicalForm = lex;
this.dtype = Objects.requireNonNull(dtype);
this.lang = (lang == null ? "" : lang);
this.lang = formLangTag(lang);
hash = calcHashCode();
if ( valueMode == ValueMode.EAGER ) {
this.wellformed = setValue(lex, dtype);
Expand All @@ -116,6 +116,24 @@ private enum ValueMode { EAGER , LAZY }
value = null;
}

/**
 * Selects how the case of language tags is handled by {@code formLangTag} and {@code indexingLang}.
 * When {@code true}, {@code formLangTag} keeps the tag as given and {@code indexingLang}
 * lower-cases it; when {@code false}, {@code formLangTag} lower-cases the tag and
 * {@code indexingLang} returns it unchanged.
 */
private static final boolean legacyLangTag = true;

/**
 * Produce the form of a language tag that is kept on the label.
 * <p>
 * A {@code null} tag becomes the empty string. Otherwise the tag is returned
 * unchanged when {@code legacyLangTag} is set, and lower-cased using
 * {@link Locale#ROOT} when it is not.
 *
 * @param input the language tag as supplied; may be {@code null}
 * @return the prepared tag; never {@code null}
 */
private static String formLangTag(String input) {
    if ( input == null )
        return "";
    // Legacy mode keeps the supplied spelling; otherwise normalize the case.
    return legacyLangTag ? input : input.toLowerCase(Locale.ROOT);
}

/**
 * Produce the form of a language tag used when building an indexing value.
 * <p>
 * When {@code legacyLangTag} is set the tag is lower-cased using {@link Locale#ROOT};
 * when it is not set the tag is returned unchanged.
 *
 * @param lang the language tag; dereferenced (so must not be {@code null}) when
 *             {@code legacyLangTag} is set
 * @return the tag in its indexing form
 */
private static String indexingLang(String lang) {
    return legacyLangTag ? lang.toLowerCase(Locale.ROOT) : lang;
}


/**
* Build a typed literal label from its value form using
* whatever datatype is currently registered as the default
Expand Down Expand Up @@ -304,7 +322,8 @@ public Object getIndexingValue() {
if ( indexingValueIsSelf() )
return this;
if ( !lang.equals("") )
return getLexicalForm() + "@" + lang.toLowerCase(Locale.ROOT);
// Assumed formatted/case-insensitive language tags.
return getLexicalForm() + "@" + indexingLang(lang);
if ( wellformed ) {
Object value = getValue();
// JENA-1936
Expand Down

0 comments on commit 74a134d

Please sign in to comment.