Skip to content

Commit

Permalink
Bug 1719554 - Unify most of nsUnicodeProperties.h; r=platform-i18n-re…
Browse files Browse the repository at this point in the history
…viewers,jfkthame,gregtatum,necko-reviewers,valentin

This unifies most of the calls in nsUnicodeProperties.h. CharType and Script
will be handled in subsequent patches on this bug.

Differential Revision: https://phabricator.services.mozilla.com/D132273
  • Loading branch information
dminor committed Dec 6, 2021
1 parent 42bbbe8 commit 50b8416
Show file tree
Hide file tree
Showing 13 changed files with 286 additions and 114 deletions.
1 change: 1 addition & 0 deletions dom/base/DirectionalityUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@
#include "mozilla/dom/Element.h"
#include "mozilla/dom/HTMLSlotElement.h"
#include "mozilla/dom/ShadowRoot.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
#include "nsTextFragment.h"
#include "nsAttrValue.h"
Expand Down
5 changes: 3 additions & 2 deletions dom/serializers/nsPlainTextSerializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include "nsContentUtils.h"
#include "nsReadableUtils.h"
#include "nsUnicharUtils.h"
#include "nsUnicodeProperties.h"
#include "nsCRT.h"
#include "mozilla/Casting.h"
#include "mozilla/EditorUtils.h"
Expand All @@ -31,6 +30,8 @@
#include "mozilla/dom/HTMLBRElement.h"
#include "mozilla/dom/Text.h"
#include "mozilla/intl/Segmenter.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
#include "mozilla/Span.h"
#include "mozilla/Preferences.h"
#include "mozilla/StaticPrefs_converter.h"
Expand Down Expand Up @@ -1803,7 +1804,7 @@ int32_t GetUnicharWidth(char32_t aCh) {
return 1;
}

return unicode::IsEastAsianWidthFW(aCh) ? 2 : 1;
return intl::UnicodeProperties::IsEastAsianWidthFW(aCh) ? 2 : 1;
}

int32_t GetUnicharStringWidth(Span<const char16_t> aString) {
Expand Down
10 changes: 6 additions & 4 deletions gfx/thebes/gfxHarfBuzzShaper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "gfxTextRun.h"
#include "mozilla/Sprintf.h"
#include "mozilla/intl/String.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
#include "nsUnicodeScriptCodes.h"

Expand Down Expand Up @@ -981,7 +982,7 @@ static hb_position_t HBGetHKerning(hb_font_t* font, void* font_data,

static hb_codepoint_t HBGetMirroring(hb_unicode_funcs_t* ufuncs,
hb_codepoint_t aCh, void* user_data) {
return GetMirroredChar(aCh);
return intl::UnicodeProperties::CharMirror(aCh);
}

static hb_unicode_general_category_t HBGetGeneralCategory(
Expand All @@ -996,13 +997,14 @@ static hb_script_t HBGetScript(hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh,

static hb_unicode_combining_class_t HBGetCombiningClass(
hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh, void* user_data) {
return hb_unicode_combining_class_t(GetCombiningClass(aCh));
return hb_unicode_combining_class_t(
intl::UnicodeProperties::GetCombiningClass(aCh));
}

static hb_bool_t HBUnicodeCompose(hb_unicode_funcs_t* ufuncs, hb_codepoint_t a,
hb_codepoint_t b, hb_codepoint_t* ab,
void* user_data) {
char32_t ch = mozilla::intl::String::ComposePairNFC(a, b);
char32_t ch = intl::String::ComposePairNFC(a, b);
if (ch > 0) {
*ab = ch;
return true;
Expand All @@ -1025,7 +1027,7 @@ static hb_bool_t HBUnicodeDecompose(hb_unicode_funcs_t* ufuncs,
#endif

char32_t decomp[2] = {0};
if (mozilla::intl::String::DecomposeRawNFD(ab, decomp)) {
if (intl::String::DecomposeRawNFD(ab, decomp)) {
if (decomp[1] || decomp[0] != ab) {
*a = decomp[0];
*b = decomp[1];
Expand Down
7 changes: 4 additions & 3 deletions gfx/thebes/gfxScriptItemizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

#include "gfxScriptItemizer.h"
#include "mozilla/intl/Script.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
#include "nsCharTraits.h"
#include "harfbuzz/hb.h"
Expand Down Expand Up @@ -177,12 +178,12 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
*/
gc = GetGeneralCategory(ch);
if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch);
uint32_t endPairChar = mozilla::intl::UnicodeProperties::CharMirror(ch);
if (endPairChar != ch) {
push(endPairChar, scriptCode);
}
} else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
HasMirroredChar(ch)) {
mozilla::intl::UnicodeProperties::IsMirrored(ch)) {
while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
pop();
}
Expand Down Expand Up @@ -220,7 +221,7 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
* pop the matching open character from the stack
*/
if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
HasMirroredChar(ch)) {
mozilla::intl::UnicodeProperties::IsMirrored(ch)) {
pop();
}
} else {
Expand Down
1 change: 1 addition & 0 deletions intl/components/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ EXPORTS.mozilla.intl = [
"src/Script.h",
"src/String.h",
"src/TimeZone.h",
"src/UnicodeProperties.h",
]

UNIFIED_SOURCES += [
Expand Down
219 changes: 219 additions & 0 deletions intl/components/src/UnicodeProperties.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef intl_components_UnicodeProperties_h_
#define intl_components_UnicodeProperties_h_

#include "unicode/uchar.h"
#include "unicode/uscript.h"

namespace mozilla::intl {

/**
* This component is a Mozilla-focused API for working with text properties.
*/
class UnicodeProperties final {
public:
/**
* Maps the specified character to a "mirror-image" character.
*/
static inline uint32_t CharMirror(uint32_t aCh) { return u_charMirror(aCh); }

/**
* Return the general category value for the code point.
*/
static inline uint32_t CharType(uint32_t aCh) { return u_charType(aCh); }

/**
* Determine whether the code point has the Bidi_Mirrored property.
*/
static inline bool IsMirrored(uint32_t aCh) { return u_isMirrored(aCh); }

/**
* Returns the combining class of the code point as specified in
* UnicodeData.txt.
*/
static inline uint8_t GetCombiningClass(uint32_t aCh) {
return u_getCombiningClass(aCh);
}

enum class IntProperty {
BidiPairedBracketType,
EastAsianWidth,
HangulSyllableType,
LineBreak,
NumericType,
};

/**
* Get the property value for an enumerated or integer Unicode property for a
* code point.
*/
static inline int32_t GetIntPropertyValue(uint32_t aCh, IntProperty aProp) {
UProperty prop;
switch (aProp) {
case IntProperty::BidiPairedBracketType:
prop = UCHAR_BIDI_PAIRED_BRACKET_TYPE;
break;
case IntProperty::EastAsianWidth:
prop = UCHAR_EAST_ASIAN_WIDTH;
break;
case IntProperty::HangulSyllableType:
prop = UCHAR_HANGUL_SYLLABLE_TYPE;
break;
case IntProperty::LineBreak:
prop = UCHAR_LINE_BREAK;
break;
case IntProperty::NumericType:
prop = UCHAR_NUMERIC_TYPE;
break;
}
return u_getIntPropertyValue(aCh, prop);
}

/**
* Get the numeric value for a Unicode code point as defined in the
* Unicode Character Database if the input is decimal or a digit,
* otherwise, returns -1.
*/
static inline int8_t GetNumericValue(uint32_t aCh) {
UNumericType type =
UNumericType(GetIntPropertyValue(aCh, IntProperty::NumericType));
return type == U_NT_DECIMAL || type == U_NT_DIGIT
? int8_t(u_getNumericValue(aCh))
: -1;
}

/**
* Maps the specified character to its paired bracket character.
*/
static inline uint32_t GetBidiPairedBracket(uint32_t aCh) {
return u_getBidiPairedBracket(aCh);
}

/**
* The given character is mapped to its uppercase equivalent according to
* UnicodeData.txt; if the character has no uppercase equivalent, the
* character itself is returned.
*/
static inline uint32_t ToUpper(uint32_t aCh) { return u_toupper(aCh); }

/**
* The given character is mapped to its lowercase equivalent according to
* UnicodeData.txt; if the character has no lowercase equivalent, the
* character itself is returned.
*/
static inline uint32_t ToLower(uint32_t aCh) { return u_tolower(aCh); }

/**
* Check if a code point has the Lowercase Unicode property.
*/
static inline bool IsLowercase(uint32_t aCh) { return u_isULowercase(aCh); }

/**
* The given character is mapped to its titlecase equivalent according to
* UnicodeData.txt; if the character has no titlecase equivalent, the
* character itself is returned.
*/
static inline uint32_t ToTitle(uint32_t aCh) { return u_totitle(aCh); }

/**
* The given character is mapped to its case folding equivalent according to
* UnicodeData.txt and CaseFolding.txt;
* if the character has no case folding equivalent, the character
* itself is returned.
*/
static inline uint32_t FoldCase(uint32_t aCh) {
return u_foldCase(aCh, U_FOLD_CASE_DEFAULT);
}

enum class BinaryProperty {
DefaultIgnorableCodePoint,
Emoji,
EmojiPresentation,
};

/**
* Check a binary Unicode property for a code point.
*/
static inline bool HasBinaryProperty(uint32_t aCh, BinaryProperty aProp) {
UProperty prop;
switch (aProp) {
case BinaryProperty::DefaultIgnorableCodePoint:
prop = UCHAR_DEFAULT_IGNORABLE_CODE_POINT;
break;
case BinaryProperty::Emoji:
prop = UCHAR_EMOJI;
break;
case BinaryProperty::EmojiPresentation:
prop = UCHAR_EMOJI_PRESENTATION;
break;
}
return u_hasBinaryProperty(aCh, prop);
}

/**
* Check if the width of aCh is full width, half width or wide
* excluding emoji.
*/
static inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) {
switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
case U_EA_FULLWIDTH:
case U_EA_HALFWIDTH:
return true;
case U_EA_WIDE:
return HasBinaryProperty(aCh, BinaryProperty::Emoji) ? false : true;
case U_EA_AMBIGUOUS:
case U_EA_NARROW:
case U_EA_NEUTRAL:
return false;
}
return false;
}

/**
* Check if the width of aCh is ambiguous, full width, or wide.
*/
static inline bool IsEastAsianWidthAFW(uint32_t aCh) {
switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
case U_EA_AMBIGUOUS:
case U_EA_FULLWIDTH:
case U_EA_WIDE:
return true;
case U_EA_HALFWIDTH:
case U_EA_NARROW:
case U_EA_NEUTRAL:
return false;
}
return false;
}

/**
* Check if the width of aCh is full width, or wide.
*/
static inline bool IsEastAsianWidthFW(uint32_t aCh) {
switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
case U_EA_FULLWIDTH:
case U_EA_WIDE:
return true;
case U_EA_AMBIGUOUS:
case U_EA_HALFWIDTH:
case U_EA_NARROW:
case U_EA_NEUTRAL:
return false;
}
return false;
}

/**
* Check if the CharType of aCh is math or other symbol.
*/
static inline bool IsMathOrMusicSymbol(uint32_t aCh) {
// Keep this function in sync with is_math_symbol in base_chars.py.
return CharType(aCh) == U_MATH_SYMBOL || CharType(aCh) == U_OTHER_SYMBOL;
}
};

} // namespace mozilla::intl

#endif
19 changes: 13 additions & 6 deletions intl/lwbrk/LineBreaker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "nsUnicodeProperties.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/intl/Segmenter.h"
#include "mozilla/intl/UnicodeProperties.h"

using namespace mozilla::unicode;
using namespace mozilla::intl;
Expand Down Expand Up @@ -462,10 +463,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (aIsChineseOrJapanese) {
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_POSTFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_PREFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_OPEN_LIKE_CHARACTER;
}
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
Expand All @@ -485,10 +488,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (aIsChineseOrJapanese) {
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_POSTFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_PREFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_OPEN_LIKE_CHARACTER;
}
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
Expand All @@ -513,10 +518,12 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
u == 0xFF01 || u == 0xFF1F) {
return CLASS_BREAKABLE;
}
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_POSTFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_BREAKABLE;
}
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
if (cls == U_LB_PREFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
return CLASS_BREAKABLE;
}
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
Expand Down
Loading

0 comments on commit 50b8416

Please sign in to comment.