Skip to content

Commit

Permalink
Bug 208789 - patch 1 - Create an intl::GeneralCategory enum for Unico…
Browse files Browse the repository at this point in the history
…deProperties::CharType() to return, to avoid directly referring to ICU4C constants or mapping via harfbuzz constants. r=platform-i18n-reviewers,nordzilla

No change in behavior; this just gives us our own version of the general category constants,
so we can avoid depending on ICU's constants elsewhere in the codebase.

Differential Revision: https://phabricator.services.mozilla.com/D173203
  • Loading branch information
jfkthame committed Mar 23, 2023
1 parent f3a127c commit f452e8d
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 3 deletions.
1 change: 1 addition & 0 deletions intl/components/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ EXPORTS.mozilla.intl = [
"src/DateTimePatternGenerator.h",
"src/DisplayNames.h",
"src/FormatBuffer.h",
"src/GeneralCategory.h",
"src/ICU4CGlue.h",
"src/ICU4CLibrary.h",
"src/ICUError.h",
Expand Down
52 changes: 52 additions & 0 deletions intl/components/src/GeneralCategory.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef intl_components_GeneralCategory_h_
#define intl_components_GeneralCategory_h_

#include <cstdint>

namespace mozilla::intl {

/**
 * Unicode General_Category values.
 *
 * The meaning of each category is described in
 * https://www.unicode.org/reports/tr44/#General_Category_Values
 *
 * The numeric value of every enumerator must remain identical to the
 * corresponding value in ICU's UCharCategory. The trailing comment on each
 * enumerator is the category's short alias from UAX #44.
 */
enum class GeneralCategory : uint8_t {
  // No category assigned.
  Unassigned = 0,  // Cn

  // Letters.
  Uppercase_Letter = 1,  // Lu
  Lowercase_Letter = 2,  // Ll
  Titlecase_Letter = 3,  // Lt
  Modifier_Letter = 4,   // Lm
  Other_Letter = 5,      // Lo

  // Marks.
  Nonspacing_Mark = 6,  // Mn
  Enclosing_Mark = 7,   // Me
  Spacing_Mark = 8,     // Mc

  // Numbers.
  Decimal_Number = 9,  // Nd
  Letter_Number = 10,  // Nl
  Other_Number = 11,   // No

  // Separators.
  Space_Separator = 12,      // Zs
  Line_Separator = 13,       // Zl
  Paragraph_Separator = 14,  // Zp

  // Other.
  Control = 15,      // Cc
  Format = 16,       // Cf
  Private_Use = 17,  // Co
  Surrogate = 18,    // Cs

  // Punctuation.
  Dash_Punctuation = 19,       // Pd
  Open_Punctuation = 20,       // Ps
  Close_Punctuation = 21,      // Pe
  Connector_Punctuation = 22,  // Pc
  Other_Punctuation = 23,      // Po

  // Symbols.
  Math_Symbol = 24,      // Sm
  Currency_Symbol = 25,  // Sc
  Modifier_Symbol = 26,  // Sk
  Other_Symbol = 27,     // So

  // Quotation-style punctuation; numbered after the symbols.
  Initial_Punctuation = 28,  // Pi
  Final_Punctuation = 29,    // Pf

  // Not a category: takes the next implicit value (30), i.e. the number of
  // enumerators listed above.
  GeneralCategoryCount
};

}  // namespace mozilla::intl

#endif
8 changes: 6 additions & 2 deletions intl/components/src/UnicodeProperties.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#define intl_components_UnicodeProperties_h_

#include "mozilla/intl/BidiClass.h"
#include "mozilla/intl/GeneralCategory.h"
#include "mozilla/intl/ICU4CGlue.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "mozilla/Vector.h"
Expand Down Expand Up @@ -34,7 +35,9 @@ class UnicodeProperties final {
/**
* Return the general category value for the code point.
*
* NOTE(review): two definitions follow; they appear to be the pre-change and
* post-change lines of a diff hunk whose +/- markers were lost. Both obtain
* the value from u_charType(aCh).
*/
// Returns the u_charType() result as a plain uint32_t.
static inline uint32_t CharType(uint32_t aCh) { return u_charType(aCh); }
// Returns the u_charType() result converted to the GeneralCategory enum type.
static inline GeneralCategory CharType(uint32_t aCh) {
return GeneralCategory(u_charType(aCh));
}

/**
* Determine whether the code point has the Bidi_Mirrored property.
Expand Down Expand Up @@ -222,7 +225,8 @@ class UnicodeProperties final {
*/
static inline bool IsMathOrMusicSymbol(uint32_t aCh) {
// Keep this function in sync with is_math_symbol in base_chars.py.
//
// NOTE(review): two return statements follow; they appear to be the
// pre-change and post-change lines of a diff hunk whose +/- markers were
// lost. Both test whether CharType(aCh) is the math-symbol or the
// other-symbol category.
//
// Comparison against the U_MATH_SYMBOL / U_OTHER_SYMBOL constants.
return CharType(aCh) == U_MATH_SYMBOL || CharType(aCh) == U_OTHER_SYMBOL;
// Same comparison written with the GeneralCategory enumerators.
return CharType(aCh) == GeneralCategory::Math_Symbol ||
CharType(aCh) == GeneralCategory::Other_Symbol;
}

static inline Script GetScriptCode(uint32_t aCh) {
Expand Down
5 changes: 4 additions & 1 deletion intl/unicharutil/util/nsUnicodeProperties.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,11 @@ const uint32_t kEmojiSkinToneLast = 0x1f3ff;

// Lookup table of hb_unicode_general_category_t values, indexed below by the
// result of intl::UnicodeProperties::CharType(). Only declared here; the
// definition is not visible in this span.
extern const hb_unicode_general_category_t sICUtoHBcategory[];

// NOTE: This returns values matching harfbuzz HB_UNICODE_GENERAL_CATEGORY_*
// constants, NOT the mozilla::intl::GeneralCategory enum.
// For the GeneralCategory enum, use intl::UnicodeProperties::CharType itself.
inline uint8_t GetGeneralCategory(uint32_t aCh) {
// NOTE(review): two return statements follow; they appear to be the
// pre-change and post-change lines of a diff hunk whose +/- markers were
// lost. Both look up sICUtoHBcategory using the CharType() result for aCh.
//
// Indexes the table with the CharType() result directly.
return sICUtoHBcategory[intl::UnicodeProperties::CharType(aCh)];
// Indexes the table with the CharType() result explicitly converted to
// unsigned.
return sICUtoHBcategory[unsigned(intl::UnicodeProperties::CharType(aCh))];
}

inline int8_t GetNumericValue(uint32_t aCh) {
Expand Down

0 comments on commit f452e8d

Please sign in to comment.