Skip to content

Commit

Permalink
Bug 1806042 - Replace Narrow No-Break Space (U+202F) and Thin Space (…
Browse files Browse the repository at this point in the history
…U+2009) in DateTimeFormat/DateTimeIntervalFormat output with regular Space to mitigate breakage on fragile websites. r=anba

The data for a bunch of locales was updated in ICU 72 to use U+202F and U+2009 in places where previously it had regular Space characters.
Unfortunately, this breaks some sites that attempt to parse the formatted output using naive regular expressions (or similar)
that just expect space, rather than "any whitespace", and fail to match against the new formatted output.

To mitigate this, until more browsers update to the newer ICU/CLDR data and pressure builds on sites to fix such fragile scripts,
we can post-process the formatted output from ICU to replace these "special" spaces with standard ASCII space characters.

This workaround is designed to be easily disabled at build time by just changing the DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES #define,
when we're ready to try re-enabling the updated formats.

Differential Revision: https://phabricator.services.mozilla.com/D165408
  • Loading branch information
jfkthame committed Dec 24, 2022
1 parent 90152bb commit 5ef0f30
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 1 deletion.
29 changes: 29 additions & 0 deletions intl/components/src/DateIntervalFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "DateTimeFormat.h" // for DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
#include "DateTimeFormatUtils.h"
#include "ScopedICUObject.h"

Expand Down Expand Up @@ -70,6 +71,26 @@ DateIntervalFormat::~DateIntervalFormat() {
udtitvfmt_close(mDateIntervalFormat.GetMut());
}

#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
// We reach inside the UFormattedValue and modify its internal string. (It's
// crucial that this is just an in-place replacement that doesn't alter any
// field positions, etc., )
static void ReplaceSpecialSpaces(const UFormattedValue* aValue) {
UErrorCode status = U_ZERO_ERROR;
int32_t len;
const UChar* str = ufmtval_getString(aValue, &len, &status);
if (U_FAILURE(status)) {
return;
}

for (const auto& c : Span(str, len)) {
if (IsSpecialSpace(c)) {
const_cast<UChar&>(c) = ' ';
}
}
}
#endif

ICUResult DateIntervalFormat::TryFormatCalendar(
const Calendar& aStart, const Calendar& aEnd,
AutoFormattedDateInterval& aFormatted, bool* aPracticallyEqual) const {
Expand All @@ -84,6 +105,10 @@ ICUResult DateIntervalFormat::TryFormatCalendar(
return Err(ToICUError(status));
}

#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
ReplaceSpecialSpaces(aFormatted.Value());
#endif

MOZ_TRY(DateFieldsPracticallyEqual(aFormatted.Value(), aPracticallyEqual));
return Ok();
}
Expand All @@ -100,6 +125,10 @@ ICUResult DateIntervalFormat::TryFormatDateTime(
return Err(ToICUError(status));
}

#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
ReplaceSpecialSpaces(aFormatted.Value());
#endif

MOZ_TRY(DateFieldsPracticallyEqual(aFormatted.Value(), aPracticallyEqual));
return Ok();
}
Expand Down
52 changes: 51 additions & 1 deletion intl/components/src/DateTimeFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,29 @@
#include "mozilla/Variant.h"
#include "mozilla/Vector.h"

/*
* To work around webcompat problems caused by Narrow No-Break Space
* (U+202F) and Thin Space (U+2009) in formatted date/time output, where
* existing code on the web naively assumes there will be a normal Space,
* we replace any occurrences of these characters in the formatted results
* with U+0020.
*
* The intention is to undo this hack once other major browsers are also
* ready to ship with the updated (ICU72) i18n data that uses these
* characters.
*
* See https://bugzilla.mozilla.org/show_bug.cgi?id=1806042 for details,
* and see DateIntervalFormat.cpp for the other piece of this hack.
*/
#define DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES 1

namespace mozilla::intl {

#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
// Returns true when |c| is one of the two space characters singled out by the
// DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES workaround, false for every other
// code unit (including the ordinary U+0020 SPACE).
static inline bool IsSpecialSpace(char16_t c) {
  switch (c) {
    case 0x2009:  // THIN SPACE
    case 0x202F:  // NARROW NO-BREAK SPACE
      return true;
    default:
      return false;
  }
}
#endif

class Calendar;

/**
Expand Down Expand Up @@ -329,6 +350,14 @@ class DateTimeFormat final {
return result;
}

#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
for (auto& c : u16Vec) {
if (IsSpecialSpace(c)) {
c = ' ';
}
}
#endif

if (!FillBuffer(u16Vec, aBuffer)) {
return Err(ICUError::OutOfMemory);
}
Expand All @@ -337,11 +366,24 @@ class DateTimeFormat final {
static_assert(std::is_same_v<typename B::CharType, char16_t>);

// The output buffer is UTF-16. ICU can output directly into this buffer.
return FillBufferWithICUCall(
auto result = FillBufferWithICUCall(
aBuffer, [&](UChar* target, int32_t length, UErrorCode* status) {
return udat_format(mDateFormat, aUnixEpoch, target, length, nullptr,
status);
});
if (result.isErr()) {
return result;
}

#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
for (auto& c : Span(aBuffer.data(), aBuffer.length())) {
if (IsSpecialSpace(c)) {
c = ' ';
}
}
#endif

return Ok{};
}
};

Expand Down Expand Up @@ -380,6 +422,14 @@ class DateTimeFormat final {
return result.propagateErr();
}

#if DATE_TIME_FORMAT_REPLACE_SPECIAL_SPACES
for (auto& c : Span(aBuffer.data(), aBuffer.length())) {
if (IsSpecialSpace(c)) {
c = ' ';
}
}
#endif

return TryFormatToParts(fpositer, aBuffer.length(), aParts);
}

Expand Down

0 comments on commit 5ef0f30

Please sign in to comment.