From 7e4df308a7f39bf1027ca6bc1f37d0a3b2e79e76 Mon Sep 17 00:00:00 2001 From: Ian Hickson Date: Wed, 6 Dec 2017 12:17:30 -0800 Subject: [PATCH] Make Locale know about the deprecated Hebrew language code. (#4411) ...by making it know about ALL the deprecated language and region codes. --- lib/ui/window.dart | 161 ++++++++++++++++++++++++++++++++-- testing/dart/locale_test.dart | 22 +++++ tools/gen_locale.dart | 71 +++++++++++++++ 3 files changed, 249 insertions(+), 5 deletions(-) create mode 100644 testing/dart/locale_test.dart create mode 100644 tools/gen_locale.dart diff --git a/lib/ui/window.dart b/lib/ui/window.dart index 7f710e33e310c..f6a9d6e01d4fe 100644 --- a/lib/ui/window.dart +++ b/lib/ui/window.dart @@ -111,6 +111,13 @@ class WindowPadding { /// consisting of a language and a country. This is a subset of locale /// identifiers as defined by BCP 47. /// +/// Locales are canonicalized according to the "preferred value" entries in the +/// [IANA Language Subtag +/// Registry](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry). +/// For example, `const Locale('he')` and `const Locale('iw')` are equal and +/// both have the [languageCode] `he`, because `iw` is a deprecated language +/// subtag that was replaced by the subtag `he`. +/// /// See also: /// /// * [Window.locale], which specifies the system's currently selected @@ -125,14 +132,151 @@ class Locale { /// const Locale swissFrench = const Locale('fr', 'CH'); /// const Locale canadianFrench = const Locale('fr', 'CA'); /// ``` - const Locale(this.languageCode, this.countryCode); + /// + /// The primary language subtag must not be null. The region subtag is + /// optional. + /// + /// The values are _case sensitive_, and should match the case of the relevant + /// subtags in the [IANA Language Subtag + /// Registry](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry). 
+ /// Typically this means the primary language subtag should be lowercase and + /// the region subtag should be uppercase. + const Locale(this._languageCode, [ this._countryCode ]) : assert(_languageCode != null); /// The primary language subtag for the locale. - final String languageCode; + /// + /// This must not be null. + /// + /// This is expected to be a string registered in the [IANA Language Subtag + /// Registry](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry) + /// with the type "language". The string specified must match the case of the + /// string in the registry. + /// + /// Language subtags that are deprecated in the registry and have a preferred + /// code are changed to their preferred code. For example, `const + /// Locale('he')` and `const Locale('iw')` are equal, and both have the + /// [languageCode] `he`, because `iw` is a deprecated language subtag that was + /// replaced by the subtag `he`. + String get languageCode => _canonicalizeLanguageCode(_languageCode); + final String _languageCode; + + static String _canonicalizeLanguageCode(String languageCode) { + // This switch statement is generated by //flutter/tools/gen_locale.dart + // Mappings generated for language subtag registry as of 2017-08-15. 
+ switch (languageCode) { + case 'in': return 'id'; // Indonesian; deprecated 1989-01-01 + case 'iw': return 'he'; // Hebrew; deprecated 1989-01-01 + case 'ji': return 'yi'; // Yiddish; deprecated 1989-01-01 + case 'jw': return 'jv'; // Javanese; deprecated 2001-08-13 + case 'mo': return 'ro'; // Moldavian, Moldovan; deprecated 2008-11-22 + case 'aam': return 'aas'; // Aramanik; deprecated 2015-02-12 + case 'adp': return 'dz'; // Adap; deprecated 2015-02-12 + case 'aue': return 'ktz'; // =/Kx'au//'ein; deprecated 2015-02-12 + case 'ayx': return 'nun'; // Ayi (China); deprecated 2011-08-16 + case 'bgm': return 'bcg'; // Baga Mboteni; deprecated 2016-05-30 + case 'bjd': return 'drl'; // Bandjigali; deprecated 2012-08-12 + case 'ccq': return 'rki'; // Chaungtha; deprecated 2012-08-12 + case 'cjr': return 'mom'; // Chorotega; deprecated 2010-03-11 + case 'cka': return 'cmr'; // Khumi Awa Chin; deprecated 2012-08-12 + case 'cmk': return 'xch'; // Chimakum; deprecated 2010-03-11 + case 'coy': return 'pij'; // Coyaima; deprecated 2016-05-30 + case 'cqu': return 'quh'; // Chilean Quechua; deprecated 2016-05-30 + case 'drh': return 'khk'; // Darkhat; deprecated 2010-03-11 + case 'drw': return 'prs'; // Darwazi; deprecated 2010-03-11 + case 'gav': return 'dev'; // Gabutamon; deprecated 2010-03-11 + case 'gfx': return 'vaj'; // Mangetti Dune !Xung; deprecated 2015-02-12 + case 'ggn': return 'gvr'; // Eastern Gurung; deprecated 2016-05-30 + case 'gti': return 'nyc'; // Gbati-ri; deprecated 2015-02-12 + case 'guv': return 'duz'; // Gey; deprecated 2016-05-30 + case 'hrr': return 'jal'; // Horuru; deprecated 2012-08-12 + case 'ibi': return 'opa'; // Ibilo; deprecated 2012-08-12 + case 'ilw': return 'gal'; // Talur; deprecated 2013-09-10 + case 'jeg': return 'oyb'; // Jeng; deprecated 2017-02-23 + case 'kgc': return 'tdf'; // Kasseng; deprecated 2016-05-30 + case 'kgh': return 'kml'; // Upper Tanudan Kalinga; deprecated 2012-08-12 + case 'koj': return 'kwv'; // Sara Dunjo; 
deprecated 2015-02-12 + case 'krm': return 'bmf'; // Krim; deprecated 2017-02-23 + case 'ktr': return 'dtp'; // Kota Marudu Tinagas; deprecated 2016-05-30 + case 'kvs': return 'gdj'; // Kunggara; deprecated 2016-05-30 + case 'kwq': return 'yam'; // Kwak; deprecated 2015-02-12 + case 'kxe': return 'tvd'; // Kakihum; deprecated 2015-02-12 + case 'kzj': return 'dtp'; // Coastal Kadazan; deprecated 2016-05-30 + case 'kzt': return 'dtp'; // Tambunan Dusun; deprecated 2016-05-30 + case 'lii': return 'raq'; // Lingkhim; deprecated 2015-02-12 + case 'lmm': return 'rmx'; // Lamam; deprecated 2014-02-28 + case 'meg': return 'cir'; // Mea; deprecated 2013-09-10 + case 'mst': return 'mry'; // Cataelano Mandaya; deprecated 2010-03-11 + case 'mwj': return 'vaj'; // Maligo; deprecated 2015-02-12 + case 'myt': return 'mry'; // Sangab Mandaya; deprecated 2010-03-11 + case 'nad': return 'xny'; // Nijadali; deprecated 2016-05-30 + case 'nnx': return 'ngv'; // Ngong; deprecated 2015-02-12 + case 'nts': return 'pij'; // Natagaimas; deprecated 2016-05-30 + case 'oun': return 'vaj'; // !O!ung; deprecated 2015-02-12 + case 'pcr': return 'adx'; // Panang; deprecated 2013-09-10 + case 'pmc': return 'huw'; // Palumata; deprecated 2016-05-30 + case 'pmu': return 'phr'; // Mirpur Panjabi; deprecated 2015-02-12 + case 'ppa': return 'bfy'; // Pao; deprecated 2016-05-30 + case 'ppr': return 'lcq'; // Piru; deprecated 2013-09-10 + case 'pry': return 'prt'; // Pray 3; deprecated 2016-05-30 + case 'puz': return 'pub'; // Purum Naga; deprecated 2014-02-28 + case 'sca': return 'hle'; // Sansu; deprecated 2012-08-12 + case 'skk': return 'oyb'; // Sok; deprecated 2017-02-23 + case 'tdu': return 'dtp'; // Tempasuk Dusun; deprecated 2016-05-30 + case 'thc': return 'tpo'; // Tai Hang Tong; deprecated 2016-05-30 + case 'thx': return 'oyb'; // The; deprecated 2015-02-12 + case 'tie': return 'ras'; // Tingal; deprecated 2011-08-16 + case 'tkk': return 'twm'; // Takpa; deprecated 2011-08-16 + case 'tlw': 
return 'weo'; // South Wemale; deprecated 2012-08-12 + case 'tmp': return 'tyj'; // Tai Mène; deprecated 2016-05-30 + case 'tne': return 'kak'; // Tinoc Kallahan; deprecated 2016-05-30 + case 'tnf': return 'prs'; // Tangshewi; deprecated 2010-03-11 + case 'tsf': return 'taj'; // Southwestern Tamang; deprecated 2015-02-12 + case 'uok': return 'ema'; // Uokha; deprecated 2015-02-12 + case 'xba': return 'cax'; // Kamba (Brazil); deprecated 2016-05-30 + case 'xia': return 'acn'; // Xiandao; deprecated 2013-09-10 + case 'xkh': return 'waw'; // Karahawyana; deprecated 2016-05-30 + case 'xsj': return 'suj'; // Subi; deprecated 2015-02-12 + case 'ybd': return 'rki'; // Yangbye; deprecated 2012-08-12 + case 'yma': return 'lrr'; // Yamphe; deprecated 2012-08-12 + case 'ymt': return 'mtm'; // Mator-Taygi-Karagas; deprecated 2015-02-12 + case 'yos': return 'zom'; // Yos; deprecated 2013-09-10 + case 'yuu': return 'yug'; // Yugh; deprecated 2014-02-28 + default: return languageCode; + } + } /// The region subtag for the locale. - final String countryCode; + /// + /// This can be null. + /// + /// This is expected to be a string registered in the [IANA Language Subtag + /// Registry](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry) + /// with the type "region". The string specified must match the case of the + /// string in the registry. + /// + /// Region subtags that are deprecated in the registry and have a preferred + /// code are changed to their preferred code. For example, `const Locale('de', + /// 'DE')` and `const Locale('de', 'DD')` are equal, and both have the + /// [countryCode] `DE`, because `DD` is a deprecated region subtag that was + /// replaced by the subtag `DE`. 
+ String get countryCode => _canonicalizeRegionCode(_countryCode); + final String _countryCode; + + static String _canonicalizeRegionCode(String regionCode) { + // This switch statement is generated by //flutter/tools/gen_locale.dart + // Mappings generated for language subtag registry as of 2017-08-15. + switch (regionCode) { + case 'BU': return 'MM'; // Burma; deprecated 1989-12-05 + case 'DD': return 'DE'; // German Democratic Republic; deprecated 1990-10-30 + case 'FX': return 'FR'; // Metropolitan France; deprecated 1997-07-14 + case 'TP': return 'TL'; // East Timor; deprecated 2002-05-20 + case 'YD': return 'YE'; // Democratic Yemen; deprecated 1990-08-14 + case 'ZR': return 'CD'; // Zaire; deprecated 1997-07-14 + default: return regionCode; + } + } + @override bool operator ==(dynamic other) { if (identical(this, other)) return true; @@ -143,14 +287,21 @@ class Locale { && countryCode == typedOther.countryCode; } + @override int get hashCode { int result = 373; result = 37 * result + languageCode.hashCode; - result = 37 * result + countryCode.hashCode; + if (_countryCode != null) + result = 37 * result + countryCode.hashCode; return result; } - String toString() => '${languageCode}_$countryCode'; + @override + String toString() { + if (_countryCode == null) + return languageCode; + return '${languageCode}_$countryCode'; + } } /// The most basic interface to the host operating system's user interface. diff --git a/testing/dart/locale_test.dart b/testing/dart/locale_test.dart new file mode 100644 index 0000000000000..602acc3337fef --- /dev/null +++ b/testing/dart/locale_test.dart @@ -0,0 +1,22 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+
+import 'dart:ui';
+
+import 'package:test/test.dart';
+
+void main() {
+  test('Locale', () { // Checks toString, equality, hashCode and canonicalization of deprecated codes.
+    final Null $null = null; // Lets a null country code be passed explicitly.
+    expect(const Locale('en').toString(), 'en');
+    expect(const Locale('en'), new Locale('en', $null)); // An omitted country equals an explicit null.
+    expect(const Locale('en').hashCode, new Locale('en', $null).hashCode);
+    expect(const Locale('en'), isNot(new Locale('en', ''))); // An empty country is not the same as no country.
+    expect(const Locale('en').hashCode, isNot(new Locale('en', '').hashCode));
+    expect(const Locale('en', 'US').toString(), 'en_US');
+    expect(const Locale('iw').toString(), 'he'); // Deprecated language 'iw' is reported as 'he'.
+    expect(const Locale('iw', 'DD').toString(), 'he_DE'); // Deprecated region 'DD' is reported as 'DE'.
+    expect(const Locale('iw', 'DD'), const Locale('he', 'DE'));
+  });
+}
diff --git a/tools/gen_locale.dart b/tools/gen_locale.dart
new file mode 100644
index 0000000000000..82fe08d5efdfb
--- /dev/null
+++ b/tools/gen_locale.dart
@@ -0,0 +1,71 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This file is used to generate the switch statements in the Locale class.
+// See: ../lib/ui/window.dart
+
+// When running this script, use the output of this script to update the
+// comments that say when the script was last run (that comment appears twice in
+// window.dart), and then replace all the "case" statements with the output from
+// this script (the first set for _canonicalizeLanguageCode and the second set
+// for _canonicalizeRegionCode).
+
+import 'dart:async';
+import 'dart:convert';
+import 'dart:io';
+
+const String registry = 'https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry';
+
+Map<String, List<String>> parseSection(String section) { // Maps each field name of one record to its values, in order of appearance.
+  final Map<String, List<String>> result = <String, List<String>>{};
+  List<String> lastHeading; // Value list of the most recently seen field.
+  for (String line in section.split('\n')) {
+    if (line == '')
+      continue;
+    if (line.startsWith(' ')) { // Continuation line: append it to the previous value.
+      lastHeading[lastHeading.length - 1] = '${lastHeading.last}${line.substring(1)}';
+      continue;
+    }
+    final int colon = line.indexOf(':');
+    if (colon <= 0)
+      throw 'not sure how to deal with "$line"';
+    final String name = line.substring(0, colon);
+    final String value = line.substring(colon + 2); // Skips the colon and the one character after it.
+    lastHeading = result.putIfAbsent(name, () => <String>[]);
+    result[name].add(value);
+  }
+  return result;
+}
+
+Future<Null> main() async { // Downloads the registry and prints the generated case statements.
+  final HttpClient client = new HttpClient(); // NOTE(review): never closed in this script.
+  final String body = (await (await (await client.getUrl(Uri.parse(registry))).close()).transform(UTF8.decoder).toList()).join('');
+  final List<Map<String, List<String>>> sections = body.split('%%').map<Map<String, List<String>>>(parseSection).toList();
+  final Map<String, List<String>> outputs = <String, List<String>>{'language': <String>[], 'region': <String>[]};
+  String fileDate;
+  for (Map<String, List<String>> section in sections) {
+    if (fileDate == null) {
+      // first block should contain a File-Date metadata line.
+      fileDate = section['File-Date'].single;
+      continue;
+    }
+    assert(section.containsKey('Type'), section.toString());
+    final String type = section['Type'].single;
+    if ((type == 'language' || type == 'region') && (section.containsKey('Preferred-Value'))) { // Only records that name a replacement subtag.
+      assert(section.containsKey('Subtag'), section.toString());
+      final String subtag = section['Subtag'].single;
+      final List<String> descriptions = section['Description'];
+      assert(descriptions.isNotEmpty);
+      assert(section.containsKey('Deprecated'));
+      final String comment = section.containsKey('Comment') ? section['Comment'].single : 'deprecated ${section['Deprecated'].single}';
+      final String preferredValue = section['Preferred-Value'].single;
+      outputs[type].add('case \'$subtag\': return \'$preferredValue\'; // ${descriptions.join(", ")}; $comment');
+    }
+  }
+  print('// Mappings generated for language subtag registry as of $fileDate.');
+  print('// For languageCode:');
+  print(outputs['language'].join('\n'));
+  print('// For regionCode:');
+  print(outputs['region'].join('\n'));
+}
\ No newline at end of file