Skip to content

Commit

Permalink
unicode: upgrade to 8.0.0
Browse files Browse the repository at this point in the history
Not sure if I'm on time for 1.5; Unicode 8 just got released.

Straightforward upgrade. Only changed maketables.go to prevent it from adding
the Cherokee upper and lower case mappings. This change causes the caseOrbit
table to NOT change. Added tests to verify that the relevant functions still
produce the correct result, even for Cherokee.

Fixes golang#11309

Change-Id: I42850f5b3399bde125b002efc78eff96dbd86a08
Reviewed-on: https://go-review.googlesource.com/11286
Reviewed-by: Russ Cox <[email protected]>
  • Loading branch information
mpvl authored and rsc committed Jun 26, 2015
1 parent 834fef8 commit fe15da6
Show file tree
Hide file tree
Showing 7 changed files with 437 additions and 215 deletions.
1 change: 1 addition & 0 deletions api/except.txt
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,4 @@ pkg syscall (netbsd-arm), type IfMsghdr struct, Pad_cgo_1 [4]uint8
pkg syscall (netbsd-arm-cgo), const SizeofIfData = 132
pkg syscall (netbsd-arm-cgo), type IfMsghdr struct, Pad_cgo_1 [4]uint8
pkg unicode, const Version = "6.3.0"
pkg unicode, const Version = "7.0.0"
7 changes: 7 additions & 0 deletions api/next.txt
Original file line number Diff line number Diff line change
Expand Up @@ -948,3 +948,10 @@ pkg syscall (openbsd-amd64-cgo), type SysProcAttr struct, Pgid int
pkg text/template, method (*Template) DefinedTemplates() string
pkg text/template, method (*Template) Option(...string) *Template
pkg time, method (Time) AppendFormat([]uint8, string) []uint8
pkg unicode, const Version = "8.0.0"
pkg unicode, var Ahom *RangeTable
pkg unicode, var Anatolian_Hieroglyphs *RangeTable
pkg unicode, var Hatran *RangeTable
pkg unicode, var Multani *RangeTable
pkg unicode, var Old_Hungarian *RangeTable
pkg unicode, var SignWriting *RangeTable
89 changes: 51 additions & 38 deletions src/strconv/isprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

package strconv

// (468+138+67)*2 + (326)*4 = 2650 bytes
// (470+136+73)*2 + (342)*4 = 2726 bytes

var isPrint16 = []uint16{
0x0020, 0x007e,
Expand All @@ -26,8 +26,8 @@ var isPrint16 = []uint16{
0x0800, 0x082d,
0x0830, 0x085b,
0x085e, 0x085e,
0x08a0, 0x08b2,
0x08e4, 0x098c,
0x08a0, 0x08b4,
0x08e3, 0x098c,
0x098f, 0x0990,
0x0993, 0x09b2,
0x09b6, 0x09b9,
Expand All @@ -51,6 +51,7 @@ var isPrint16 = []uint16{
0x0ad0, 0x0ad0,
0x0ae0, 0x0ae3,
0x0ae6, 0x0af1,
0x0af9, 0x0af9,
0x0b01, 0x0b0c,
0x0b0f, 0x0b10,
0x0b13, 0x0b39,
Expand All @@ -73,7 +74,7 @@ var isPrint16 = []uint16{
0x0be6, 0x0bfa,
0x0c00, 0x0c39,
0x0c3d, 0x0c4d,
0x0c55, 0x0c59,
0x0c55, 0x0c5a,
0x0c60, 0x0c63,
0x0c66, 0x0c6f,
0x0c78, 0x0cb9,
Expand All @@ -84,7 +85,7 @@ var isPrint16 = []uint16{
0x0d01, 0x0d3a,
0x0d3d, 0x0d4e,
0x0d57, 0x0d57,
0x0d60, 0x0d63,
0x0d5f, 0x0d63,
0x0d66, 0x0d75,
0x0d79, 0x0d7f,
0x0d82, 0x0d96,
Expand Down Expand Up @@ -117,7 +118,8 @@ var isPrint16 = []uint16{
0x1318, 0x135a,
0x135d, 0x137c,
0x1380, 0x1399,
0x13a0, 0x13f4,
0x13a0, 0x13f5,
0x13f8, 0x13fd,
0x1400, 0x169c,
0x16a0, 0x16f8,
0x1700, 0x1714,
Expand Down Expand Up @@ -167,16 +169,17 @@ var isPrint16 = []uint16{
0x2030, 0x205e,
0x2070, 0x2071,
0x2074, 0x209c,
0x20a0, 0x20bd,
0x20a0, 0x20be,
0x20d0, 0x20f0,
0x2100, 0x2189,
0x2100, 0x218b,
0x2190, 0x23fa,
0x2400, 0x2426,
0x2440, 0x244a,
0x2460, 0x2b73,
0x2b76, 0x2b95,
0x2b98, 0x2bb9,
0x2bbd, 0x2bd1,
0x2bec, 0x2bef,
0x2c00, 0x2cf3,
0x2cf9, 0x2d27,
0x2d2d, 0x2d2d,
Expand All @@ -193,19 +196,19 @@ var isPrint16 = []uint16{
0x3131, 0x31ba,
0x31c0, 0x31e3,
0x31f0, 0x4db5,
0x4dc0, 0x9fcc,
0x4dc0, 0x9fd5,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
0xa640, 0xa6f7,
0xa700, 0xa7ad,
0xa7b0, 0xa7b1,
0xa7b0, 0xa7b7,
0xa7f7, 0xa82b,
0xa830, 0xa839,
0xa840, 0xa877,
0xa880, 0xa8c4,
0xa8ce, 0xa8d9,
0xa8e0, 0xa8fb,
0xa8e0, 0xa8fd,
0xa900, 0xa953,
0xa95f, 0xa97c,
0xa980, 0xa9d9,
Expand All @@ -217,9 +220,8 @@ var isPrint16 = []uint16{
0xab01, 0xab06,
0xab09, 0xab0e,
0xab11, 0xab16,
0xab20, 0xab5f,
0xab64, 0xab65,
0xabc0, 0xabed,
0xab20, 0xab65,
0xab70, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
Expand All @@ -234,8 +236,7 @@ var isPrint16 = []uint16{
0xfd92, 0xfdc7,
0xfdf0, 0xfdfd,
0xfe00, 0xfe19,
0xfe20, 0xfe2d,
0xfe30, 0xfe6b,
0xfe20, 0xfe6b,
0xfe70, 0xfefc,
0xff01, 0xffbe,
0xffc2, 0xffc7,
Expand Down Expand Up @@ -370,8 +371,6 @@ var isNotPrint16 = []uint16{
0x318f,
0x321f,
0x32ff,
0xa69e,
0xa78f,
0xa9ce,
0xa9ff,
0xab27,
Expand Down Expand Up @@ -418,12 +417,13 @@ var isPrint32 = []uint32{
0x01083c, 0x01083c,
0x01083f, 0x01089e,
0x0108a7, 0x0108af,
0x010900, 0x01091b,
0x0108e0, 0x0108f5,
0x0108fb, 0x01091b,
0x01091f, 0x010939,
0x01093f, 0x01093f,
0x010980, 0x0109b7,
0x0109be, 0x0109bf,
0x010a00, 0x010a06,
0x0109bc, 0x0109cf,
0x0109d2, 0x010a06,
0x010a0c, 0x010a33,
0x010a38, 0x010a3a,
0x010a3f, 0x010a47,
Expand All @@ -438,6 +438,9 @@ var isPrint32 = []uint32{
0x010b99, 0x010b9c,
0x010ba9, 0x010baf,
0x010c00, 0x010c48,
0x010c80, 0x010cb2,
0x010cc0, 0x010cf2,
0x010cfa, 0x010cff,
0x010e60, 0x010e7e,
0x011000, 0x01104d,
0x011052, 0x01106f,
Expand All @@ -446,37 +449,42 @@ var isPrint32 = []uint32{
0x0110f0, 0x0110f9,
0x011100, 0x011143,
0x011150, 0x011176,
0x011180, 0x0111c8,
0x0111cd, 0x0111cd,
0x0111d0, 0x0111da,
0x0111e1, 0x0111f4,
0x011180, 0x0111cd,
0x0111d0, 0x0111f4,
0x011200, 0x01123d,
0x011280, 0x0112a9,
0x0112b0, 0x0112ea,
0x0112f0, 0x0112f9,
0x011301, 0x01130c,
0x011300, 0x01130c,
0x01130f, 0x011310,
0x011313, 0x011339,
0x01133c, 0x011344,
0x011347, 0x011348,
0x01134b, 0x01134d,
0x011350, 0x011350,
0x011357, 0x011357,
0x01135d, 0x011363,
0x011366, 0x01136c,
0x011370, 0x011374,
0x011480, 0x0114c7,
0x0114d0, 0x0114d9,
0x011580, 0x0115b5,
0x0115b8, 0x0115c9,
0x0115b8, 0x0115dd,
0x011600, 0x011644,
0x011650, 0x011659,
0x011680, 0x0116b7,
0x0116c0, 0x0116c9,
0x011700, 0x011719,
0x01171d, 0x01172b,
0x011730, 0x01173f,
0x0118a0, 0x0118f2,
0x0118ff, 0x0118ff,
0x011ac0, 0x011af8,
0x012000, 0x012398,
0x012000, 0x012399,
0x012400, 0x012474,
0x012480, 0x012543,
0x013000, 0x01342e,
0x014400, 0x014646,
0x016800, 0x016a38,
0x016a40, 0x016a69,
0x016a6e, 0x016a6f,
Expand All @@ -497,7 +505,7 @@ var isPrint32 = []uint32{
0x01d000, 0x01d0f5,
0x01d100, 0x01d126,
0x01d129, 0x01d172,
0x01d17b, 0x01d1dd,
0x01d17b, 0x01d1e8,
0x01d200, 0x01d245,
0x01d300, 0x01d356,
0x01d360, 0x01d371,
Expand All @@ -508,7 +516,8 @@ var isPrint32 = []uint32{
0x01d50d, 0x01d546,
0x01d54a, 0x01d6a5,
0x01d6a8, 0x01d7cb,
0x01d7ce, 0x01d7ff,
0x01d7ce, 0x01da8b,
0x01da9b, 0x01daaf,
0x01e800, 0x01e8c4,
0x01e8c7, 0x01e8d6,
0x01ee00, 0x01ee24,
Expand All @@ -530,13 +539,7 @@ var isPrint32 = []uint32{
0x01f210, 0x01f23a,
0x01f240, 0x01f248,
0x01f250, 0x01f251,
0x01f300, 0x01f32c,
0x01f330, 0x01f37d,
0x01f380, 0x01f3ce,
0x01f3d4, 0x01f3f7,
0x01f400, 0x01f54a,
0x01f550, 0x01f642,
0x01f645, 0x01f6cf,
0x01f300, 0x01f6d0,
0x01f6e0, 0x01f6ec,
0x01f6f0, 0x01f6f3,
0x01f700, 0x01f773,
Expand All @@ -546,9 +549,13 @@ var isPrint32 = []uint32{
0x01f850, 0x01f859,
0x01f860, 0x01f887,
0x01f890, 0x01f8ad,
0x01f910, 0x01f918,
0x01f980, 0x01f984,
0x01f9c0, 0x01f9c0,
0x020000, 0x02a6d6,
0x02a700, 0x02b734,
0x02b740, 0x02b81d,
0x02b820, 0x02cea1,
0x02f800, 0x02fa1d,
0x0e0100, 0x0e01ef,
}
Expand All @@ -562,12 +569,18 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0x0809,
0x0836,
0x0856,
0x08f3,
0x0a04,
0x0a14,
0x0a18,
0x10bd,
0x1135,
0x11e0,
0x1212,
0x1287,
0x1289,
0x128e,
0x129e,
0x1304,
0x1329,
0x1331,
Expand All @@ -589,6 +602,7 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0xd53f,
0xd545,
0xd551,
0xdaa0,
0xee04,
0xee20,
0xee23,
Expand Down Expand Up @@ -618,7 +632,6 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0xf0c0,
0xf0d0,
0xf12f,
0xf4ff,
0xf57a,
0xf5a4,
}
14 changes: 14 additions & 0 deletions src/unicode/letter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ var upperTest = []rune{
0x181,
0x376,
0x3cf,
0x13bd,
0x1f2a,
0x2102,
0x2c00,
Expand All @@ -46,6 +47,7 @@ var notupperTest = []rune{
0x377,
0x387,
0x2150,
0xab7d,
0xffff,
0x10000,
}
Expand Down Expand Up @@ -194,6 +196,15 @@ var caseTest = []caseT{
{LowerCase, 0x0148, 0x0148},
{TitleCase, 0x0148, 0x0147},

// Lowercase lower than uppercase.
// AB78;CHEROKEE SMALL LETTER GE;Ll;0;L;;;;;N;;;13A8;;13A8
{UpperCase, 0xab78, 0x13a8},
{LowerCase, 0xab78, 0xab78},
{TitleCase, 0xab78, 0x13a8},
{UpperCase, 0x13a8, 0x13a8},
{LowerCase, 0x13a8, 0xab78},
{TitleCase, 0x13a8, 0x13a8},

// Last block in the 5.1.0 table
// 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
{UpperCase, 0x10400, 0x10400},
Expand Down Expand Up @@ -405,6 +416,9 @@ var simpleFoldTests = []string{
// Extra special cases: has lower/upper but no case fold.
"İ",
"ı",

// Upper comes before lower (Cherokee).
"\u13b0\uab80",
}

func TestSimpleFold(t *testing.T) {
Expand Down
7 changes: 5 additions & 2 deletions src/unicode/maketables.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func main() {
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
var url = flag.String("url",
"http://www.unicode.org/Public/7.0.0/ucd/",
"http://www.unicode.org/Public/8.0.0/ucd/",
"URL of Unicode database directory")
var tablelist = flag.String("tables",
"all",
Expand Down Expand Up @@ -1152,11 +1152,14 @@ func printCasefold() {
}
}

// Delete the groups for which assuming [lower, upper] is right.
// Delete the groups for which assuming [lower, upper] or [upper, lower] is right.
for i, orb := range caseOrbit {
if len(orb) == 2 && chars[orb[0]].upperCase == orb[1] && chars[orb[1]].lowerCase == orb[0] {
caseOrbit[i] = nil
}
if len(orb) == 2 && chars[orb[1]].upperCase == orb[0] && chars[orb[0]].lowerCase == orb[1] {
caseOrbit[i] = nil
}
}

// Record orbit information in chars.
Expand Down
Loading

0 comments on commit fe15da6

Please sign in to comment.