string: rename iterator methods again (google#94)

The names chosen by the Bazel Skylark team are:

    now:              formerly:
    elems             split_elems
    elem_ords         elems
    codepoints        split_codepoints
    codepoint_ords    codepoints

codepoints() is the recommended way to iterate over text strings.
coryb · Mar 29, 2018 · 0569d1c · 0569d1c
1 parent fd913d9
commit 0569d1c
Show file tree

Hide file tree

Showing 8 changed files with 137 additions and 136 deletions.
diff --git a/doc/spec.md b/doc/spec.md
@@ -154,8 +154,10 @@ concurrency, and other such features of Python.
     * [list·remove](#list·remove)
     * [set·union](#set·union)
     * [string·capitalize](#string·capitalize)
+    * [string·codepoint_ords](#string·codepoint_ords)
     * [string·codepoints](#string·codepoints)
     * [string·count](#string·count)
+    * [string·elem_ords](#string·elem_ords)
     * [string·elems](#string·elems)
     * [string·endswith](#string·endswith)
     * [string·find](#string·find)
@@ -179,8 +181,6 @@ concurrency, and other such features of Python.
     * [string·rsplit](#string·rsplit)
     * [string·rstrip](#string·rstrip)
     * [string·split](#string·split)
-    * [string·split_codepoints](#string·split_codepoints)
-    * [string·split_elems](#string·split_elems)
     * [string·splitlines](#string·splitlines)
     * [string·startswith](#string·startswith)
     * [string·strip](#string·strip)
@@ -532,7 +532,7 @@ an iterable sequence.
 To obtain a view of a string as an iterable sequence of numeric byte
 values, 1-byte substrings, numeric Unicode code points, or 1-code
 point substrings, you must explicitly call one of its four methods:
-`elems`, `split_elems`, `codepoints`, or `split_codepoints`.
+`elem_ords`, `elems`, `codepoint_ords`, or `codepoints`.
 
 Any value may be formatted as a string using the `str` or `repr` built-in
 functions, the `str % tuple` operator, or the `str.format` method.
@@ -543,8 +543,10 @@ non-empty.
 Strings have several built-in methods:
 
 * [`capitalize`](#string·capitalize)
+* [`codepoint_ords`](#string·codepoint_ords)
 * [`codepoints`](#string·codepoints)
 * [`count`](#string·count)
+* [`elem_ords`](#string·elem_ords)
 * [`elems`](#string·elems)
 * [`endswith`](#string·endswith)
 * [`find`](#string·find)
@@ -569,8 +571,6 @@ Strings have several built-in methods:
 * [`rstrip`](#string·rstrip)
 * [`split`](#string·split)
 * [`splitlines`](#string·splitlines)
-* [`split_codepoints`](#string·split_codepoints)
-* [`split_elems`](#string·split_elems)
 * [`startswith`](#string·startswith)
 * [`strip`](#string·strip)
 * [`title`](#string·title)
@@ -3047,7 +3047,7 @@ repr([1, "x"])          # '[1, "x"]'
 
 ```python
 reversed(range(5))                              # [4, 3, 2, 1, 0]
-reversed("stressed".split_codepoints())         # ["d", "e", "s", "s", "e", "r", "t", "s"]
+reversed("stressed".codepoints())               # ["d", "e", "s", "s", "e", "r", "t", "s"]
 reversed({"one": 1, "two": 2}.keys())           # ["two", "one"]
 ```
 
@@ -3077,7 +3077,7 @@ argument to apply to obtain the value's sort key.
 The default behavior is the identity function.
 
 ```python
-sorted(set("harbors".split_codepoints()))                       # ['a', 'b', 'h', 'o', 'r', 's']
+sorted(set("harbors".codepoints()))                             # ['a', 'b', 'h', 'o', 'r', 's']
 sorted([3, 1, 4, 1, 5, 9])                                      # [1, 1, 3, 4, 5, 9]
 sorted([3, 1, 4, 1, 5, 9], reverse=True)                        # [9, 5, 4, 3, 1, 1]
 
@@ -3343,7 +3343,7 @@ nearest value within that range is used; see [Indexing](#indexing).
 is not a valid index (`int` or `None`).
 
 ```python
-x = list("banana".split_elems())
+x = list("banana".codepoints())
 x.index("a")                            # 1 (bAnana)
 x.index("a", 2)                         # 3 (banAna)
 x.index("a", -2)                        # 5 (bananA)
@@ -3413,23 +3413,23 @@ y = set([2, 3])
 x.union(y)                              # set([1, 2, 3])
 ```
 
-<a id='string·elems'></a>
-### string·elems
+<a id='string·elem_ords'></a>
+### string·elem_ords
 
-`S.elems()` returns an iterable value containing the
+`S.elem_ords()` returns an iterable value containing the
 sequence of numeric byte values in the string S.
 
 To materialize the entire sequence of bytes, apply `list(...)` to the result.
 
 Example:
 
 ```python
-list("Hello, 世界".elems())        # [72, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140]
+list("Hello, 世界".elem_ords())        # [72, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140]
 ```
 
-See also: `string·split_elems`.
+See also: `string·elems`.
 
-<b>Implementation note:</b> `elems` is not provided by the Java implementation.
+<b>Implementation note:</b> `elem_ords` is not provided by the Java implementation.
 
 <a id='string·capitalize'></a>
 ### string·capitalize
@@ -3441,10 +3441,10 @@ that begin words changed to their title case.
 "hello, world!".capitalize()		# "Hello, World!"
 ```
 
-<a id='string·codepoints'></a>
-### string·codepoints
+<a id='string·codepoint_ords'></a>
+### string·codepoint_ords
 
-`S.codepoints()` returns an iterable value containing the
+`S.codepoint_ords()` returns an iterable value containing the
 sequence of integer Unicode code points encoded by the string S.
 Each invalid code within the string is treated as if it encodes the
 Unicode replacement character, U+FFFD.
@@ -3456,17 +3456,15 @@ materialize the entire sequence.
 Example:
 
 ```python
-list("Hello, 世界".codepoints())        # [72, 101, 108, 108, 111, 44, 32, 19990, 30028]
+list("Hello, 世界".codepoint_ords())        # [72, 101, 108, 108, 111, 44, 32, 19990, 30028]
 
-for cp in "Hello, 世界".codepoints():
-   if cp == ord(','):
-      break
-   print(chr(cp))  # prints "H", "e", "l", "l", "o"
+for cp in "Hello, 世界".codepoint_ords():
+   print(chr(cp))  # prints 'H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界'
 ```
 
-See also: `string·split_codepoints`.
+See also: `string·codepoints`.
 
-<b>Implementation note:</b> `codepoints` is not provided by the Java implementation.
+<b>Implementation note:</b> `codepoint_ords` is not provided by the Java implementation.
 
 <a id='string·count'></a>
 ### string·count
@@ -3658,7 +3656,7 @@ are strings.
 
 ```python
 ", ".join(["one", "two", "three"])      # "one, two, three"
-"a".join("ctmrn".split_elems())         # "catamaran"
+"a".join("ctmrn".codepoints())          # "catamaran"
 ```
 
 <a id='string·lower'></a>
@@ -3792,27 +3790,26 @@ If `maxsplit` is given and non-negative, it specifies a maximum number of splits
 "banana".split("n", 1)                      # ["ba", "ana"]
 ```
 
-<a id='string·split_elems'></a>
-### string·split_elems
+<a id='string·elems'></a>
+### string·elems
 
-`S.split_elems()` returns an iterable value containing successive
+`S.elems()` returns an iterable value containing successive
 1-byte substrings of S.
 To materialize the entire sequence, apply `list(...)` to the result.
 
 Example:
 
 ```python
-list('Hello, 世界'.split_elems())  # ["H", "e", "l", "l", "o", ",", " ", "\xe4", "\xb8", "\x96", "\xe7", "\x95", "\x8c"]
+list('Hello, 世界'.elems())  # ["H", "e", "l", "l", "o", ",", " ", "\xe4", "\xb8", "\x96", "\xe7", "\x95", "\x8c"]
 ```
 
-See also: `string·elems`.
+See also: `string·elem_ords`.
 
-<b>Implementation note:</b> `split_elems` is not provided by the Java implementation.
 
-<a id='string·split_codepoints'></a>
-### string·split_codepoints
+<a id='string·codepoints'></a>
+### string·codepoints
 
-`S.split_codepoints()` returns an iterable value containing the sequence of
+`S.codepoints()` returns an iterable value containing the sequence of
 substrings of S that each encode a single Unicode code point.
 Each invalid code within the string is treated as if it encodes the
 Unicode replacement character, U+FFFD.
@@ -3824,17 +3821,15 @@ materialize the entire sequence.
 Example:
 
 ```python
-list('Hello, 世界'.split_codepoints())  # ['H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界']
+list('Hello, 世界'.codepoints())  # ['H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界']
 
-for cp in 'Hello, 世界'.split_codepoints():
-   if cp == ',':
-      break
-   print(cp)  # prints 'H', 'e', 'l', 'l', 'o'
+for cp in 'Hello, 世界'.codepoints():
+   print(cp)  # prints 'H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界'
 ```
 
-See also: `string·codepoints`.
+See also: `string·codepoint_ords`.
 
-<b>Implementation note:</b> `split_codepoints` is not provided by the Java implementation.
+<b>Implementation note:</b> `codepoints` is not provided by the Java implementation.
 
 <a id='string·splitlines'></a>
 ### string·splitlines
@@ -3909,7 +3904,7 @@ eventually to eliminate all such differences on a case-by-case basis.
 * `lambda` expressions are supported (option: `-lambda`).
 * String elements are bytes.
 * Non-ASCII strings are encoded using UTF-8.
-* Strings have the additional methods `elems`, `split_elems`, `codepoints`, and `split_codepoints`.
+* Strings have the additional methods `elem_ords`, `codepoint_ords`, and `codepoints`.
 * The `chr` and `ord` built-in functions are supported.
 * The `set` built-in function is provided (option: `-set`).
 * `x += y` rebindings are permitted at top level.

diff --git a/library.go b/library.go
@@ -97,39 +97,39 @@ var (
 	}
 
 	stringMethods = map[string]builtinMethod{
-		"capitalize":       string_capitalize,
-		"codepoints":       string_iterable,
-		"count":            string_count,
-		"elems":            string_iterable,
-		"endswith":         string_endswith,
-		"find":             string_find,
-		"format":           string_format,
-		"index":            string_index,
-		"isalnum":          string_isalnum,
-		"isalpha":          string_isalpha,
-		"isdigit":          string_isdigit,
-		"islower":          string_islower,
-		"isspace":          string_isspace,
-		"istitle":          string_istitle,
-		"isupper":          string_isupper,
-		"join":             string_join,
-		"lower":            string_lower,
-		"lstrip":           string_strip, // sic
-		"partition":        string_partition,
-		"replace":          string_replace,
-		"rfind":            string_rfind,
-		"rindex":           string_rindex,
-		"rpartition":       string_partition, // sic
-		"rsplit":           string_split,     // sic
-		"rstrip":           string_strip,     // sic
-		"split":            string_split,
-		"split_codepoints": string_iterable, // sic
-		"split_elems":      string_iterable, // sic
-		"splitlines":       string_splitlines,
-		"startswith":       string_startswith,
-		"strip":            string_strip,
-		"title":            string_title,
-		"upper":            string_upper,
+		"capitalize":     string_capitalize,
+		"codepoint_ords": string_iterable,
+		"codepoints":     string_iterable, // sic
+		"count":          string_count,
+		"elem_ords":      string_iterable,
+		"elems":          string_iterable, // sic
+		"endswith":       string_endswith,
+		"find":           string_find,
+		"format":         string_format,
+		"index":          string_index,
+		"isalnum":        string_isalnum,
+		"isalpha":        string_isalpha,
+		"isdigit":        string_isdigit,
+		"islower":        string_islower,
+		"isspace":        string_isspace,
+		"istitle":        string_istitle,
+		"isupper":        string_isupper,
+		"join":           string_join,
+		"lower":          string_lower,
+		"lstrip":         string_strip, // sic
+		"partition":      string_partition,
+		"replace":        string_replace,
+		"rfind":          string_rfind,
+		"rindex":         string_rindex,
+		"rpartition":     string_partition, // sic
+		"rsplit":         string_split,     // sic
+		"rstrip":         string_strip,     // sic
+		"split":          string_split,
+		"splitlines":     string_splitlines,
+		"startswith":     string_startswith,
+		"strip":          string_strip,
+		"title":          string_title,
+		"upper":          string_upper,
 	}
 
 	setMethods = map[string]builtinMethod{
@@ -1423,18 +1423,18 @@ func string_capitalize(fnname string, recv Value, args Tuple, kwargs []Tuple) (V
 }
 
 // string_iterable returns an unspecified iterable value whose iterator yields:
-// - elems: numeric values of successive bytes
-// - codepoints: numeric values of successive Unicode code points
-// - split_elems: successive 1-byte substrings
-// - split_codepoints: successive substrings that encode a single Unicode code point.
+// - elems: successive 1-byte substrings
+// - codepoints: successive substrings that encode a single Unicode code point.
+// - elem_ords: numeric values of successive bytes
+// - codepoint_ords: numeric values of successive Unicode code points
 func string_iterable(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) {
 	if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil {
 		return nil, err
 	}
 	return stringIterable{
 		s:          recv.(String),
-		split:      fnname[0] == 's',
-		codepoints: fnname[len(fnname)-2] == 't',
+		ords:       fnname[len(fnname)-2] == 'd',
+		codepoints: fnname[0] == 'c',
 	}, nil
 }
 

diff --git a/testdata/builtins.sky b/testdata/builtins.sky
@@ -108,7 +108,7 @@ assert.true(1e15 not in range(4)) # too big for int32
 assert.true(1e100 not in range(4)) # too big for int64
 
 # list
-assert.eq(list("abc".split_elems()), ["a", "b", "c"])
+assert.eq(list("abc".elems()), ["a", "b", "c"])
 assert.eq(sorted(list({"a": 1, "b": 2})), ['a', 'b'])
 
 # min, max
@@ -124,17 +124,17 @@ assert.eq(min(5, -2, 1, 7, 3, key=lambda x: x*x), 1) # min absolute value
 assert.eq(min(5, -2, 1, 7, 3, key=lambda x: -x), 7) # min negated value
 
 # enumerate
-assert.eq(enumerate("abc".split_elems()), [(0, "a"), (1, "b"), (2, "c")])
+assert.eq(enumerate("abc".elems()), [(0, "a"), (1, "b"), (2, "c")])
 assert.eq(enumerate([False, True, None], 42), [(42, False), (43, True), (44, None)])
 
 # zip
 assert.eq(zip(), [])
 assert.eq(zip([]), [])
 assert.eq(zip([1, 2, 3]), [(1,), (2,), (3,)])
-assert.eq(zip("".split_elems()), [])
-assert.eq(zip("abc".split_elems(),
-              list("def".split_elems()),
-              "hijk".split_elems()),
+assert.eq(zip("".elems()), [])
+assert.eq(zip("abc".elems(),
+              list("def".elems()),
+              "hijk".elems()),
           [("a", "d", "h"), ("b", "e", "i"), ("c", "f", "j")])
 z1 = [1]
 assert.eq(zip(z1), [(1,)])