JeffLabonte
diff --git a/‎Lib/string.py
Lines changed: 305 additions & 0 deletions b/‎Lib/string.py
Lines changed: 305 additions & 0 deletions
diff --git a/‎tests/snippets/test_re.py
Lines changed: 18 additions & 0 deletions b/‎tests/snippets/test_re.py
Lines changed: 18 additions & 0 deletions
diff --git a/‎vm/src/stdlib/mod.rs
Lines changed: 1 addition & 1 deletion b/‎vm/src/stdlib/mod.rs
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,305 @@
+"""A collection of string constants.
+
+Public module variables:
+
+whitespace -- a string containing all ASCII whitespace
+ascii_lowercase -- a string containing all ASCII lowercase letters
+ascii_uppercase -- a string containing all ASCII uppercase letters
+ascii_letters -- a string containing all ASCII letters
+digits -- a string containing all ASCII decimal digits
+hexdigits -- a string containing all ASCII hexadecimal digits
+octdigits -- a string containing all ASCII octal digits
+punctuation -- a string containing all ASCII punctuation characters
+printable -- a string containing all ASCII characters considered printable
+
+"""
+
+__all__ = ["ascii_letters", "ascii_lowercase", "ascii_uppercase", "capwords",
+           "digits", "hexdigits", "octdigits", "printable", "punctuation",
+           "whitespace", "Formatter", "Template"]
+
+import _string
+
+# Some strings for ctype-style character classification
+whitespace = ' \t\n\r\v\f'
+ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
+ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ascii_letters = ascii_lowercase + ascii_uppercase
+digits = '0123456789'
+hexdigits = digits + 'abcdef' + 'ABCDEF'
+octdigits = '01234567'
+punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
+printable = digits + ascii_letters + punctuation + whitespace
+
+# Functions which aren't available as string methods.
+
+# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
+def capwords(s, sep=None):
+    """capwords(s [,sep]) -> string
+
+    Split the argument into words using split, capitalize each
+    word using capitalize, and join the capitalized words using
+    join.  If the optional second argument sep is absent or None,
+    runs of whitespace characters are replaced by a single space
+    and leading and trailing whitespace are removed, otherwise
+    sep is used to split and join the words.
+
+    """
+    return (sep or ' ').join(x.capitalize() for x in s.split(sep))
+
+
+####################################################################
+import re as _re
+from collections import ChainMap as _ChainMap
+
+class _TemplateMetaclass(type):
+    pattern = r"""
+    %(delim)s(?:
+      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
+      (?P<named>%(id)s)      |   # delimiter and a Python identifier
+      \{(?P<braced>%(bid)s)\}  #|   # delimiter and a braced identifier
+    #   (?P<invalid>)              # Other ill-formed delimiter exprs
+    )
+    """
+
+    def __init__(cls, name, bases, dct):
+        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
+        if 'pattern' in dct:
+            pattern = cls.pattern
+        else:
+            pattern = _TemplateMetaclass.pattern % {
+                'delim' : _re.escape(cls.delimiter),
+                'id'    : cls.idpattern,
+                'bid'   : cls.braceidpattern or cls.idpattern,
+                }
+        cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)
+
+
+class Template(metaclass=_TemplateMetaclass):
+    """A string class for supporting $-substitutions."""
+
+    delimiter = '$'
+    # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but
+    # without the ASCII flag.  We can't add re.ASCII to flags because of
+    # backward compatibility.  So we use the ?a local flag and [a-z] pattern.
+    # See https://bugs.python.org/issue31672
+    idpattern = r'([_a-z][_a-z0-9]*)'
+    braceidpattern = None
+    flags = _re.IGNORECASE
+
+    def __init__(self, template):
+        self.template = template
+
+    # Search for $$, $identifier, ${identifier}, and any bare $'s
+
+    def _invalid(self, mo):
+        i = mo.start('invalid')
+        lines = self.template[:i].splitlines(keepends=True)
+        if not lines:
+            colno = 1
+            lineno = 1
+        else:
+            colno = i - len(''.join(lines[:-1]))
+            lineno = len(lines)
+        raise ValueError('Invalid placeholder in string: line %d, col %d' %
+                         (lineno, colno))
+
+    def substitute(*args, **kws):
+        if not args:
+            raise TypeError("descriptor 'substitute' of 'Template' object "
+                            "needs an argument")
+        self, *args = args  # allow the "self" keyword be passed
+        if len(args) > 1:
+            raise TypeError('Too many positional arguments')
+        if not args:
+            mapping = kws
+        elif kws:
+            mapping = _ChainMap(kws, args[0])
+        else:
+            mapping = args[0]
+        # Helper function for .sub()
+        def convert(mo):
+            # Check the most common path first.
+            named = mo.group('named') or mo.group('braced')
+            if named is not None:
+                return str(mapping[named])
+            if mo.group('escaped') is not None:
+                return self.delimiter
+            if mo.group('invalid') is not None:
+                self._invalid(mo)
+            raise ValueError('Unrecognized named group in pattern',
+                             self.pattern)
+        return self.pattern.sub(convert, self.template)
+
+    def safe_substitute(*args, **kws):
+        if not args:
+            raise TypeError("descriptor 'safe_substitute' of 'Template' object "
+                            "needs an argument")
+        self, *args = args  # allow the "self" keyword be passed
+        if len(args) > 1:
+            raise TypeError('Too many positional arguments')
+        if not args:
+            mapping = kws
+        elif kws:
+            mapping = _ChainMap(kws, args[0])
+        else:
+            mapping = args[0]
+        # Helper function for .sub()
+        def convert(mo):
+            named = mo.group('named') or mo.group('braced')
+            if named is not None:
+                try:
+                    return str(mapping[named])
+                except KeyError:
+                    return mo.group()
+            if mo.group('escaped') is not None:
+                return self.delimiter
+            if mo.group('invalid') is not None:
+                return mo.group()
+            raise ValueError('Unrecognized named group in pattern',
+                             self.pattern)
+        return self.pattern.sub(convert, self.template)
+
+
+
+########################################################################
+# the Formatter class
+# see PEP 3101 for details and purpose of this class
+
+# The hard parts are reused from the C implementation.  They're exposed as "_"
+# prefixed methods of str.
+
+# The overall parser is implemented in _string.formatter_parser.
+# The field name parser is implemented in _string.formatter_field_name_split
+
+class Formatter:
+    def format(*args, **kwargs):
+        if not args:
+            raise TypeError("descriptor 'format' of 'Formatter' object "
+                            "needs an argument")
+        self, *args = args  # allow the "self" keyword be passed
+        try:
+            format_string, *args = args # allow the "format_string" keyword be passed
+        except ValueError:
+            raise TypeError("format() missing 1 required positional "
+                            "argument: 'format_string'") from None
+        return self.vformat(format_string, args, kwargs)
+
+    def vformat(self, format_string, args, kwargs):
+        used_args = set()
+        result, _ = self._vformat(format_string, args, kwargs, used_args, 2)
+        self.check_unused_args(used_args, args, kwargs)
+        return result
+
+    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
+                 auto_arg_index=0):
+        if recursion_depth < 0:
+            raise ValueError('Max string recursion exceeded')
+        result = []
+        for literal_text, field_name, format_spec, conversion in \
+                self.parse(format_string):
+
+            # output the literal text
+            if literal_text:
+                result.append(literal_text)
+
+            # if there's a field, output it
+            if field_name is not None:
+                # this is some markup, find the object and do
+                #  the formatting
+
+                # handle arg indexing when empty field_names are given.
+                if field_name == '':
+                    if auto_arg_index is False:
+                        raise ValueError('cannot switch from manual field '
+                                         'specification to automatic field '
+                                         'numbering')
+                    field_name = str(auto_arg_index)
+                    auto_arg_index += 1
+                elif field_name.isdigit():
+                    if auto_arg_index:
+                        raise ValueError('cannot switch from manual field '
+                                         'specification to automatic field '
+                                         'numbering')
+                    # disable auto arg incrementing, if it gets
+                    # used later on, then an exception will be raised
+                    auto_arg_index = False
+
+                # given the field_name, find the object it references
+                #  and the argument it came from
+                obj, arg_used = self.get_field(field_name, args, kwargs)
+                used_args.add(arg_used)
+
+                # do any conversion on the resulting object
+                obj = self.convert_field(obj, conversion)
+
+                # expand the format spec, if needed
+                format_spec, auto_arg_index = self._vformat(
+                    format_spec, args, kwargs,
+                    used_args, recursion_depth-1,
+                    auto_arg_index=auto_arg_index)
+
+                # format the object and append to the result
+                result.append(self.format_field(obj, format_spec))
+
+        return ''.join(result), auto_arg_index
+
+
+    def get_value(self, key, args, kwargs):
+        if isinstance(key, int):
+            return args[key]
+        else:
+            return kwargs[key]
+
+
+    def check_unused_args(self, used_args, args, kwargs):
+        pass
+
+
+    def format_field(self, value, format_spec):
+        return format(value, format_spec)
+
+
+    def convert_field(self, value, conversion):
+        # do any conversion on the resulting object
+        if conversion is None:
+            return value
+        elif conversion == 's':
+            return str(value)
+        elif conversion == 'r':
+            return repr(value)
+        elif conversion == 'a':
+            return ascii(value)
+        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))
+
+
+    # returns an iterable that contains tuples of the form:
+    # (literal_text, field_name, format_spec, conversion)
+    # literal_text can be zero length
+    # field_name can be None, in which case there's no
+    #  object to format and output
+    # if field_name is not None, it is looked up, formatted
+    #  with format_spec and conversion and then used
+    def parse(self, format_string):
+        return _string.formatter_parser(format_string)
+
+
+    # given a field_name, find the object it references.
+    #  field_name:   the field being looked up, e.g. "0.name"
+    #                 or "lookup[3]"
+    #  used_args:    a set of which args have been used
+    #  args, kwargs: as passed in to vformat
+    def get_field(self, field_name, args, kwargs):
+        first, rest = _string.formatter_field_name_split(field_name)
+
+        obj = self.get_value(first, args, kwargs)
+
+        # loop through the rest of the field_name, doing
+        #  getattr or getitem as needed
+        for is_attr, i in rest:
+            if is_attr:
+                obj = getattr(obj, i)
+            else:
+                obj = obj[i]
+
+        return obj, first
@@ -13,3 +13,21 @@
 assert mo.end() == 5
 
 assert re.escape('python.exe') == 'python\\.exe'
+
+p = re.compile('ab')
+s = p.sub('x', 'abcabca')
+print(s)
+assert s == 'xcxca'
+
+idpattern = r'([_a-z][_a-z0-9]*)'
+
+mo = re.search(idpattern, '7382 _boe0+2')
+print(mo)
+# TODO:
+# assert mo.group(0) == '_boe0'
+
+from string import Template
+s = Template('$who likes $what')
+# TODO:
+# r = s.substitute(who='tim', what='kung pow')
+# print(r)
@@ -53,7 +53,7 @@ pub fn get_module_inits() -> HashMap<String, StdlibInitFunc> {
         "platform".to_string() => Box::new(platform::make_module),
         "re".to_string() => Box::new(re::make_module),
         "random".to_string() => Box::new(random::make_module),
-        "string".to_string() => Box::new(string::make_module),
+        "_string".to_string() => Box::new(string::make_module),
         "struct".to_string() => Box::new(pystruct::make_module),
         "_thread".to_string() => Box::new(thread::make_module),
         "time".to_string() => Box::new(time_module::make_module),