Skip to content

Commit

Permalink
Merge pull request swiftlang#36623 from xwu/better-atoi
Browse files Browse the repository at this point in the history
[stdlib][SR-7556] Re-implement string-to-integer parsing
  • Loading branch information
milseman authored Apr 3, 2021
2 parents 717a132 + cef11cd commit 9ab21d3
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 95 deletions.
254 changes: 159 additions & 95 deletions stdlib/public/core/IntegerParsing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,175 @@
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Copyright (c) 2014 - 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

/// Parses a sign-less run of ASCII digits into a fixed-width integer.
///
/// Digits `0-9` and letters `a-z` / `A-Z` (case-insensitively) are accepted
/// as long as their value is below `radix`. When `isNegative` is `true`, each
/// digit is subtracted from the running total instead of added, so the value
/// is accumulated directly as a negative number.
///
/// - Parameters:
///   - codeUnits: The ASCII code units to parse, without any leading sign.
///   - radix: The base of the number; checked by an internal invariant to be
///     in `2...36`.
///   - isNegative: Whether the parsed digits denote a negative value.
/// - Returns: The parsed value, or `nil` if `codeUnits` is empty, contains a
///   code unit that is not a digit in `radix`, or the value overflows
///   `Result`.
@_alwaysEmitIntoClient
internal func _parseIntegerDigits<Result: FixedWidthInteger>(
  ascii codeUnits: UnsafeBufferPointer<UInt8>, radix: Int, isNegative: Bool
) -> Result? {
  _internalInvariant(radix >= 2 && radix <= 36)
  guard _fastPath(!codeUnits.isEmpty) else { return nil }

  // ASCII code points of the first character of each digit class.
  let zero: UInt8 = 48    // "0"
  let upperA: UInt8 = 65  // "A"
  let lowerA: UInt8 = 97  // "a"

  // How many decimal digits and how many letters are valid in this radix.
  let decimalCount = radix <= 10 ? radix : 10
  let letterCount = radix <= 10 ? 0 : radix &- 10

  // Exclusive upper bounds of the accepted range within each digit class.
  let decimalEnd = zero &+ UInt8(truncatingIfNeeded: decimalCount)
  let upperEnd = upperA &+ UInt8(truncatingIfNeeded: letterCount)
  let lowerEnd = lowerA &+ UInt8(truncatingIfNeeded: letterCount)

  let base = Result(truncatingIfNeeded: radix)
  var accumulator: Result = 0
  for unit in codeUnits {
    // Map the code unit to its numeric digit value, rejecting anything that
    // is not a valid digit in this radix.
    let offset: UInt8
    if _fastPath(unit >= zero && unit < decimalEnd) {
      offset = unit &- zero
    } else if _fastPath(unit >= upperA && unit < upperEnd) {
      offset = unit &- upperA &+ 10
    } else if _fastPath(unit >= lowerA && unit < lowerEnd) {
      offset = unit &- lowerA &+ 10
    } else {
      return nil
    }
    let value = Result(truncatingIfNeeded: offset)

    // accumulator = accumulator * base ± value, bailing out on overflow in
    // either step.
    let (scaled, scaleOverflowed) =
      accumulator.multipliedReportingOverflow(by: base)
    let (combined, combineOverflowed) = isNegative
      ? scaled.subtractingReportingOverflow(value)
      : scaled.addingReportingOverflow(value)
    guard _fastPath(!scaleOverflowed && !combineOverflowed) else {
      return nil
    }
    accumulator = combined
  }
  return accumulator
}

/// Parses an optionally signed ASCII integer from a non-empty buffer.
///
/// A single leading `+` or `-` is consumed if present; the remaining code
/// units are handed to `_parseIntegerDigits`.
///
/// - Parameters:
///   - codeUnits: The ASCII code units to parse; an internal invariant checks
///     that the buffer is not empty.
///   - radix: The base of the number, forwarded unchanged.
/// - Returns: The parsed value, or `nil` if digit parsing fails.
@_alwaysEmitIntoClient
internal func _parseInteger<Result: FixedWidthInteger>(
  ascii codeUnits: UnsafeBufferPointer<UInt8>, radix: Int
) -> Result? {
  _internalInvariant(!codeUnits.isEmpty)

  // ASCII code points of the two permitted sign characters.
  let plusSign: UInt8 = 43   // "+"
  let minusSign: UInt8 = 45  // "-"

  let leading = codeUnits[0]
  let isNegative = leading == minusSign

  // No sign: the whole buffer consists of digits.
  guard isNegative || leading == plusSign else {
    return _parseIntegerDigits(
      ascii: codeUnits, radix: radix, isNegative: false)
  }

  // Skip the sign and parse what follows it.
  let unsignedDigits = UnsafeBufferPointer(rebasing: codeUnits[1...])
  return _parseIntegerDigits(
    ascii: unsignedDigits, radix: radix, isNegative: isNegative)
}

/// Parses an optionally signed ASCII integer from any `StringProtocol` value.
///
/// The text is first copied into a `String`, whose UTF-8 buffer is then
/// parsed by the buffer-based `_parseInteger` overload.
///
/// - Parameters:
///   - text: The text to parse.
///   - radix: The base of the number, forwarded unchanged.
/// - Returns: The parsed value, or `nil` if parsing fails.
@_alwaysEmitIntoClient
@inline(never)
internal func _parseInteger<S: StringProtocol, Result: FixedWidthInteger>(
  ascii text: S, radix: Int
) -> Result? {
  var copy = String(text)
  return copy.withUTF8 { utf8 in
    _parseInteger(ascii: utf8, radix: radix)
  }
}

extension FixedWidthInteger {
  /// Creates a new integer value by parsing `text` as a number written in
  /// the given `radix`.
  ///
  /// `text` may start with a single plus or minus sign (`+` or `-`), which
  /// must be followed by one or more digits (`0-9`) or letters (`a-z` or
  /// `A-Z`). Letters are matched without regard to case.
  ///
  ///     let x = Int("123")
  ///     // x == 123
  ///
  ///     let y = Int("-123", radix: 8)
  ///     // y == -83
  ///     let w = Int("+123", radix: 8)
  ///     // w == +83
  ///
  ///     let z = Int("07b", radix: 16)
  ///     // z == 123
  ///
  /// The result is `nil` when `text` is malformed, when it uses a character
  /// that is not a digit in `radix`, or when the number it spells cannot be
  /// represented by this type. Each of these conversions produces `nil`:
  ///
  ///     Int(" 100")                       // Includes whitespace
  ///     Int("21-50")                      // Invalid format
  ///     Int("ff6600")                     // Characters out of bounds
  ///     Int("zzzzzzzzzzzzz", radix: 36)   // Out of range
  ///
  /// - Parameters:
  ///   - text: The ASCII representation of a number in the radix passed as
  ///     `radix`.
  ///   - radix: The radix, or base, used to interpret `text`. `radix` must
  ///     be in the range `2...36`. The default is 10.
  @inlinable
  @inline(__always)
  public init?<S: StringProtocol>(_ text: S, radix: Int = 10) {
    _precondition(2...36 ~= radix, "Radix not in range 2...36")
    guard _fastPath(!text.isEmpty) else { return nil }

    // Fast path: parse the UTF-8 view in place when it exposes contiguous
    // storage. The outer optional is `nil` only when no such storage exists.
    let inPlace: Self?? = text.utf8.withContiguousStorageIfAvailable { utf8 in
      _parseInteger(ascii: utf8, radix: radix)
    }
    // Otherwise fall back to the out-of-line overload taking the text itself.
    let parsed: Self? = inPlace ?? _parseInteger(ascii: text, radix: radix)

    guard let value = parsed else { return nil }
    self = value
  }

  /// Creates a new integer value by parsing `description` as a base-10
  /// number.
  ///
  /// `description` may start with a single plus or minus sign (`+` or `-`),
  /// which must be followed by one or more decimal digits (`0-9`).
  ///
  ///     let x = Int("123")
  ///     // x == 123
  ///
  /// The result is `nil` when `description` is malformed or when the number
  /// it spells in base 10 cannot be represented by this type. Each of these
  /// conversions produces `nil`:
  ///
  ///     Int(" 100")                       // Includes whitespace
  ///     Int("21-50")                      // Invalid format
  ///     Int("ff6600")                     // Characters out of bounds
  ///     Int("10000000000000000000000000") // Out of range
  ///
  /// - Parameter description: The ASCII representation of a number.
  @inlinable
  @inline(__always)
  public init?(_ description: String) {
    // Delegate to the radix-taking initializer with the decimal base.
    self.init(description, radix: 10)
  }
}

//===----------------------------------------------------------------------===//
// Old entry points preserved for ABI compatibility.
//===----------------------------------------------------------------------===//

/// Returns `c` as a `UTF16.CodeUnit`. Meant to be used as `_ascii16("x")`.
///
/// - Parameter c: The Unicode scalar to convert. An internal invariant
///   checks that its value lies in `0...0x7F` ("not ASCII" otherwise).
/// - Returns: `UTF16.CodeUnit(c.value)`.
@inlinable
@usableFromInline // Previously '@inlinable'.
internal func _ascii16(_ c: Unicode.Scalar) -> UTF16.CodeUnit {
_internalInvariant(c.value >= 0 && c.value <= 0x7F, "not ASCII")
return UTF16.CodeUnit(c.value)
}

@inlinable
@inline(__always)
@usableFromInline // Previously '@inlinable @inline(__always)'.
internal func _asciiDigit<CodeUnit: UnsignedInteger, Result: BinaryInteger>(
codeUnit u_: CodeUnit, radix: Result
) -> Result? {
Expand All @@ -36,8 +188,7 @@ internal func _asciiDigit<CodeUnit: UnsignedInteger, Result: BinaryInteger>(
return Result(truncatingIfNeeded: d)
}

@inlinable
@inline(__always)
@usableFromInline // Previously '@inlinable @inline(__always)'.
internal func _parseUnsignedASCII<
Rest: IteratorProtocol, Result: FixedWidthInteger
>(
Expand Down Expand Up @@ -67,13 +218,10 @@ where Rest.Element: UnsignedInteger {
return result
}

//
// TODO (TODO: JIRA): This needs to be completely rewritten. It's about 20KB of
// This function has been superseded because it is about 20KB of previously
// always-inline code, most of which are MOV instructions.
//

@inlinable
@inline(__always)
@usableFromInline // Previously '@inlinable @inline(__always)'.
internal func _parseASCII<
CodeUnits: IteratorProtocol, Result: FixedWidthInteger
>(
Expand Down Expand Up @@ -113,88 +261,4 @@ extension FixedWidthInteger {
where CodeUnits.Element: UnsignedInteger {
return _parseASCII(codeUnits: &codeUnits, radix: radix)
}

/// Creates a new integer value from the given string and radix.
///
/// The string passed as `text` may begin with a plus or minus sign character
/// (`+` or `-`), followed by one or more numeric digits (`0-9`) or letters
/// (`a-z` or `A-Z`). Parsing of the string is case insensitive.
///
///     let x = Int("123")
///     // x == 123
///
///     let y = Int("-123", radix: 8)
///     // y == -83
///     let y = Int("+123", radix: 8)
///     // y == +83
///
///     let z = Int("07b", radix: 16)
///     // z == 123
///
/// If `text` is in an invalid format or contains characters that are out of
/// bounds for the given `radix`, or if the value it denotes in the given
/// `radix` is not representable, the result is `nil`. For example, the
/// following conversions result in `nil`:
///
///     Int(" 100")                       // Includes whitespace
///     Int("21-50")                      // Invalid format
///     Int("ff6600")                     // Characters out of bounds
///     Int("zzzzzzzzzzzzz", radix: 36)   // Out of range
///
/// - Parameters:
///   - text: The ASCII representation of a number in the radix passed as
///     `radix`.
///   - radix: The radix, or base, to use for converting `text` to an integer
///     value. `radix` must be in the range `2...36`. The default is 10.
@inlinable // @specializable
@_semantics("optimize.sil.specialize.generic.partial.never")
public init?<S: StringProtocol>(_ text: S, radix: Int = 10) {
_precondition(2...36 ~= radix, "Radix not in range 2...36")

// First attempt: when `text` is a `String` whose guts report fast UTF-8,
// iterate the code units handed to the `withFastUTF8` closure.
if let str = text as? String, str._guts.isFastUTF8 {
guard let ret = str._guts.withFastUTF8 ({ utf8 -> Self? in
var iter = utf8.makeIterator()
return _parseASCII(codeUnits: &iter, radix: Self(radix))
}) else {
return nil
}
self = ret
return
}

// TODO(String performance): We can provide fast paths for common radices,
// native UTF-8 storage, etc.

// Otherwise: iterate `text.utf8` and parse through `_parseASCIISlowPath`.
var iter = text.utf8.makeIterator()
guard let ret = Self._parseASCIISlowPath(
codeUnits: &iter, radix: Self(radix)
) else { return nil }

self = ret
}

/// Creates a new integer value from the given string.
///
/// The string passed as `description` may begin with a plus or minus sign
/// character (`+` or `-`), followed by one or more numeric digits (`0-9`).
///
///     let x = Int("123")
///     // x == 123
///
/// If `description` is in an invalid format, or if the value it denotes in
/// base 10 is not representable, the result is `nil`. For example, the
/// following conversions result in `nil`:
///
///     Int(" 100")                       // Includes whitespace
///     Int("21-50")                      // Invalid format
///     Int("ff6600")                     // Characters out of bounds
///     Int("10000000000000000000000000") // Out of range
///
/// - Parameter description: The ASCII representation of a number.
@inlinable
@_semantics("optimize.sil.specialize.generic.partial.never")
@inline(__always)
public init?(_ description: String) {
// Delegates to the radix-taking initializer with base 10.
self.init(description, radix: 10)
}
}
17 changes: 17 additions & 0 deletions test/stdlib/NSSlowString.swift
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,23 @@ tests.test("Iterator") {
expectEqualSequence(opaque.utf8.reversed(), native.utf8.reversed())
}

// Parsing an `NSSlowString`-backed string must yield the same integer as
// parsing a native string literal with the same contents, for several
// radices and for both a signed and an unsigned result type.
tests.test("String-to-integer parsing") {
  let nativeText = "1234"
  let bridgedText = NSSlowString(string: "1234") as String

  // Signed, word-sized result.
  expectEqual(Int(bridgedText, radix: 16)!, Int(nativeText, radix: 16)!)
  expectEqual(Int(bridgedText, radix: 15)!, Int(nativeText, radix: 15)!)
  expectEqual(Int(bridgedText, radix: 10)!, Int(nativeText, radix: 10)!)
  expectEqual(Int(bridgedText, radix: 8)!, Int(nativeText, radix: 8)!)
  expectEqual(Int(bridgedText, radix: 5)!, Int(nativeText, radix: 5)!)

  // Unsigned, 16-bit result.
  expectEqual(
    UInt16(bridgedText, radix: 16)!, UInt16(nativeText, radix: 16)!)
  expectEqual(
    UInt16(bridgedText, radix: 15)!, UInt16(nativeText, radix: 15)!)
  expectEqual(
    UInt16(bridgedText, radix: 10)!, UInt16(nativeText, radix: 10)!)
  expectEqual(
    UInt16(bridgedText, radix: 8)!, UInt16(nativeText, radix: 8)!)
  expectEqual(
    UInt16(bridgedText, radix: 5)!, UInt16(nativeText, radix: 5)!)
}

tests.test("Unicode 9 grapheme breaking")
.xfail(.osxMinor(10, 9, reason: "Mac OS X 10.9 has an old version of ICU"))
.xfail(.iOSMajor(7, reason: "iOS 7 has an old version of ICU"))
Expand Down

0 comments on commit 9ab21d3

Please sign in to comment.