Skip to content

Commit

Permalink
Avoid allocating for names in the PEP 508 parser (astral-sh#10476)
Browse files Browse the repository at this point in the history
## Summary

We can read from the slice directly. I don't think this will affect
performance today, because `from_str` will then allocate, but it
_should_ be a speedup once astral-sh#10475 merges, since we can then avoid
allocating a `String` and go straight from `str` to `ArcStr`.
  • Loading branch information
charliermarsh authored Jan 10, 2025
1 parent d44affa commit 7a21b71
Showing 1 changed file with 19 additions and 24 deletions.
43 changes: 19 additions & 24 deletions crates/uv-pep508/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,12 +394,9 @@ fn parse_name<T: Pep508Url>(cursor: &mut Cursor) -> Result<PackageName, Pep508Er
// https://peps.python.org/pep-0508/#names
// ^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$ with re.IGNORECASE
let start = cursor.pos();
let mut name = String::new();

if let Some((index, char)) = cursor.next() {
if matches!(char, 'A'..='Z' | 'a'..='z' | '0'..='9') {
name.push(char);
} else {
if !matches!(char, 'A'..='Z' | 'a'..='z' | '0'..='9') {
// Check if the user added a filesystem path without a package name. pip supports this
// in `requirements.txt`, but it doesn't adhere to the PEP 508 grammar.
let mut clone = cursor.clone().at(start);
Expand Down Expand Up @@ -431,26 +428,24 @@ fn parse_name<T: Pep508Url>(cursor: &mut Cursor) -> Result<PackageName, Pep508Er
}

loop {
match cursor.peek() {
Some((index, char @ ('A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '-' | '_'))) => {
name.push(char);
cursor.next();
// [.-_] can't be the final character
if cursor.peek().is_none() && matches!(char, '.' | '-' | '_') {
return Err(Pep508Error {
message: Pep508ErrorSource::String(format!(
"Package name must end with an alphanumeric character, not '{char}'"
)),
start: index,
len: char.len_utf8(),
input: cursor.to_string(),
});
}
}
Some(_) | None => {
return Ok(PackageName::new(name)
.expect("`PackageName` validation should match PEP 508 parsing"));
if let Some((index, char @ ('A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '-' | '_'))) =
cursor.peek()
{
cursor.next();
// [.-_] can't be the final character
if cursor.peek().is_none() && matches!(char, '.' | '-' | '_') {
return Err(Pep508Error {
message: Pep508ErrorSource::String(format!(
"Package name must end with an alphanumeric character, not `{char}`"
)),
start: index,
len: char.len_utf8(),
input: cursor.to_string(),
});
}
} else {
let len = cursor.pos() - start;
return Ok(PackageName::from_str(cursor.slice(start, len)).unwrap());
}
}
}
Expand Down Expand Up @@ -1033,7 +1028,7 @@ mod tests {
assert_snapshot!(
parse_pep508_err("name_"),
@"
Package name must end with an alphanumeric character, not '_'
Package name must end with an alphanumeric character, not `_`
name_
^"
);
Expand Down

0 comments on commit 7a21b71

Please sign in to comment.