diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..78c546834 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @Geal diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 97a320ac8..932a2a50a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -14,13 +14,13 @@ Please provide the following information with this pull request: - related issue number (I need some context to understand a PR with a lot of code, except for documentation typos) -- a test case reproducing the issue. You can write it in [issues.rs](https://github.com/Geal/nom/blob/master/tests/issues.rs) +- a test case reproducing the issue. You can write it in [issues.rs](https://github.com/Geal/nom/blob/main/tests/issues.rs) - if adding a new combinator, please add code documentation and some unit tests -in the same file. Also, please update the [combinator list](https://github.com/Geal/nom/blob/master/doc/choosing_a_combinator.md) +in the same file. Also, please update the [combinator list](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md) ## Code style -This project follows a [code style](https://github.com/Geal/nom/blob/master/rustfmt.toml) +This project follows a [code style](https://github.com/Geal/nom/blob/main/rustfmt.toml) checked by [rustfmt][https://github.com/rust-lang-nursery/rustfmt]. Please avoid cosmetic fixes unrelated to the pull request. Keeping the changes @@ -28,9 +28,9 @@ as small as possible increase your chances of getting this merged quickly. ## Rebasing -To make sure the changes will work properly once merged into the master branch +To make sure the changes will work properly once merged into the main branch (which might have changed while you were working on your PR), please -[rebase your PR on master](https://git-scm.com/book/en/v2/Git-Branching-Rebasing). +[rebase your PR on main](https://git-scm.com/book/en/v2/Git-Branching-Rebasing). 
## Squashing the commits diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e8c35ccb6..374183791 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,7 +3,7 @@ name: CI on: [push, pull_request] env: - RUST_MINVERSION: 1.41.1 + RUST_MINVERSION: 1.56.0 CARGO_INCREMENTAL: 0 CARGO_NET_RETRY: 10 @@ -18,7 +18,7 @@ jobs: - stable - beta - nightly - - 1.48.0 + - 1.56.0 features: - '' @@ -135,10 +135,9 @@ jobs: override: true - name: Build - uses: actions-rs/cargo@v1 - with: - command: doc - args: --verbose --features "std docsrs" + env: + RUSTDOCFLAGS: -D warnings + run: cargo doc --no-deps --document-private-items --workspace --verbose --features "std docsrs" fmt: name: Check formatting @@ -185,12 +184,15 @@ jobs: uses: actions-rs/cargo@v1 with: command: install - args: cargo-tarpaulin --version 0.18.0-alpha3 # @TODO restore to normal (https://github.com/xd009642/tarpaulin/issues/756#issuecomment-838769320) + args: cargo-tarpaulin - name: Run cargo tarpaulin uses: actions-rs/cargo@v1 - env: - TOKEN: ${{ secrets.COVERALLS_TOKEN }} with: command: tarpaulin - args: --coveralls "$TOKEN" --avoid-cfg-tarpaulin # @TODO restore to normal (https://github.com/xd009642/tarpaulin/issues/756#issuecomment-838769320) + args: --output-dir coverage --out Lcov + + - name: Publish to Coveralls + uses: coverallsapp/github-action@master + with: + github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 49ef3f5a7..fc94c29d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,93 @@ ### Changed +## 7.1.2 - 2023-01-01 + +### Thanks + +- @joubs +- @Fyko +- @LoganDark +- @darnuria +- @jkugelman +- @barower +- @puzzlewolf +- @epage +- @cky +- @wolthom +- @w1ll-i-code + +### Changed + +- documentation fixes +- tests fixes +- limit the initial capacity of the result vector of `many_m_n` to 64kiB +- bits parser now accept `Parser` implementors instead of only functions + +### Added + +- implement `Tuple` parsing for the unit 
type as a special case +- implement `ErrorConvert` on the unit type to make it usable as error type for bits parsers +- bool parser for bits input + +## 7.1.1 - 2022-03-14 + +### Thanks + +- @ThomasdenH +- @SphinxKnight +- @irevoire +- @doehyunbaek +- @pxeger +- @punkeel +- @max-sixty +- @Xiretza +- @5c077m4n +- @erihsu +- @TheNeikos +- @LoganDark +- @nickelc +- @chotchki +- @ctrlcctrlv + + +### Changed + +- documentation fixes +- more examples + +## 7.1.0 - 2021-11-04 + +### Thanks + +- @nickelc +- @Stargateur +- @NilsIrl +- @clonejo +- @Strytyp +- @schubart +- @jihchi +- @nipunn1313 +- @Gungy2 +- @Drumato +- @Alexhuszagh +- @Aehmlo +- @homersimpsons +- @dne +- @epage +- @saiintbrisson +- @pymongo + +### Changed + +- documentation fixes +- CI fixes +- the move to minimal-lexical for float parsing introduced bugs that cannot be resolved right now, so this version moves back to using the standard library's parser. **This is a performance regression**. If you have specific requirements around float parsing, you are strongly encouraged to use [recognize_float](https://docs.rs/nom/latest/nom/number/complete/fn.recognize_float.html) and another library to convert to a f32 or f64 + +### Added + +- alt now works with 1 element tuples + ## 7.0.0 - 2021-08-21 This release fixes dependency compilation issues and strengthen the minimum supported Rust version (MSRV) policy. This is also the first release without the macros that were used since nom's beginning. @@ -84,7 +171,7 @@ This release was done thanks to the hard work of (by order of appearance in the - lots of documentation fixes - relax trait bounds -- workarounds for depenency issues with bitvec and memchr +- workarounds for dependency issues with bitvec and memchr ## 6.1.2 - 2021-02-15 @@ -245,7 +332,7 @@ containing example patterns. 
- removed the deprecated `whitespace` module - the default error type is now a struct (`nom::error::Error`) instead of a tuple - the `FromExternalError` allows wrapping the error returned by the function in the `map_res` combinator -- renamed the `dbg!` macro to avoid conficts with `std::dbg!` +- renamed the `dbg!` macro to avoid conflicts with `std::dbg!` - `separated_list` now allows empty elements @@ -256,7 +343,7 @@ containing example patterns. - `success`: returns a value without consuming the input - `satisfy`: checks a predicate over the next character - `eof` function combinator -- `consumed`: returnes the produced value and the consumed input +- `consumed`: returns the produced value and the consumed input - `length_count` function combinator - `into`: converts a parser's output and error values if `From` implementations are available - `IResult::finish()`: converts a parser's result to `Result<(I, O), E>` by removing the distinction between `Error` and `Failure` and panicking on `Incomplete` @@ -819,7 +906,7 @@ Bugfix release The 2.0 release is one of the biggest yet. It was a good opportunity to clean up some badly named combinators and fix invalid behaviours. -Since this version introduces a few breaking changes, an [upgrade documentation](https://github.com/Geal/nom/blob/master/doc/upgrading_to_nom_2.md) is available, detailing the steps to fix the most common migration issues. After testing on a set of 30 crates, most of them will build directly, a large part will just need to activate the "verbose-errors" compilation feature. The remaining fixes are documented. +Since this version introduces a few breaking changes, an [upgrade documentation](https://github.com/Geal/nom/blob/main/doc/upgrading_to_nom_2.md) is available, detailing the steps to fix the most common migration issues. After testing on a set of 30 crates, most of them will build directly, a large part will just need to activate the "verbose-errors" compilation feature. 
The remaining fixes are documented. This version also adds a lot of interesting features, like the permutation combinator or whitespace separated formats support. @@ -1124,7 +1211,7 @@ Considering the number of changes since the last release, this version can conta ## 0.3.11 - 2015-08-04 ### Thanks -- @bluss for remarking that the crate included random junk lying non commited in my local repository +- @bluss for remarking that the crate included random junk lying non committed in my local repository ### Fixed - cleanup of my local repository will ship less files in the crates, resulting in a smaller download @@ -1187,7 +1274,7 @@ Considering the number of changes since the last release, this version can conta ### Added - documentation for a few functions - the consumer trait now requires the `failed(&self, error_code)` method in case of parsing error -- `named!` now handles thge alternative `named!(pub fun_name, ...)` +- `named!` now handles the alternative `named!(pub fun_name, ...)` ### Fixed - `filter!` now returns the whole input if the filter function never returned false @@ -1388,7 +1475,10 @@ Considering the number of changes since the last release, this version can conta ## Compare code -* [unreleased](https://github.com/Geal/nom/compare/7.0.0...HEAD) +* [unreleased](https://github.com/Geal/nom/compare/7.1.2...HEAD) +* [7.1.2](https://github.com/Geal/nom/compare/7.1.1...7.1.2) +* [7.1.1](https://github.com/Geal/nom/compare/7.1.0...7.1.1) +* [7.1.0](https://github.com/Geal/nom/compare/7.0.0...7.1.0) * [7.0.0](https://github.com/Geal/nom/compare/6.2.1...7.0.0) * [6.2.1](https://github.com/Geal/nom/compare/6.2.0...6.2.1) * [6.2.0](https://github.com/Geal/nom/compare/6.1.2...6.2.0) diff --git a/Cargo.toml b/Cargo.toml index 9261241d7..b0a718846 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "nom" -version = "7.0.0" +version = "7.1.2" authors = [ "contact@geoffroycouprie.com" ] description = "A byte-oriented, zero-copy, parser 
combinators library" license = "MIT" @@ -10,9 +10,12 @@ readme = "README.md" documentation = "https://docs.rs/nom" keywords = ["parser", "parser-combinators", "parsing", "streaming", "bit"] categories = ["parsing"] -edition = "2018" +edition = "2021" autoexamples = false +# also update in README.md (badge and "Rust version requirements" section) +rust-version = "1.56" + include = [ "CHANGELOG.md", "LICENSE", @@ -23,12 +26,11 @@ include = [ "src/*/*.rs", "tests/*.rs", "doc/nom_recipes.md", - "build.rs" ] [features] alloc = [] -std = ["alloc", "memchr/use_std", "minimal-lexical/std"] +std = ["alloc", "memchr/std", "minimal-lexical/std"] default = ["std"] docsrs = [] @@ -37,18 +39,13 @@ version = "0.2.0" default-features = false [dependencies.memchr] -version = "2.0" +version = "2.3" default-features = false [dev-dependencies] -criterion = "0.3" -jemallocator = "^0.3" doc-comment = "0.3" proptest = "1.0.0" -[build-dependencies] -version_check = "0.9" - [package.metadata.docs.rs] features = ["alloc", "std", "docsrs"] all-features = true @@ -58,9 +55,6 @@ debug = true lto = true codegen-units = 1 -[lib] -bench = false - [[test]] name = "arithmetic" @@ -68,9 +62,6 @@ name = "arithmetic" name = "arithmetic_ast" required-features = ["alloc"] -[[test]] -name = "blockbuf-arithmetic" - [[test]] name = "css" @@ -113,43 +104,24 @@ name = "reborrow_fold" name = "fnmut" required-features = ["alloc"] -[[bench]] -name = "arithmetic" -path = "benches/arithmetic.rs" -harness = false - -[[bench]] -name = "number" -path = "benches/number.rs" -harness = false - -[[bench]] -name = "http" -path = "benches/http.rs" -harness = false - -[[bench]] -name = "ini" -path = "benches/ini.rs" -harness = false - -[[bench]] -name = "ini_str" -path = "benches/ini_str.rs" -harness = false - -[[bench]] -name = "json" -path = "benches/json.rs" -harness = false +[[example]] +name = "custom_error" +required-features = ["alloc"] +path = "examples/custom_error.rs" [[example]] name = "json" required-features = 
["alloc"] path = "examples/json.rs" +[[example]] +name = "json_iterator" +required-features = ["alloc"] +path = "examples/json_iterator.rs" + [[example]] name = "iterator" +path = "examples/iterator.rs" [[example]] name = "s_expression" @@ -163,5 +135,8 @@ path = "examples/string.rs" [badges] travis-ci = { repository = "Geal/nom" } -coveralls = { repository = "Geal/nom", branch = "master", service = "github" } +coveralls = { repository = "Geal/nom", branch = "main", service = "github" } maintenance = { status = "actively-developed" } + +[workspace] +members = [".", "benchmarks/"] diff --git a/README.md b/README.md index efe9c27c6..bf6d56aa6 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ [![LICENSE](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) [![Join the chat at https://gitter.im/Geal/nom](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Geal/nom?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Build Status](https://github.com/Geal/nom/actions/workflows/ci.yml/badge.svg)](https://github.com/Geal/nom/actions/workflows/ci.yml) -[![Coverage Status](https://coveralls.io/repos/Geal/nom/badge.svg?branch=master)](https://coveralls.io/r/Geal/nom?branch=master) +[![Coverage Status](https://coveralls.io/repos/github/Geal/nom/badge.svg?branch=main)](https://coveralls.io/github/Geal/nom?branch=main) [![Crates.io Version](https://img.shields.io/crates/v/nom.svg)](https://crates.io/crates/nom) -[![Minimum rustc version](https://img.shields.io/badge/rustc-1.48.0+-lightgray.svg)](#rust-version-requirements) +[![Minimum rustc version](https://img.shields.io/badge/rustc-1.56.0+-lightgray.svg)](#rust-version-requirements-msrv) nom is a parser combinators library written in Rust. Its goal is to provide tools to build safe parsers without compromising the speed or memory consumption. 
To @@ -13,10 +13,29 @@ that end, it uses extensively Rust's *strong typing* and *memory safety* to prod fast and correct parsers, and provides functions, macros and traits to abstract most of the error prone plumbing. -![nom logo in CC0 license, by Ange Albertini](https://raw.githubusercontent.com/Geal/nom/master/assets/nom.png) +![nom logo in CC0 license, by Ange Albertini](https://raw.githubusercontent.com/Geal/nom/main/assets/nom.png) *nom will happily take a byte out of your files :)* + + +- [Example](#example) +- [Documentation](#documentation) +- [Why use nom?](#why-use-nom) + - [Binary format parsers](#binary-format-parsers) + - [Text format parsers](#text-format-parsers) + - [Programming language parsers](#programming-language-parsers) + - [Streaming formats](#streaming-formats) +- [Parser combinators](#parser-combinators) +- [Technical features](#technical-features) +- [Rust version requirements](#rust-version-requirements-msrv) +- [Installation](#installation) +- [Related projects](#related-projects) +- [Parsers written with nom](#parsers-written-with-nom) +- [Contributors](#contributors) + + + ## Example [Hexadecimal color](https://developer.mozilla.org/en-US/docs/Web/CSS/color) parser: @@ -73,8 +92,8 @@ fn parse_color() { ## Documentation - [Reference documentation](https://docs.rs/nom) -- [Various design documents and tutorials](https://github.com/Geal/nom/tree/master/doc) -- [List of combinators and their behaviour](https://github.com/Geal/nom/blob/master/doc/choosing_a_combinator.md) +- [Various design documents and tutorials](https://github.com/Geal/nom/tree/main/doc) +- [List of combinators and their behaviour](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md) If you need any help developing your parsers, please ping `geal` on IRC (libera, geeknode, oftc), go to `#nom-parsers` on Libera IRC, or on the [Gitter chat room](https://gitter.im/Geal/nom). 
@@ -112,7 +131,7 @@ formats such as JSON, nom can manage it, and provides you with useful tools: Example projects: -- [HTTP proxy](https://github.com/sozu-proxy/sozu/blob/master/lib/src/protocol/http/parser.rs) +- [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser) - [TOML parser](https://github.com/joelself/tomllib) ### Programming language parsers @@ -147,7 +166,7 @@ It allows you to build powerful, deterministic state machines for your protocols Example projects: -- [HTTP proxy](https://github.com/sozu-proxy/sozu/blob/master/lib/src/protocol/http/parser.rs) +- [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser) - [Using nom with generators](https://github.com/Geal/generator_nom) ## Parser combinators @@ -185,9 +204,9 @@ nom parsers are for: Some benchmarks are available on [Github](https://github.com/Geal/nom_benchmarks). -## Rust version requirements +## Rust version requirements (MSRV) -The 7.0 series of nom supports **Rustc version 1.48 or greater**. It is known to work properly on Rust 1.41.1 but there is no guarantee it will stay the case through this major release. +The 7.0 series of nom supports **Rustc version 1.56 or greater**. The current policy is that this will only be updated in the next major nom release. 
@@ -229,7 +248,7 @@ Here is a (non exhaustive) list of known projects using nom: [CSV](https://github.com/GuillaumeGomez/csv-parser), [FASTA](https://github.com/TianyiShi2001/nom-fasta), [FASTQ](https://github.com/elij/fastq.rs), -[INI](https://github.com/Geal/nom/blob/master/tests/ini.rs), +[INI](https://github.com/Geal/nom/blob/main/tests/ini.rs), [ISO 8601 dates](https://github.com/badboy/iso8601), [libconfig-like configuration file format](https://github.com/filipegoncalves/rust-config), [Web archive](https://github.com/sbeckeriv/warc_nom_parser), @@ -250,6 +269,7 @@ Here is a (non exhaustive) list of known projects using nom: [CSML](https://github.com/CSML-by-Clevy/csml-interpreter), [Wasm](https://github.com/Strytyp/wasm-nom), [Pseudocode](https://github.com/Gungy2/pseudocode) +[Filter for MeiliSearch](https://github.com/meilisearch/meilisearch) - Interface definition formats: [Thrift](https://github.com/thehydroimpulse/thrust) - Audio, video and image formats: [GIF](https://github.com/Geal/gif.rs), @@ -260,14 +280,15 @@ Here is a (non exhaustive) list of known projects using nom: [Matroska (MKV)](https://github.com/rust-av/matroska) - Document formats: [TAR](https://github.com/Keruspe/tar-parser.rs), -[GZ](https://github.com/nharward/nom-gzip) +[GZ](https://github.com/nharward/nom-gzip), +[GDSII](https://github.com/erihsu/gds2-io) - Cryptographic formats: [X.509](https://github.com/rusticata/x509-parser) - Network protocol formats: [Bencode](https://github.com/jbaum98/bencode.rs), [D-Bus](https://github.com/toshokan/misato), [DHCP](https://github.com/rusticata/dhcp-parser), -[HTTP](https://github.com/sozu-proxy/sozu/tree/master/lib/src/protocol/http), +[HTTP](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http), [URI](https://github.com/santifa/rrp/blob/master/src/uri.rs), [IMAP](https://github.com/djc/tokio-imap), [IRC](https://github.com/Detegr/RBot-parser), @@ -293,7 +314,8 @@ Here is a (non exhaustive) list of known projects using nom: 
[Telcordia/Bellcore SR-4731 SOR OTDR files](https://github.com/JamesHarrison/otdrs), [MySQL binary log](https://github.com/PrivateRookie/boxercrab), [URI](https://github.com/Skasselbard/nom-uri), -[Furigana](https://github.com/sachaarbonel/furigana.rs) +[Furigana](https://github.com/sachaarbonel/furigana.rs), +[Wordle Result](https://github.com/Fyko/wordle-stats/tree/main/parser) Want to create a new parser using `nom`? A list of not yet implemented formats is available [here](https://github.com/Geal/nom/issues/14). diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml new file mode 100644 index 000000000..af58dbfe2 --- /dev/null +++ b/benchmarks/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "benchmarks" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +criterion = "0.3.5" +jemallocator = "0.3.2" +nom = { path = "../" } + +[lib] +bench = false + +[[bench]] +name = "arithmetic" +path = "benches/arithmetic.rs" +harness = false + +[[bench]] +name = "number" +path = "benches/number.rs" +harness = false + +[[bench]] +name = "http" +path = "benches/http.rs" +harness = false + +[[bench]] +name = "ini" +path = "benches/ini.rs" +harness = false + +[[bench]] +name = "ini_str" +path = "benches/ini_str.rs" +harness = false + +[[bench]] +name = "json" +path = "benches/json.rs" +harness = false diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 000000000..e75e1aa3f --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1 @@ +# Benchmarks for nom parsers diff --git a/benches/arithmetic.rs b/benchmarks/benches/arithmetic.rs similarity index 97% rename from benches/arithmetic.rs rename to benchmarks/benches/arithmetic.rs index 136aca559..cab12224f 100644 --- a/benches/arithmetic.rs +++ b/benchmarks/benches/arithmetic.rs @@ -65,6 +65,7 @@ fn expr(input: &[u8]) -> IResult<&[u8], i64> { )(input) } +#[allow(clippy::eq_op, clippy::erasing_op)] fn 
arithmetic(c: &mut Criterion) { let data = b" 2*2 / ( 5 - 1) + 3 / 4 * (2 - 7 + 567 *12 /2) + 3*(1+2*( 45 /2));"; diff --git a/benches/http.rs b/benchmarks/benches/http.rs similarity index 100% rename from benches/http.rs rename to benchmarks/benches/http.rs diff --git a/benches/ini.rs b/benchmarks/benches/ini.rs similarity index 100% rename from benches/ini.rs rename to benchmarks/benches/ini.rs diff --git a/benches/ini_str.rs b/benchmarks/benches/ini_str.rs similarity index 100% rename from benches/ini_str.rs rename to benchmarks/benches/ini_str.rs diff --git a/benches/json.rs b/benchmarks/benches/json.rs similarity index 100% rename from benches/json.rs rename to benchmarks/benches/json.rs diff --git a/benches/number.rs b/benchmarks/benches/number.rs similarity index 100% rename from benches/number.rs rename to benchmarks/benches/number.rs diff --git a/benchmarks/src/lib.rs b/benchmarks/src/lib.rs new file mode 100644 index 000000000..059dc1238 --- /dev/null +++ b/benchmarks/src/lib.rs @@ -0,0 +1,8 @@ +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + let result = 2 + 2; + assert_eq!(result, 4); + } +} diff --git a/build.rs b/build.rs deleted file mode 100644 index 0fb578435..000000000 --- a/build.rs +++ /dev/null @@ -1,7 +0,0 @@ -extern crate version_check; - -fn main() { - if version_check::is_min_version("1.44.0").unwrap_or(true) { - println!("cargo:rustc-cfg=stable_i128"); - } -} diff --git a/doc/archive/FAQ.md b/doc/archive/FAQ.md index 9b02ee925..eb39788ff 100644 --- a/doc/archive/FAQ.md +++ b/doc/archive/FAQ.md @@ -12,7 +12,7 @@ If you got the following error when compiling your nom parser: error[E0425]: cannot find value `INVALID_NOM_SYNTAX_PLEASE_SEE_FAQ` in this scope --> src/lib.rs:111:7 | -111 | INVALID_NOM_SYNTAX_PLEASE_SEE_FAQ //https://github.com/Geal/nom/blob/master/doc/FAQ.md#using-nightly-to-get-better-error-messages +111 | INVALID_NOM_SYNTAX_PLEASE_SEE_FAQ 
//https://github.com/Geal/nom/blob/main/doc/archive/FAQ.md#using-nightly-to-get-better-error-messages | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ not found in this scope ``` diff --git a/doc/archive/upgrading_to_nom_1.md b/doc/archive/upgrading_to_nom_1.md index b59cb4ad0..f54a0bfba 100644 --- a/doc/archive/upgrading_to_nom_1.md +++ b/doc/archive/upgrading_to_nom_1.md @@ -51,7 +51,7 @@ fn parse_status(i: &[u8]) -> IResult<&[u8], Status> # Producers and consumers -The old implementation was not flexible, and a bit slow (because of allocations). The new implementation can be driven more precisely outside of the consumer, step by step if needed, can return a result, has custom error types, and can combine consumers. You can see [an example in the repository](https://github.com/Geal/nom/blob/master/tests/omnom.rs#). +The old implementation was not flexible, and a bit slow (because of allocations). The new implementation can be driven more precisely outside of the consumer, step by step if needed, can return a result, has custom error types, and can combine consumers. You can see [an example in the repository](https://github.com/Geal/nom/blob/1.0/tests/omnom.rs#). 
# Changes around `Incomplete` diff --git a/doc/choosing_a_combinator.md b/doc/choosing_a_combinator.md index e23531059..0c389e7c8 100644 --- a/doc/choosing_a_combinator.md +++ b/doc/choosing_a_combinator.md @@ -27,7 +27,7 @@ Those are used to recognize the lowest level elements of your grammar, like, "he | combinator | usage | input | output | comment | |---|---|---|---|---| | [alt](https://docs.rs/nom/latest/nom/branch/fn.alt.html) | `alt((tag("ab"), tag("cd")))` | `"cdef"` | `Ok(("ef", "cd"))` |Try a list of parsers and return the result of the first successful one| -| [permutation](https://docs.rs/nom/latest/nom/branch/fn.permutation.html) | `permutation(tag("ab"), tag("cd"), tag("12"))` | `"cd12abc"` | `Ok(("c", ("ab", "cd", "12"))` |Succeeds when all its child parser have succeeded, whatever the order| +| [permutation](https://docs.rs/nom/latest/nom/branch/fn.permutation.html) | `permutation((tag("ab"), tag("cd"), tag("12")))` | `"cd12abc"` | `Ok(("c", ("ab", "cd", "12")))` |Succeeds when all its child parsers have succeeded, whatever the order| ## Sequence combinators @@ -38,19 +38,20 @@ Those are used to recognize the lowest level elements of your grammar, like, "he | [terminated](https://docs.rs/nom/latest/nom/sequence/fn.terminated.html) | `terminated(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "ab"))` || | [pair](https://docs.rs/nom/latest/nom/sequence/fn.pair.html) | `pair(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", ("ab", "XY")))` || | [separated_pair](https://docs.rs/nom/latest/nom/sequence/fn.separated_pair.html) | `separated_pair(tag("hello"), char(','), tag("world"))` | `"hello,world!"` | `Ok(("!", ("hello", "world")))` || +| tuple | `((tag("ab"), tag("XY"), take(1)))` | `"abXYZ!"` | `Ok(("!", ("ab", "XY", "Z")))` | Chains parsers and assembles the sub results in a tuple. 
You can use as many child parsers as you can put elements in a tuple| ## Applying a parser multiple times | combinator | usage | input | output | comment | |---|---|---|---|---| -| [count](https://docs.rs/nom/latest/nom/multi/fn.count.html) | `count(take(2), 3)` | `"abcdefgh"` | `Ok(("gh", vec!("ab", "cd", "ef")))` |Applies the child parser a specified number of times| -| [many0](https://docs.rs/nom/latest/nom/multi/fn.many0.html) | `many0(tag("ab"))` | `"abababc"` | `Ok(("c", vec!("ab", "ab", "ab")))` |Applies the parser 0 or more times and returns the list of results in a Vec. `many1` does the same operation but must return at least one element| -| [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!("ab", "ab")))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| -| [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!("ab", "ab"), "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| -| [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!("ab", "ab", "ab")))` |`separated_list1` works like `separated_list0` but must returns at least one element| +| [count](https://docs.rs/nom/latest/nom/multi/fn.count.html) | `count(take(2), 3)` | `"abcdefgh"` | `Ok(("gh", vec!["ab", "cd", "ef"]))` |Applies the child parser a specified number of times| +| [many0](https://docs.rs/nom/latest/nom/multi/fn.many0.html) | `many0(tag("ab"))` | `"abababc"` | `Ok(("c", vec!["ab", "ab", "ab"]))` |Applies the parser 0 or more times and returns the list of results in a Vec. 
`many1` does the same operation but must return at least one element| +| [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| +| [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| +| [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must returns at least one element| | [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. 
The `fold_many1` version must apply the child parser at least one time| | [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value| -| [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!("ab", "ab")))` |Gets a number from the first parser, then applies the second parser that many times| +| [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times| ## Integers @@ -74,11 +75,15 @@ The following parsers could be found on [docs.rs number section](https://docs.rs ## Modifiers -- [`cond`](https://docs.rs/nom/latest/nom/combinator/fn.cond.html): Conditional combinator. 
Wraps another parser and calls it if the condition is met -- [`Parser::flat_map`](https://docs.rs/nom/latest/nom/trait.Parser.html#method.flat_map): method to map a new parser from the output of the first parser, then apply that parser over the rest of the input -- [`flat_map`](https://docs.rs/nom/latest/nom/combinator/fn.flat_map.html): function variant of `Parser::flat_map` -- [`Parser::map`](https://docs.rs/nom/latest/nom/trait.Parser.html#method.map): method to map a function on the result of a parser + +- [`Parser::and`](https://docs.rs/nom/latest/nom/trait.Parser.html#method.and): method to create a parser by applying the supplied parser to the rest of the input after applying `self`, returning their results as a tuple (like `sequence::tuple` but only takes one parser) +- [`Parser::and_then`](https://docs.rs/nom/latest/nom/trait.Parser.html#method.and_then): method to create a parser from applying another parser to the output of `self` +- [`map_parser`](https://docs.rs/nom/latest/nom/combinator/fn.map_parser.html): function variant of `Parser::and_then` +- [`Parser::map`](https://docs.rs/nom/latest/nom/trait.Parser.html#method.map): method to map a function on the output of `self` - [`map`](https://docs.rs/nom/latest/nom/combinator/fn.map.html): function variant of `Parser::map` +- [`Parser::flat_map`](https://docs.rs/nom/latest/nom/trait.Parser.html#method.flat_map): method to create a parser which will map a parser returning function (such as `take` or something which returns a parser) on the output of `self`, then apply that parser over the rest of the input. That is, this method accepts a parser-returning function which consumes the output of `self`, the resulting parser gets applied to the rest of the input +- [`flat_map`](https://docs.rs/nom/latest/nom/combinator/fn.flat_map.html): function variant of `Parser::flat_map` +- [`cond`](https://docs.rs/nom/latest/nom/combinator/fn.cond.html): Conditional combinator. 
Wraps another parser and calls it if the condition is met - [`map_opt`](https://docs.rs/nom/latest/nom/combinator/fn.map_opt.html): Maps a function returning an `Option` on the output of a parser - [`map_res`](https://docs.rs/nom/latest/nom/combinator/fn.map_res.html): Maps a function returning a `Result` on the output of a parser - [`not`](https://docs.rs/nom/latest/nom/combinator/fn.not.html): Returns a result only if the embedded parser returns `Error` or `Incomplete`. Does not consume the input @@ -125,7 +130,7 @@ Use these functions with a combinator like `take_while`: - [`is_space`](https://docs.rs/nom/latest/nom/character/fn.is_space.html): Tests if byte is ASCII space or tab: `[ \t]` - [`is_newline`](https://docs.rs/nom/latest/nom/character/fn.is_newline.html): Tests if byte is ASCII newline: `[\n]` -Alternatively there are ready to use function: +Alternatively there are ready to use functions: - [`alpha0`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha0.html): Recognizes zero or more lowercase and uppercase alphabetic characters: `[a-zA-Z]`. [`alpha1`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha1.html) does the same but returns at least one character - [`alphanumeric0`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric0.html): Recognizes zero or more numerical and alphabetic characters: `[0-9a-zA-Z]`. [`alphanumeric1`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric1.html) does the same but returns at least one character diff --git a/doc/error_management.md b/doc/error_management.md index 34d908473..0cb7b5203 100644 --- a/doc/error_management.md +++ b/doc/error_management.md @@ -21,11 +21,11 @@ pub enum Err { The result is either an `Ok((I, O))` containing the remaining input and the parsed value, or an `Err(nom::Err)` with `E` the error type. 
-`nom::Err` is an enum because combinators can have diferent behaviours -depending on the value: +`nom::Err` is an enum because combinators can have different behaviours +depending on the value. The `Err` enum expresses 3 conditions for a parser error: +- `Incomplete` indicates that a parser did not have enough data to decide. This can be returned by parsers found in `streaming` submodules to indicate that we should buffer more data from a file or socket. Parsers in the `complete` submodules assume that they have the entire input data, so if it was not sufficient, they will instead return a `Err::Error`. When a parser returns `Incomplete`, we should accumulate more data in the buffer (example: reading from a socket) and call the parser again - `Error` is a normal parser error. If a child parser of the `alt` combinator returns `Error`, it will try another child parser -- `Failure` is an error from which we cannot recover: The `alt` combinator will not try other branches if a child parser returns `Failure`. This is used when we know we were in the right branch of `alt` and do not need to try other branches -- `Incomplete` indicates that a parser did not have enough data to decide. This can be returned by parsers found in `streaming` submodules. Parsers in the `complete` submodules assume that they have the entire input data, so if it was not sufficient, they will instead return a `Err::Error`. When a parser returns `Incomplete`, we should accumulate more data in the buffer (example: reading from a socket) and call the parser again +- `Failure` is an error from which we cannot recover: The `alt` combinator will not try other branches if a child parser returns `Failure`. 
If we know we were in the right branch (example: we found a correct prefix character but input after that was wrong), we can transform a `Err::Error` into a `Err::Failure` with the `cut()` combinator If we are running a parser and know it will not return `Err::Incomplete`, we can directly extract the error type from `Err::Error` or `Err::Failure` with the @@ -36,13 +36,13 @@ let parser_result: IResult = parser(input); let result: Result<(I, O), E> = parser_result.finish(); ``` -If we used a borrowed type as input, like `&[u8]` ot `&str`, we might want to +If we used a borrowed type as input, like `&[u8]` or `&str`, we might want to convert it to an owned type to transmit it somewhere, with the `to_owned()` method: ```rust let result: Result<(&[u8], Value), Err>> = - parser(data).map_err(|e: E<&[u8]>| -> e.to_owned()); + parser(data).map_err(|e: E<&[u8]>| e.to_owned()); ``` nom provides a powerful error system that can adapt to your needs: you can @@ -68,11 +68,6 @@ directly at the call site. See [the JSON parser](https://github.com/Geal/nom/blob/5405e1173f1052f7e006dcb0b9cfda2b06557b65/examples/json.rs#L209-L286) for an example of choosing different error types at the call site. -The `Err` enum expresses 3 conditions for a parser error: -- `Incomplete` indicates that a parser did not have enough data to decide. This can be returned by parsers found in `streaming` submodules to indicate that we should buffer more data from a file or socket. Parsers in the `complete` submodules assume that they have the entire input data, so if it was not sufficient, they will instead return a `Err::Error` -- `Error` is a normal parser error. If a child parser of the `alt` combinator returns `Error`, it will try another child parser -- `Failure` is an error from which we cannot recover: The `alt` combinator will not try other branches if a child parser returns `Failure`. 
If we know we were in the right branch (example: we found a correct prefix character but input after that was wrong), we can transform a `Err::Error` into a `Err::Failure` with the `cut()` combinator - ## Common error types ### the default error type: nom::error::Error diff --git a/doc/making_a_new_parser_from_scratch.md b/doc/making_a_new_parser_from_scratch.md index 94d297745..0d2e66aae 100644 --- a/doc/making_a_new_parser_from_scratch.md +++ b/doc/making_a_new_parser_from_scratch.md @@ -191,10 +191,10 @@ This function wraps a parser that accepts a `&[u8]` as input and prints its hexdump if the child parser encountered an error: ```rust -use nom::{dbg_dmp, bytes::complete::tag}; +use nom::{IResult, error::dbg_dmp, bytes::complete::tag}; fn f(i: &[u8]) -> IResult<&[u8], &[u8]> { - dbg_dmp(tag("abcd"))(i) + dbg_dmp(tag("abcd"), "tag")(i) } let a = &b"efghijkl"[..]; diff --git a/doc/nom_recipes.md b/doc/nom_recipes.md index 9b6f310f8..6002ebdb7 100644 --- a/doc/nom_recipes.md +++ b/doc/nom_recipes.md @@ -35,9 +35,9 @@ use nom::{ /// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and /// trailing whitespace, returning the output of `inner`. 
-fn ws<'a, F: 'a, O, E: ParseError<&'a str>>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> +fn ws<'a, F, O, E: ParseError<&'a str>>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> where - F: Fn(&'a str) -> IResult<&'a str, O, E>, + F: FnMut(&'a str) -> IResult<&'a str, O, E>, { delimited( multispace0, @@ -114,7 +114,7 @@ letters and numbers may be parsed like this: use nom::{ IResult, branch::alt, - multi::many0, + multi::many0_count, combinator::recognize, sequence::pair, character::complete::{alpha1, alphanumeric1}, @@ -125,7 +125,7 @@ pub fn identifier(input: &str) -> IResult<&str, &str> { recognize( pair( alt((alpha1, tag("_"))), - many0(alt((alphanumeric1, tag("_")))) + many0_count(alt((alphanumeric1, tag("_")))) ) )(input) } @@ -141,7 +141,7 @@ input text that was parsed, which in this case is the entire `&str` `hello_world ### Escaped Strings -This is [one of the examples](https://github.com/Geal/nom/blob/master/examples/string.rs) in the +This is [one of the examples](https://github.com/Geal/nom/blob/main/examples/string.rs) in the examples directory. ### Integers diff --git a/doc/upgrading_to_nom_5.md b/doc/upgrading_to_nom_5.md index 599ba729a..421eb6711 100644 --- a/doc/upgrading_to_nom_5.md +++ b/doc/upgrading_to_nom_5.md @@ -113,7 +113,7 @@ list, we have functions that take other functions as arguments, and return functions. This technique has a lot of advantages over macros: -- No type inference issues, you can explicitely describe the error type in +- No type inference issues, you can explicitly describe the error type in function definitions - Nicer compilation errors: rustc can show you exactly what is missing when calling a combinator, if you need to import new traits, etc. 
diff --git a/examples/custom_error.rs b/examples/custom_error.rs index 976b032e8..c1fb2d88d 100644 --- a/examples/custom_error.rs +++ b/examples/custom_error.rs @@ -21,10 +21,12 @@ impl ParseError for CustomError { } } -fn parse(input: &str) -> IResult<&str, &str, CustomError<&str>> { +pub fn parse(_input: &str) -> IResult<&str, &str, CustomError<&str>> { Err(Error(CustomError::MyError)) } +fn main() {} + #[cfg(test)] mod tests { use super::parse; diff --git a/examples/json.rs b/examples/json.rs index f9e9a57b9..58785fc1d 100644 --- a/examples/json.rs +++ b/examples/json.rs @@ -1,8 +1,5 @@ #![cfg(feature = "alloc")] -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; - use nom::{ branch::alt, bytes::complete::{escaped, tag, take_while}, @@ -19,6 +16,7 @@ use std::str; #[derive(Debug, PartialEq)] pub enum JsonValue { + Null, Str(String), Boolean(bool), Num(f64), @@ -78,6 +76,10 @@ fn boolean<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, bool, alt((parse_true, parse_false))(input) } +fn null<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, (), E> { + value((), tag("null"))(input) +} + /// this parser combines the previous `parse_str` parser, that recognizes the /// interior of a string, with a parse to recognize the double quote character, /// before the string (using `preceded`) and after the string (using `terminated`). 
@@ -162,6 +164,7 @@ fn json_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>( map(string, |s| JsonValue::Str(String::from(s))), map(double, JsonValue::Num), map(boolean, JsonValue::Boolean), + map(null, |_| JsonValue::Null), )), )(i) } @@ -172,7 +175,11 @@ fn root<'a, E: ParseError<&'a str> + ContextError<&'a str>>( ) -> IResult<&'a str, JsonValue, E> { delimited( sp, - alt((map(hash, JsonValue::Object), map(array, JsonValue::Array))), + alt(( + map(hash, JsonValue::Object), + map(array, JsonValue::Array), + map(null, |_| JsonValue::Null), + )), opt(sp), )(i) } @@ -318,4 +325,6 @@ fn main() { } _ => {} } + + assert!(root::<(&str, ErrorKind)>("null").is_ok()); } diff --git a/examples/json_iterator.rs b/examples/json_iterator.rs index 3b56aacc5..ebc026346 100644 --- a/examples/json_iterator.rs +++ b/examples/json_iterator.rs @@ -1,32 +1,20 @@ #![cfg(feature = "alloc")] - -use jemallocator; - -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; - use nom::{ branch::alt, bytes::complete::{escaped, tag, take_while}, character::complete::{alphanumeric1 as alphanumeric, char, one_of}, - combinator::{map, opt, cut}, - error::{context, ErrorKind, ParseError}, - error::{VerboseError, VerboseErrorKind}, - multi::separated_list, + combinator::{cut, map}, + error::{context, ParseError}, + multi::separated_list0, number::complete::double, - sequence::{delimited, preceded, separated_pair, terminated}, - Err, IResult, Offset, + sequence::{preceded, separated_pair, terminated}, + IResult, }; use std::collections::HashMap; -use std::str; use std::cell::Cell; - -struct Cursor<'a> { - inner: &'a str, - offset: usize, -} +use std::str; #[derive(Clone, Debug)] pub struct JsonValue<'a, 'b> { @@ -34,7 +22,7 @@ pub struct JsonValue<'a, 'b> { pub offset: &'b Cell, } -impl<'a, 'b:'a> JsonValue<'a, 'b> { +impl<'a, 'b: 'a> JsonValue<'a, 'b> { pub fn new(input: &'a str, offset: &'b Cell) -> JsonValue<'a, 'b> { JsonValue { input, offset } } @@ -55,7 +43,7 
@@ impl<'a, 'b:'a> JsonValue<'a, 'b> { self.offset(i); println!("-> {}", s); Some(s) - }, + } _ => None, } } @@ -67,27 +55,27 @@ impl<'a, 'b:'a> JsonValue<'a, 'b> { self.offset(i); println!("-> {}", o); Some(o) - }, + } _ => None, } } pub fn number(&self) -> Option { println!("number()"); - match double::<_,()>(self.data()) { + match double::<_, ()>(self.data()) { Ok((i, o)) => { self.offset(i); println!("-> {}", o); Some(o) - }, + } _ => None, } } - pub fn array(&self) -> Option>> { + pub fn array(&self) -> Option>> { println!("array()"); - match tag::<_,_,()>("[")(self.data()) { + match tag::<_, _, ()>("[")(self.data()) { Err(_) => None, Ok((i, _)) => { println!("["); @@ -98,7 +86,7 @@ impl<'a, 'b:'a> JsonValue<'a, 'b> { let v = self.clone(); - Some(std::iter::from_fn(move|| { + Some(std::iter::from_fn(move || { if done { return None; } @@ -109,30 +97,29 @@ impl<'a, 'b:'a> JsonValue<'a, 'b> { match value(v.data()) { Ok((i, _)) => { v.offset(i); - }, - Err(_) => {}, + } + Err(_) => {} } } - - match tag::<_,_,()>("]")(v.data()) { + match tag::<_, _, ()>("]")(v.data()) { Ok((i, _)) => { println!("]"); v.offset(i); done = true; return None; - }, + } Err(_) => {} }; if first { first = false; } else { - match tag::<_,_,()>(",")(v.data()) { + match tag::<_, _, ()>(",")(v.data()) { Ok((i, _)) => { - println!(","); + println!(","); v.offset(i); - }, + } Err(_) => { done = true; return None; @@ -143,15 +130,14 @@ impl<'a, 'b:'a> JsonValue<'a, 'b> { println!("-> {}", v.data()); previous = v.offset.get(); Some(v.clone()) - })) } } } - pub fn object(&self) -> Option)>> { + pub fn object(&self) -> Option)>> { println!("object()"); - match tag::<_,_,()>("{")(self.data()) { + match tag::<_, _, ()>("{")(self.data()) { Err(_) => None, Ok((i, _)) => { self.offset(i); @@ -164,7 +150,7 @@ impl<'a, 'b:'a> JsonValue<'a, 'b> { let v = self.clone(); - Some(std::iter::from_fn(move|| { + Some(std::iter::from_fn(move || { if done { return None; } @@ -175,29 +161,29 @@ impl<'a, 'b:'a> 
JsonValue<'a, 'b> { match value(v.data()) { Ok((i, _)) => { v.offset(i); - }, - Err(_) => {}, + } + Err(_) => {} } } - match tag::<_,_,()>("}")(v.data()) { + match tag::<_, _, ()>("}")(v.data()) { Ok((i, _)) => { println!("}}"); v.offset(i); done = true; return None; - }, + } Err(_) => {} }; if first { first = false; } else { - match tag::<_,_,()>(",")(v.data()) { + match tag::<_, _, ()>(",")(v.data()) { Ok((i, _)) => { println!(","); v.offset(i); - }, + } Err(_) => { done = true; return None; @@ -209,7 +195,7 @@ impl<'a, 'b:'a> JsonValue<'a, 'b> { Ok((i, key)) => { v.offset(i); - match tag::<_,_,()>(":")(v.data()) { + match tag::<_, _, ()>(":")(v.data()) { Err(_) => None, Ok((i, _)) => { v.offset(i); @@ -220,10 +206,9 @@ impl<'a, 'b:'a> JsonValue<'a, 'b> { Some((key, v.clone())) } } - }, + } _ => None, } - })) } } @@ -241,47 +226,44 @@ fn parse_str<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str } fn string<'a>(i: &'a str) -> IResult<&'a str, &'a str> { - context("string", - preceded( - char('\"'), - cut(terminated( - parse_str, - char('\"') - ))))(i) + context( + "string", + preceded(char('\"'), cut(terminated(parse_str, char('\"')))), + )(i) } fn boolean<'a>(input: &'a str) -> IResult<&'a str, bool> { - alt(( - map(tag("false"), |_| false), - map(tag("true"), |_| true) - ))(input) + alt((map(tag("false"), |_| false), map(tag("true"), |_| true)))(input) } fn array<'a>(i: &'a str) -> IResult<&'a str, ()> { context( "array", - preceded(char('['), - cut(terminated( - map(separated_list(preceded(sp, char(',')), value), |_| ()), - preceded(sp, char(']')))) - ))(i) + preceded( + char('['), + cut(terminated( + map(separated_list0(preceded(sp, char(',')), value), |_| ()), + preceded(sp, char(']')), + )), + ), + )(i) } fn key_value<'a>(i: &'a str) -> IResult<&'a str, (&'a str, ())> { -separated_pair(preceded(sp, string), cut(preceded(sp, char(':'))), value)(i) + separated_pair(preceded(sp, string), cut(preceded(sp, char(':'))), value)(i) } fn 
hash<'a>(i: &'a str) -> IResult<&'a str, ()> { context( "map", - preceded(char('{'), - cut(terminated( - map( - separated_list(preceded(sp, char(',')), key_value), - |_| ()), - preceded(sp, char('}')), - )) - ))(i) + preceded( + char('{'), + cut(terminated( + map(separated_list0(preceded(sp, char(',')), key_value), |_| ()), + preceded(sp, char('}')), + )), + ), + )(i) } fn value<'a>(i: &'a str) -> IResult<&'a str, ()> { @@ -320,13 +302,17 @@ fn main() { let parser = JsonValue::new(data, &offset); if let Some(o) = parser.object() { - let s: HashMap<&str, &str> = o.filter(|(k,_)| *k == "users" ) - .filter_map(|(_, v)| v.object()).flatten() - .filter_map(|(user, v)| v.object().map(|o| (user, o))) - .map(|(user, o)| { - o.filter(|(k,_)| *k == "city" ) - .filter_map(move |(_, v)| v.string().map(|s| (user, s))) - }).flatten().collect(); + let s: HashMap<&str, &str> = o + .filter(|(k, _)| *k == "users") + .filter_map(|(_, v)| v.object()) + .flatten() + .filter_map(|(user, v)| v.object().map(|o| (user, o))) + .map(|(user, o)| { + o.filter(|(k, _)| *k == "city") + .filter_map(move |(_, v)| v.string().map(|s| (user, s))) + }) + .flatten() + .collect(); println!("res = {:?}", s); } diff --git a/examples/macro.rs b/examples/macro.rs deleted file mode 100644 index a5ecdfa17..000000000 --- a/examples/macro.rs +++ /dev/null @@ -1,17 +0,0 @@ -#[macro_use] -extern crate nom; - -use nom::{character::complete::digit0, number::complete::be_u32}; - - -named!(first, flat_map!(digit0, parse_to!(u32))); -named!(second, call!(be_u32)); - -named!(parser, alt!(first | second)); - -fn main() { - let data = b"1234;"; - - let res = parser(&data[..]); - println!("res: {:?}", res); -} diff --git a/examples/s_expression.rs b/examples/s_expression.rs index b115c503d..7f4595186 100644 --- a/examples/s_expression.rs +++ b/examples/s_expression.rs @@ -4,9 +4,6 @@ #![cfg(feature = "alloc")] -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; - use nom::{ branch::alt, 
bytes::complete::tag, @@ -22,7 +19,7 @@ use nom::{ /// In this case, we want something tree-like /// Starting from the most basic, we define some built-in functions that our lisp has -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] pub enum BuiltIn { Plus, Minus, @@ -35,7 +32,7 @@ pub enum BuiltIn { /// We now wrap this type and a few other primitives into our Atom type. /// Remember from before that Atoms form one half of our language. -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, Eq, PartialEq, Clone)] pub enum Atom { Num(i32), Keyword(String), @@ -53,7 +50,7 @@ pub enum Atom { /// structure that we can deal with programmatically. Thus any valid expression /// is also a valid data structure in Lisp itself. -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, Eq, PartialEq, Clone)] pub enum Expr { Constant(Atom), /// (func-name arg1 arg2) @@ -68,7 +65,7 @@ pub enum Expr { /// Continuing the trend of starting from the simplest piece and building up, /// we start by creating a parser for the built-in operator functions. 
-fn parse_builtin_op<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> { +fn parse_builtin_op(i: &str) -> IResult<&str, BuiltIn, VerboseError<&str>> { // one_of matches one of the characters we give it let (i, t) = one_of("+-*/=")(i)?; @@ -87,7 +84,7 @@ fn parse_builtin_op<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&' )) } -fn parse_builtin<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> { +fn parse_builtin(i: &str) -> IResult<&str, BuiltIn, VerboseError<&str>> { // alt gives us the result of first parser that succeeds, of the series of // parsers we give it alt(( @@ -99,7 +96,7 @@ fn parse_builtin<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a s } /// Our boolean values are also constant, so we can do it the same way -fn parse_bool<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> { +fn parse_bool(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { alt(( map(tag("#t"), |_| Atom::Boolean(true)), map(tag("#f"), |_| Atom::Boolean(false)), @@ -112,7 +109,7 @@ fn parse_bool<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> { /// /// Put plainly: `preceded(tag(":"), cut(alpha1))` means that once we see the `:` /// character, we have to see one or more alphabetic chararcters or the input is invalid. -fn parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> { +fn parse_keyword(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { map( context("keyword", preceded(tag(":"), cut(alpha1))), |sym_str: &str| Atom::Keyword(sym_str.to_string()), @@ -121,20 +118,20 @@ fn parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str> /// Next up is number parsing. We're keeping it simple here by accepting any number (> 1) /// of digits but ending the program if it doesn't fit into an i32. 
-fn parse_num<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> { +fn parse_num(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { alt(( map_res(digit1, |digit_str: &str| { digit_str.parse::().map(Atom::Num) }), map(preceded(tag("-"), digit1), |digit_str: &str| { - Atom::Num(-1 * digit_str.parse::().unwrap()) + Atom::Num(-digit_str.parse::().unwrap()) }), ))(i) } /// Now we take all these simple parsers and connect them. /// We can now parse half of our language! -fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> { +fn parse_atom(i: &str) -> IResult<&str, Atom, VerboseError<&str>> { alt(( parse_num, parse_bool, @@ -144,8 +141,8 @@ fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> { } /// We then add the Expr layer on top -fn parse_constant<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { - map(parse_atom, |atom| Expr::Constant(atom))(i) +fn parse_constant(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { + map(parse_atom, Expr::Constant)(i) } /// Before continuing, we need a helper function to parse lists. @@ -176,7 +173,7 @@ where /// We can sequence parsers together by grouping them in a tuple, forming a new /// parser returning a tuple containing the result of each parser in the same order, /// and then map over it to transform the output into an `Expr::Application` -fn parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { +fn parse_application(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { let application_inner = map((parse_expr, many0(parse_expr)), |(head, tail)| { Expr::Application(Box::new(head), tail) }); @@ -190,7 +187,7 @@ fn parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a /// /// In fact, we define our parser as if `Expr::If` was defined with an Option in it, /// we have the `opt` combinator which fits very nicely here. 
-fn parse_if<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { +fn parse_if(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { let if_inner = context( "if expression", map( @@ -222,19 +219,19 @@ fn parse_if<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { /// This example doesn't have the symbol atom, but by adding variables and changing /// the definition of quote to not always be around an S-expression, we'd get them /// naturally. -fn parse_quote<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { +fn parse_quote(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { // this should look very straight-forward after all we've done: // we find the `'` (quote) character, use cut to say that we're unambiguously // looking for an s-expression of 0 or more expressions, and then parse them map( context("quote", preceded(tag("'"), cut(s_exp(many0(parse_expr))))), - |exprs| Expr::Quote(exprs), + Expr::Quote, )(i) } /// We tie them all together again, making a top-level expression parser! 
-fn parse_expr<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { +fn parse_expr(i: &str) -> IResult<&str, Expr, VerboseError<&str>> { preceded( multispace0, alt((parse_constant, parse_application, parse_if, parse_quote)), @@ -295,7 +292,7 @@ fn eval_expression(e: Expr) -> Option { let reduced_head = eval_expression(*head)?; let reduced_tail = tail .into_iter() - .map(|expr| eval_expression(expr)) + .map(eval_expression) .collect::>>()?; if let Expr::Constant(Atom::BuiltIn(bi)) = reduced_head { Some(Expr::Constant(match bi { @@ -365,7 +362,7 @@ fn eval_expression(e: Expr) -> Option { fn eval_from_str(src: &str) -> Result { parse_expr(src) .map_err(|e: nom::Err>| format!("{:#?}", e)) - .and_then(|(_, exp)| eval_expression(exp).ok_or("Eval failed".to_string())) + .and_then(|(_, exp)| eval_expression(exp).ok_or_else(|| "Eval failed".to_string())) } fn main() { diff --git a/examples/string.rs b/examples/string.rs index f89d3d126..b4015c571 100644 --- a/examples/string.rs +++ b/examples/string.rs @@ -11,9 +11,6 @@ #![cfg(feature = "alloc")] -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; - use nom::branch::alt; use nom::bytes::streaming::{is_not, take_while_m_n}; use nom::character::streaming::{char, multispace1}; @@ -38,7 +35,7 @@ where // a predicate. `parse_hex` here parses between 1 and 6 hexadecimal numerals. let parse_hex = take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()); - // `preceeded` takes a prefix parser, and if it succeeds, returns the result + // `preceded` takes a prefix parser, and if it succeeds, returns the result // of the body parser. In this case, it parses u{XXXX}. let parse_delimited_hex = preceded( char('u'), @@ -57,7 +54,7 @@ where // the function returns None, map_opt returns an error. In this case, because // not all u32 values are valid unicode code points, we have to fallibly // convert to char with from_u32. 
- map_opt(parse_u32, |value| std::char::from_u32(value))(input) + map_opt(parse_u32, std::char::from_u32)(input) } /// Parse an escaped character: \n, \t, \r, \u{00AC}, etc. diff --git a/proptest-regressions/number/streaming.txt b/proptest-regressions/number/streaming.txt index 44be1dc7e..d672df2ea 100644 --- a/proptest-regressions/number/streaming.txt +++ b/proptest-regressions/number/streaming.txt @@ -5,3 +5,4 @@ # It is recommended to check this file in to source control so that # everyone who runs the test benefits from these saved cases. cc 68154e0c90b20374781d3e3932bddb80e8c6a97901d0331bbd7e6daa75b794cb # shrinks to s = "0e" +cc d31506b74ad24a80485adb176039e2fa82cf58798738288a2c810952c68d7600 # shrinks to s = "inf" diff --git a/src/bits/complete.rs b/src/bits/complete.rs index b46bc7a27..bf36dcc2a 100644 --- a/src/bits/complete.rs +++ b/src/bits/complete.rs @@ -105,6 +105,29 @@ where } } +/// Parses one specific bit as a bool. +/// +/// # Example +/// ```rust +/// # use nom::bits::complete::bool; +/// # use nom::IResult; +/// # use nom::error::{Error, ErrorKind}; +/// +/// fn parse(input: (&[u8], usize)) -> IResult<(&[u8], usize), bool> { +/// bool(input) +/// } +/// +/// assert_eq!(parse(([0b10000000].as_ref(), 0)), Ok((([0b10000000].as_ref(), 1), true))); +/// assert_eq!(parse(([0b10000000].as_ref(), 1)), Ok((([0b10000000].as_ref(), 2), false))); +/// ``` +pub fn bool>(input: (I, usize)) -> IResult<(I, usize), bool, E> +where + I: Slice> + InputIter + InputLength, +{ + let (res, bit): (_, u32) = take(1usize)(input)?; + Ok((res, bit != 0)) +} + #[cfg(test)] mod test { use super::*; @@ -147,4 +170,28 @@ mod test { Ok((([0b11111111].as_ref(), 4), 0b1000110100111111111111)) ); } + + #[test] + fn test_bool_0() { + let input = [0b10000000].as_ref(); + + let result: crate::IResult<(&[u8], usize), bool> = bool((input, 0)); + + assert_eq!(result, Ok(((input, 1), true))); + } + + #[test] + fn test_bool_eof() { + let input = [0b10000000].as_ref(); + + let result: 
crate::IResult<(&[u8], usize), bool> = bool((input, 8)); + + assert_eq!( + result, + Err(crate::Err::Error(crate::error::Error { + input: (input, 8), + code: ErrorKind::Eof + })) + ); + } } diff --git a/src/bits/mod.rs b/src/bits/mod.rs index 68c1b83f8..c08874a35 100644 --- a/src/bits/mod.rs +++ b/src/bits/mod.rs @@ -41,7 +41,7 @@ where E1: ParseError<(I, usize)> + ErrorConvert, E2: ParseError, I: Slice>, - P: Parser<(I, usize), O, E1> + P: Parser<(I, usize), O, E1>, { move |input: I| match parser.parse((input, 0)) { Ok(((rest, offset), result)) => { @@ -86,7 +86,7 @@ where E1: ParseError + ErrorConvert, E2: ParseError<(I, usize)>, I: Slice> + Clone, - P: FnMut(I) -> IResult, + P: Parser, { move |(input, offset): (I, usize)| { let inner = if offset % 8 != 0 { @@ -95,7 +95,7 @@ where input.slice((offset / 8)..) }; let i = (input, offset); - match parser(inner) { + match parser.parse(inner) { Ok((rest, res)) => Ok(((rest, 0), res)), Err(Err::Incomplete(Needed::Unknown)) => Err(Err::Incomplete(Needed::Unknown)), Err(Err::Incomplete(Needed::Size(sz))) => Err(match sz.get().checked_mul(8) { @@ -122,9 +122,7 @@ mod test { // Take 3 bit slices with sizes [4, 8, 4]. let result: IResult<&[u8], (u8, u8, u8)> = - bits::<_, _, Error<(&[u8], usize)>, _, _>((take(4usize), take(8usize), take(4usize)))( - input, - ); + bits::<_, _, Error<(&[u8], usize)>, _, _>((take(4usize), take(8usize), take(4usize)))(input); let output = result.expect("We take 2 bytes and the input is longer than 2 bytes"); diff --git a/src/bits/streaming.rs b/src/bits/streaming.rs index 982569ce2..a7c8d0a67 100644 --- a/src/bits/streaming.rs +++ b/src/bits/streaming.rs @@ -78,3 +78,93 @@ where }) } } + +/// Parses one specific bit as a bool. 
+/// +/// # Example +/// ```rust +/// # use nom::bits::streaming::bool; +/// # use nom::IResult; +/// # use nom::error::{Error, ErrorKind}; +/// +/// fn parse(input: (&[u8], usize)) -> IResult<(&[u8], usize), bool> { +/// bool(input) +/// } +/// +/// assert_eq!(parse(([0b10000000].as_ref(), 0)), Ok((([0b10000000].as_ref(), 1), true))); +/// assert_eq!(parse(([0b10000000].as_ref(), 1)), Ok((([0b10000000].as_ref(), 2), false))); +/// ``` +pub fn bool>(input: (I, usize)) -> IResult<(I, usize), bool, E> +where + I: Slice> + InputIter + InputLength, +{ + let (res, bit): (_, u32) = take(1usize)(input)?; + Ok((res, bit != 0)) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_take_0() { + let input = [].as_ref(); + let count = 0usize; + assert_eq!(count, 0usize); + let offset = 0usize; + + let result: crate::IResult<(&[u8], usize), usize> = take(count)((input, offset)); + + assert_eq!(result, Ok(((input, offset), 0))); + } + + #[test] + fn test_tag_ok() { + let input = [0b00011111].as_ref(); + let offset = 0usize; + let bits_to_take = 4usize; + let value_to_tag = 0b0001; + + let result: crate::IResult<(&[u8], usize), usize> = + tag(value_to_tag, bits_to_take)((input, offset)); + + assert_eq!(result, Ok(((input, bits_to_take), value_to_tag))); + } + + #[test] + fn test_tag_err() { + let input = [0b00011111].as_ref(); + let offset = 0usize; + let bits_to_take = 4usize; + let value_to_tag = 0b1111; + + let result: crate::IResult<(&[u8], usize), usize> = + tag(value_to_tag, bits_to_take)((input, offset)); + + assert_eq!( + result, + Err(crate::Err::Error(crate::error::Error { + input: (input, offset), + code: ErrorKind::TagBits + })) + ); + } + + #[test] + fn test_bool_0() { + let input = [0b10000000].as_ref(); + + let result: crate::IResult<(&[u8], usize), bool> = bool((input, 0)); + + assert_eq!(result, Ok(((input, 1), true))); + } + + #[test] + fn test_bool_eof() { + let input = [0b10000000].as_ref(); + + let result: crate::IResult<(&[u8], usize), bool> = 
bool((input, 8)); + + assert_eq!(result, Err(crate::Err::Incomplete(Needed::new(1)))); + } +} diff --git a/src/branch/mod.rs b/src/branch/mod.rs index 9df5a2941..e03622cb0 100644 --- a/src/branch/mod.rs +++ b/src/branch/mod.rs @@ -1,29 +1,5 @@ //! Choice combinators -macro_rules! succ ( - (0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*)); - (1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*)); - (2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*)); - (3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*)); - (4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*)); - (5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*)); - (6, $submac:ident ! ($($rest:tt)*)) => ($submac!(7, $($rest)*)); - (7, $submac:ident ! ($($rest:tt)*)) => ($submac!(8, $($rest)*)); - (8, $submac:ident ! ($($rest:tt)*)) => ($submac!(9, $($rest)*)); - (9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*)); - (10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*)); - (11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*)); - (12, $submac:ident ! ($($rest:tt)*)) => ($submac!(13, $($rest)*)); - (13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*)); - (14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*)); - (15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*)); - (16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*)); - (17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*)); - (18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*)); - (19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*)); - (20, $submac:ident ! 
($($rest:tt)*)) => ($submac!(21, $($rest)*)); -); - #[cfg(test)] mod tests; diff --git a/src/branch/tests.rs b/src/branch/tests.rs index ecd44407e..ff0e173fe 100644 --- a/src/branch/tests.rs +++ b/src/branch/tests.rs @@ -12,7 +12,7 @@ use crate::{ }; #[cfg(feature = "alloc")] -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq)] pub struct ErrorStr(String); #[cfg(feature = "alloc")] @@ -114,6 +114,7 @@ fn alt_incomplete() { #[test] fn permutation_test() { + #[allow(clippy::type_complexity)] fn perm(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8], &[u8])> { permutation((tag("abcd"), tag("efg"), tag("hi")))(i) } diff --git a/src/bytes/complete.rs b/src/bytes/complete.rs index 9375b1fc5..25da0754e 100644 --- a/src/bytes/complete.rs +++ b/src/bytes/complete.rs @@ -327,6 +327,7 @@ where /// assert_eq!(till_colon("12345"), Ok(("", "12345"))); /// assert_eq!(till_colon(""), Ok(("", ""))); /// ``` +#[allow(clippy::redundant_closure)] pub fn take_till>( cond: F, ) -> impl Fn(Input) -> IResult @@ -358,6 +359,7 @@ where /// assert_eq!(till_colon("12345"), Ok(("", "12345"))); /// assert_eq!(till_colon(""), Err(Err::Error(Error::new("", ErrorKind::TakeTill1)))); /// ``` +#[allow(clippy::redundant_closure)] pub fn take_till1>( cond: F, ) -> impl Fn(Input) -> IResult @@ -388,6 +390,18 @@ where /// assert_eq!(take6("short"), Err(Err::Error(Error::new("short", ErrorKind::Eof)))); /// assert_eq!(take6(""), Err(Err::Error(Error::new("", ErrorKind::Eof)))); /// ``` +/// +/// The units that are taken will depend on the input type. 
For example, for a +/// `&str` it will take a number of `char`'s, whereas for a `&[u8]` it will +/// take that many `u8`'s: +/// +/// ```rust +/// use nom::error::Error; +/// use nom::bytes::complete::take; +/// +/// assert_eq!(take::<_, _, Error<_>>(1usize)("💙"), Ok(("", "💙"))); +/// assert_eq!(take::<_, _, Error<_>>(1usize)("💙".as_bytes()), Ok((b"\x9F\x92\x99".as_ref(), b"\xF0".as_ref()))); +/// ``` pub fn take>( count: C, ) -> impl Fn(Input) -> IResult @@ -724,7 +738,7 @@ mod tests { } // issue ##1118 escaped does not work with empty string - fn unquote<'a>(input: &'a str) -> IResult<&'a str, &'a str> { + fn unquote(input: &str) -> IResult<&str, &str> { use crate::bytes::complete::*; use crate::character::complete::*; use crate::combinator::opt; diff --git a/src/bytes/streaming.rs b/src/bytes/streaming.rs index e972760e2..a8207a967 100644 --- a/src/bytes/streaming.rs +++ b/src/bytes/streaming.rs @@ -340,6 +340,7 @@ where /// assert_eq!(till_colon("12345"), Err(Err::Incomplete(Needed::new(1)))); /// assert_eq!(till_colon(""), Err(Err::Incomplete(Needed::new(1)))); /// ``` +#[allow(clippy::redundant_closure)] pub fn take_till>( cond: F, ) -> impl Fn(Input) -> IResult @@ -372,6 +373,7 @@ where /// assert_eq!(till_colon("12345"), Err(Err::Incomplete(Needed::new(1)))); /// assert_eq!(till_colon(""), Err(Err::Incomplete(Needed::new(1)))); /// ``` +#[allow(clippy::redundant_closure)] pub fn take_till1>( cond: F, ) -> impl Fn(Input) -> IResult diff --git a/src/bytes/tests.rs b/src/bytes/tests.rs index 159c4b4ff..4af4eeff0 100644 --- a/src/bytes/tests.rs +++ b/src/bytes/tests.rs @@ -612,11 +612,11 @@ fn case_insensitive() { assert_eq!(test2("ab"), Err(Err::Incomplete(Needed::new(2)))); assert_eq!( test2("Hello"), - Err(Err::Error(error_position!(&"Hello"[..], ErrorKind::Tag))) + Err(Err::Error(error_position!("Hello", ErrorKind::Tag))) ); assert_eq!( test2("Hel"), - Err(Err::Error(error_position!(&"Hel"[..], ErrorKind::Tag))) + Err(Err::Error(error_position!("Hel", 
ErrorKind::Tag))) ); } diff --git a/src/character/complete.rs b/src/character/complete.rs index 5b73c1467..eccbb4e3a 100644 --- a/src/character/complete.rs +++ b/src/character/complete.rs @@ -414,6 +414,24 @@ where /// assert_eq!(parser("c1"), Err(Err::Error(Error::new("c1", ErrorKind::Digit)))); /// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Digit)))); /// ``` +/// +/// ## Parsing an integer +/// You can use `digit1` in combination with [`map_res`] to parse an integer: +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::combinator::map_res; +/// # use nom::character::complete::digit1; +/// fn parser(input: &str) -> IResult<&str, u32> { +/// map_res(digit1, str::parse)(input) +/// } +/// +/// assert_eq!(parser("416"), Ok(("", 416))); +/// assert_eq!(parser("12b"), Ok(("b", 12))); +/// assert!(parser("b").is_err()); +/// ``` +/// +/// [`map_res`]: crate::combinator::map_res pub fn digit1>(input: T) -> IResult where T: InputTakeAtPosition, @@ -879,8 +897,8 @@ mod tests { let e = " "; assert_eq!(alpha1::<_, (_, ErrorKind)>(a), Ok((empty, a))); assert_eq!(alpha1(b), Err(Err::Error((b, ErrorKind::Alpha)))); - assert_eq!(alpha1::<_, (_, ErrorKind)>(c), Ok((&c[1..], &"a"[..]))); - assert_eq!(alpha1::<_, (_, ErrorKind)>(d), Ok(("é12", &"az"[..]))); + assert_eq!(alpha1::<_, (_, ErrorKind)>(c), Ok((&c[1..], "a"))); + assert_eq!(alpha1::<_, (_, ErrorKind)>(d), Ok(("é12", "az"))); assert_eq!(digit1(a), Err(Err::Error((a, ErrorKind::Digit)))); assert_eq!(digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); assert_eq!(digit1(c), Err(Err::Error((c, ErrorKind::Digit)))); @@ -888,7 +906,7 @@ mod tests { assert_eq!(hex_digit1::<_, (_, ErrorKind)>(a), Ok((empty, a))); assert_eq!(hex_digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); assert_eq!(hex_digit1::<_, (_, ErrorKind)>(c), Ok((empty, c))); - assert_eq!(hex_digit1::<_, (_, ErrorKind)>(d), Ok(("zé12", &"a"[..]))); + assert_eq!(hex_digit1::<_, (_, ErrorKind)>(d), 
Ok(("zé12", "a"))); assert_eq!(hex_digit1(e), Err(Err::Error((e, ErrorKind::HexDigit)))); assert_eq!(oct_digit1(a), Err(Err::Error((a, ErrorKind::OctDigit)))); assert_eq!(oct_digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); @@ -976,10 +994,7 @@ mod tests { ); let d: &[u8] = b"ab12cd"; - assert_eq!( - not_line_ending::<_, (_, ErrorKind)>(d), - Ok((&[][..], &d[..])) - ); + assert_eq!(not_line_ending::<_, (_, ErrorKind)>(d), Ok((&[][..], d))); } #[test] @@ -1113,7 +1128,7 @@ mod tests { assert_parse!(crlf("\r\na"), Ok(("a", "\r\n"))); assert_parse!( crlf("\r"), - Err(Err::Error(error_position!(&"\r"[..], ErrorKind::CrLf))) + Err(Err::Error(error_position!("\r", ErrorKind::CrLf))) ); assert_parse!( crlf("\ra"), @@ -1138,7 +1153,7 @@ mod tests { assert_parse!(line_ending("\r\na"), Ok(("a", "\r\n"))); assert_parse!( line_ending("\r"), - Err(Err::Error(error_position!(&"\r"[..], ErrorKind::CrLf))) + Err(Err::Error(error_position!("\r", ErrorKind::CrLf))) ); assert_parse!( line_ending("\ra"), diff --git a/src/character/mod.rs b/src/character/mod.rs index 2c5d3bc4a..7ed98e92f 100644 --- a/src/character/mod.rs +++ b/src/character/mod.rs @@ -19,7 +19,7 @@ pub mod streaming; /// ``` #[inline] pub fn is_alphabetic(chr: u8) -> bool { - (chr >= 0x41 && chr <= 0x5A) || (chr >= 0x61 && chr <= 0x7A) + matches!(chr, 0x41..=0x5A | 0x61..=0x7A) } /// Tests if byte is ASCII digit: 0-9 @@ -33,7 +33,7 @@ pub fn is_alphabetic(chr: u8) -> bool { /// ``` #[inline] pub fn is_digit(chr: u8) -> bool { - chr >= 0x30 && chr <= 0x39 + matches!(chr, 0x30..=0x39) } /// Tests if byte is ASCII hex digit: 0-9, A-F, a-f @@ -49,7 +49,7 @@ pub fn is_digit(chr: u8) -> bool { /// ``` #[inline] pub fn is_hex_digit(chr: u8) -> bool { - (chr >= 0x30 && chr <= 0x39) || (chr >= 0x41 && chr <= 0x46) || (chr >= 0x61 && chr <= 0x66) + matches!(chr, 0x30..=0x39 | 0x41..=0x46 | 0x61..=0x66) } /// Tests if byte is ASCII octal digit: 0-7 @@ -64,7 +64,7 @@ pub fn is_hex_digit(chr: u8) -> bool { /// ``` #[inline] pub fn 
is_oct_digit(chr: u8) -> bool { - chr >= 0x30 && chr <= 0x37 + matches!(chr, 0x30..=0x37) } /// Tests if byte is ASCII alphanumeric: A-Z, a-z, 0-9 diff --git a/src/character/streaming.rs b/src/character/streaming.rs index 88aabba35..eaa25516d 100644 --- a/src/character/streaming.rs +++ b/src/character/streaming.rs @@ -836,8 +836,8 @@ mod tests { Err(Err::Incomplete(Needed::new(1))) ); assert_eq!(alpha1(b), Err(Err::Error((b, ErrorKind::Alpha)))); - assert_eq!(alpha1::<_, (_, ErrorKind)>(c), Ok((&c[1..], &"a"[..]))); - assert_eq!(alpha1::<_, (_, ErrorKind)>(d), Ok(("é12", &"az"[..]))); + assert_eq!(alpha1::<_, (_, ErrorKind)>(c), Ok((&c[1..], "a"))); + assert_eq!(alpha1::<_, (_, ErrorKind)>(d), Ok(("é12", "az"))); assert_eq!(digit1(a), Err(Err::Error((a, ErrorKind::Digit)))); assert_eq!( digit1::<_, (_, ErrorKind)>(b), @@ -857,7 +857,7 @@ mod tests { hex_digit1::<_, (_, ErrorKind)>(c), Err(Err::Incomplete(Needed::new(1))) ); - assert_eq!(hex_digit1::<_, (_, ErrorKind)>(d), Ok(("zé12", &"a"[..]))); + assert_eq!(hex_digit1::<_, (_, ErrorKind)>(d), Ok(("zé12", "a"))); assert_eq!(hex_digit1(e), Err(Err::Error((e, ErrorKind::HexDigit)))); assert_eq!(oct_digit1(a), Err(Err::Error((a, ErrorKind::OctDigit)))); assert_eq!( diff --git a/src/character/tests.rs b/src/character/tests.rs index 64c2a1c8a..ef3b02a0d 100644 --- a/src/character/tests.rs +++ b/src/character/tests.rs @@ -54,9 +54,9 @@ fn char_str() { char('c')(i) } - let a = &"abcd"[..]; + let a = "abcd"; assert_eq!(f(a), Err(Err::Error(error_position!(a, ErrorKind::Char)))); - let b = &"cde"[..]; - assert_eq!(f(b), Ok((&"de"[..], 'c'))); + let b = "cde"; + assert_eq!(f(b), Ok(("de", 'c'))); } diff --git a/src/combinator/mod.rs b/src/combinator/mod.rs index 2074f2fdf..e261201ce 100644 --- a/src/combinator/mod.rs +++ b/src/combinator/mod.rs @@ -226,11 +226,11 @@ where /// ``` pub fn flat_map, F, G, H>( mut parser: F, - applied_parser: G, + mut applied_parser: G, ) -> impl FnMut(I) -> IResult where F: Parser, - G: Fn(O1) 
-> H, + G: FnMut(O1) -> H, H: Parser, { move |input: I| { @@ -239,7 +239,9 @@ where } } -/// Optional parser: Will return `None` if not successful. +/// Optional parser, will return `None` on [`Err::Error`]. +/// +/// To chain an error up, see [`cut`]. /// /// ```rust /// # use nom::{Err,error::ErrorKind, IResult}; @@ -596,18 +598,55 @@ where } } -/// transforms an error to failure +/// Transforms an [`Err::Error`] (recoverable) to [`Err::Failure`] (unrecoverable) +/// +/// This commits the parse result, preventing alternative branch paths like with +/// [`nom::branch::alt`][crate::branch::alt]. +/// +/// # Example /// +/// Without `cut`: /// ```rust /// # use nom::{Err,error::ErrorKind, IResult}; +/// # use nom::character::complete::{one_of, digit1}; +/// # use nom::combinator::rest; +/// # use nom::branch::alt; +/// # use nom::sequence::preceded; +/// # fn main() { +/// +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alt(( +/// preceded(one_of("+-"), digit1), +/// rest +/// ))(input) +/// } +/// +/// assert_eq!(parser("+10 ab"), Ok((" ab", "10"))); +/// assert_eq!(parser("ab"), Ok(("", "ab"))); +/// assert_eq!(parser("+"), Ok(("", "+"))); +/// # } +/// ``` +/// +/// With `cut`: +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult, error::Error}; +/// # use nom::character::complete::{one_of, digit1}; +/// # use nom::combinator::rest; +/// # use nom::branch::alt; +/// # use nom::sequence::preceded; /// use nom::combinator::cut; -/// use nom::character::complete::alpha1; /// # fn main() { /// -/// let mut parser = cut(alpha1); +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alt(( +/// preceded(one_of("+-"), cut(digit1)), +/// rest +/// ))(input) +/// } /// -/// assert_eq!(parser("abcd;"), Ok((";", "abcd"))); -/// assert_eq!(parser("123;"), Err(Err::Failure(("123;", ErrorKind::Alpha)))); +/// assert_eq!(parser("+10 ab"), Ok((" ab", "10"))); +/// assert_eq!(parser("ab"), Ok(("", "ab"))); +/// assert_eq!(parser("+"), Err(Err::Failure(Error { 
input: "", code: ErrorKind::Digit }))); /// # } /// ``` pub fn cut, F>(mut parser: F) -> impl FnMut(I) -> IResult @@ -664,6 +703,8 @@ where /// Call the iterator's [ParserIterator::finish] method to get the remaining input if successful, /// or the error value if we encountered an error. /// +/// On [`Err::Error`], iteration will stop. To instead chain an error up, see [`cut`]. +/// /// ```rust /// use nom::{combinator::iterator, IResult, bytes::complete::tag, character::complete::alpha1, sequence::terminated}; /// use std::collections::HashMap; diff --git a/src/combinator/tests.rs b/src/combinator/tests.rs index 15d32b8aa..d209e2b11 100644 --- a/src/combinator/tests.rs +++ b/src/combinator/tests.rs @@ -183,7 +183,7 @@ fn test_verify_alloc() { s == &b"abc"[..] }); - assert_eq!(parser1(&b"abcd"[..]), Ok((&b"d"[..], (&b"abc").to_vec()))); + assert_eq!(parser1(&b"abcd"[..]), Ok((&b"d"[..], b"abc".to_vec()))); assert_eq!( parser1(&b"defg"[..]), Err(Err::Error((&b"defg"[..], ErrorKind::Verify))) diff --git a/src/error.rs b/src/error.rs index 498b5e135..f0bb26a7b 100644 --- a/src/error.rs +++ b/src/error.rs @@ -54,7 +54,7 @@ pub trait FromExternalError { } /// default error type, only contains the error' location and code -#[derive(Debug, PartialEq)] +#[derive(Debug, Eq, PartialEq)] pub struct Error { /// position of the error in the input data pub input: I, @@ -147,7 +147,7 @@ pub fn append_error>(input: I, kind: ErrorKind, other: E) -> /// it can be used to display user friendly error messages #[cfg(feature = "alloc")] #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct VerboseError { /// List of errors accumulated by `VerboseError`, containing the affected /// part of input data, and some context @@ -156,7 +156,7 @@ pub struct VerboseError { #[cfg(feature = "alloc")] #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -#[derive(Clone, Debug, PartialEq)] 
+#[derive(Clone, Debug, Eq, PartialEq)] /// Error context for `VerboseError` pub enum VerboseErrorKind { /// Static string added by the `context` function diff --git a/src/internal.rs b/src/internal.rs index b793cd922..4ad258c3d 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -1,7 +1,7 @@ //! Basic types to build the parsers use self::Needed::*; -use crate::error::{self, ErrorKind, ParseError}; +use crate::error::{self, ErrorKind, FromExternalError, ParseError}; use crate::lib::std::fmt; use core::num::NonZeroUsize; @@ -108,11 +108,7 @@ pub enum Err { impl Err { /// Tests if the result is Incomplete pub fn is_incomplete(&self) -> bool { - if let Err::Incomplete(_) = self { - true - } else { - false - } + matches!(self, Err::Incomplete(..)) } /// Applies the given function to the inner error @@ -246,7 +242,7 @@ pub trait Parser { /// Maps a function over the result of a parser fn map(self, g: G) -> Map where - G: Fn(O) -> O2, + G: FnMut(O) -> O2, Self: core::marker::Sized, { Map { @@ -256,10 +252,37 @@ pub trait Parser { } } + /// Applies a function returning a `Result` over the result of a parser. + fn map_res(self, g: G) -> MapRes + where + G: Fn(O) -> Result, + E: FromExternalError, + Self: core::marker::Sized, + { + MapRes { + f: self, + g, + phantom: core::marker::PhantomData, + } + } + + /// Applies a function returning an `Option` over the result of a parser. 
+ fn map_opt(self, g: G) -> MapOpt + where + G: Fn(O) -> Option, + Self: core::marker::Sized, + { + MapOpt { + f: self, + g, + phantom: core::marker::PhantomData, + } + } + /// Creates a second parser from the output of the first one, then apply over the rest of the input fn flat_map(self, g: G) -> FlatMap where - G: Fn(O) -> H, + G: FnMut(O) -> H, H: Parser, Self: core::marker::Sized, { @@ -317,9 +340,9 @@ pub trait Parser { } } -impl<'a, I, O, E, F> Parser for F +impl Parser for F where - F: FnMut(I) -> IResult + 'a, + F: FnMut(I) -> IResult, { fn parse(&mut self, i: I) -> IResult { self(i) @@ -363,7 +386,7 @@ impl_parser_for_tuples!(P1 O1, P2 O2, P3 O3, P4 O4, P5 O5, P6 O6, P7 O7, P8 O8, use alloc::boxed::Box; #[cfg(feature = "alloc")] -impl<'a, I, O, E> Parser for Box + 'a> { +impl Parser for Box> { fn parse(&mut self, input: I) -> IResult { (**self).parse(input) } @@ -377,7 +400,7 @@ pub struct Map { phantom: core::marker::PhantomData, } -impl<'a, I, O1, O2, E, F: Parser, G: Fn(O1) -> O2> Parser for Map { +impl, G: FnMut(O1) -> O2> Parser for Map { fn parse(&mut self, i: I) -> IResult { match self.f.parse(i) { Err(e) => Err(e), @@ -386,6 +409,54 @@ impl<'a, I, O1, O2, E, F: Parser, G: Fn(O1) -> O2> Parser fo } } +/// Implementation of `Parser::map_res` +pub struct MapRes { + f: F, + g: G, + phantom: core::marker::PhantomData, +} + +impl Parser for MapRes +where + I: Clone, + E: FromExternalError, + F: Parser, + G: Fn(O) -> Result, +{ + fn parse(&mut self, input: I) -> IResult { + let i = input.clone(); + let (input, o1) = self.f.parse(input)?; + match (self.g)(o1) { + Ok(o2) => Ok((input, o2)), + Err(e) => Err(Err::Error(E::from_external_error(i, ErrorKind::MapRes, e))), + } + } +} + +/// Implementation of `Parser::map_opt` +pub struct MapOpt { + f: F, + g: G, + phantom: core::marker::PhantomData, +} + +impl Parser for MapOpt +where + I: Clone, + E: ParseError, + F: Parser, + G: Fn(O) -> Option, +{ + fn parse(&mut self, input: I) -> IResult { + let i = 
input.clone(); + let (input, o1) = self.f.parse(input)?; + match (self.g)(o1) { + Some(o2) => Ok((input, o2)), + None => Err(Err::Error(E::from_error_kind(i, ErrorKind::MapOpt))), + } + } +} + /// Implementation of `Parser::flat_map` #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct FlatMap { @@ -394,7 +465,7 @@ pub struct FlatMap { phantom: core::marker::PhantomData, } -impl<'a, I, O1, O2, E, F: Parser, G: Fn(O1) -> H, H: Parser> Parser +impl, G: FnMut(O1) -> H, H: Parser> Parser for FlatMap { fn parse(&mut self, i: I) -> IResult { @@ -411,7 +482,7 @@ pub struct AndThen { phantom: core::marker::PhantomData, } -impl<'a, I, O1, O2, E, F: Parser, G: Parser> Parser +impl, G: Parser> Parser for AndThen { fn parse(&mut self, i: I) -> IResult { @@ -428,9 +499,7 @@ pub struct And { g: G, } -impl<'a, I, O1, O2, E, F: Parser, G: Parser> Parser - for And -{ +impl, G: Parser> Parser for And { fn parse(&mut self, i: I) -> IResult { let (i, o1) = self.f.parse(i)?; let (i, o2) = self.g.parse(i)?; @@ -445,7 +514,7 @@ pub struct Or { g: G, } -impl<'a, I: Clone, O, E: crate::error::ParseError, F: Parser, G: Parser> +impl, F: Parser, G: Parser> Parser for Or { fn parse(&mut self, i: I) -> IResult { @@ -470,7 +539,6 @@ pub struct Into, E1, E2: From> { } impl< - 'a, I: Clone, O1, O2: From, @@ -494,16 +562,15 @@ mod tests { use super::*; use crate::error::ErrorKind; - use crate::sequence::terminated; use crate::bytes::streaming::{tag, take}; use crate::number::streaming::be_u16; - + use crate::sequence::terminated; #[doc(hidden)] #[macro_export] macro_rules! 
assert_size ( ($t:ty, $sz:expr) => ( - assert_eq!(crate::lib::std::mem::size_of::<$t>(), $sz); + assert_eq!($crate::lib::std::mem::size_of::<$t>(), $sz); ); ); @@ -511,7 +578,9 @@ mod tests { #[cfg(target_pointer_width = "64")] fn size_test() { assert_size!(IResult<&[u8], &[u8], (&[u8], u32)>, 40); - assert_size!(IResult<&str, &str, u32>, 40); + //FIXME: since rust 1.65, this is now 32 bytes, likely thanks to https://github.com/rust-lang/rust/pull/94075 + // deactivating that test for now because it'll have different values depending on the rust version + // assert_size!(IResult<&str, &str, u32>, 40); assert_size!(Needed, 8); assert_size!(Err, 16); assert_size!(ErrorKind, 1); diff --git a/src/lib.rs b/src/lib.rs index 85c9659fd..952a1a02b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,16 +51,16 @@ //! //! The code is available on [Github](https://github.com/Geal/nom) //! -//! There are a few [guides](https://github.com/Geal/nom/tree/master/doc) with more details -//! about [how to write parsers](https://github.com/Geal/nom/blob/master/doc/making_a_new_parser_from_scratch.md), -//! or the [error management system](https://github.com/Geal/nom/blob/master/doc/error_management.md). +//! There are a few [guides](https://github.com/Geal/nom/tree/main/doc) with more details +//! about [how to write parsers](https://github.com/Geal/nom/blob/main/doc/making_a_new_parser_from_scratch.md), +//! or the [error management system](https://github.com/Geal/nom/blob/main/doc/error_management.md). //! You can also check out the [recipes] module that contains examples of common patterns. //! //! **Looking for a specific combinator? Read the -//! ["choose a combinator" guide](https://github.com/Geal/nom/blob/master/doc/choosing_a_combinator.md)** +//! ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md)** //! //! If you are upgrading to nom 5.0, please read the -//! 
[migration document](https://github.com/Geal/nom/blob/master/doc/upgrading_to_nom_5.md). +//! [migration document](https://github.com/Geal/nom/blob/main/doc/upgrading_to_nom_5.md). //! //! ## Parser combinators //! @@ -135,13 +135,13 @@ //! A parser in nom is a function which, for an input type `I`, an output type `O` //! and an optional error type `E`, will have the following signature: //! -//! ```rust,ignore +//! ```rust,compile_fail //! fn parser(input: I) -> IResult; //! ``` //! //! Or like this, if you don't want to specify a custom error type (it will be `(I, ErrorKind)` by default): //! -//! ```rust,ignore +//! ```rust,compile_fail //! fn parser(input: I) -> IResult; //! ``` //! @@ -166,8 +166,8 @@ //! - An error `Err(Err::Incomplete(Needed))` indicating that more input is necessary. `Needed` can indicate how much data is needed //! - An error `Err(Err::Failure(c))`. It works like the `Error` case, except it indicates an unrecoverable error: We cannot backtrack and test another parser //! -//! Please refer to the ["choose a combinator" guide](https://github.com/Geal/nom/blob/master/doc/choosing_a_combinator.md) for an exhaustive list of parsers. -//! See also the rest of the documentation [here](https://github.com/Geal/nom/blob/master/doc). +//! Please refer to the ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md) for an exhaustive list of parsers. +//! See also the rest of the documentation [here](https://github.com/Geal/nom/blob/main/doc). //! //! ## Making new parsers with function combinators //! @@ -368,13 +368,12 @@ //! // while the complete version knows that all of the data is there //! assert_eq!(alpha0_complete("abcd"), Ok(("", "abcd"))); //! ``` -//! **Going further:** Read the [guides](https://github.com/Geal/nom/tree/master/doc), +//! **Going further:** Read the [guides](https://github.com/Geal/nom/tree/main/doc), //! check out the [recipes]! 
#![cfg_attr(not(feature = "std"), no_std)] -#![cfg_attr(feature = "cargo-clippy", allow(clippy::doc_markdown))] -#![cfg_attr(nightly, feature(test))] #![cfg_attr(feature = "docsrs", feature(doc_cfg))] #![cfg_attr(feature = "docsrs", feature(extended_key_value_attributes))] +#![allow(clippy::doc_markdown)] #![deny(missing_docs)] #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] #[cfg(feature = "alloc")] @@ -383,9 +382,6 @@ extern crate alloc; #[cfg(doctest)] extern crate doc_comment; -#[cfg(nightly)] -extern crate test; - #[cfg(doctest)] doc_comment::doctest!("../README.md"); @@ -441,19 +437,20 @@ pub use self::traits::*; pub use self::str::*; +#[macro_use] +mod macros; #[macro_use] pub mod error; -mod internal; -mod traits; -pub mod combinator; -#[macro_use] pub mod branch; -pub mod sequence; +pub mod combinator; +mod internal; pub mod multi; +pub mod sequence; +mod traits; -pub mod bytes; pub mod bits; +pub mod bytes; pub mod character; diff --git a/src/macros.rs b/src/macros.rs new file mode 100644 index 000000000..980d2d90e --- /dev/null +++ b/src/macros.rs @@ -0,0 +1,23 @@ +macro_rules! succ ( + (0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*)); + (1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*)); + (2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*)); + (3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*)); + (4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*)); + (5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*)); + (6, $submac:ident ! ($($rest:tt)*)) => ($submac!(7, $($rest)*)); + (7, $submac:ident ! ($($rest:tt)*)) => ($submac!(8, $($rest)*)); + (8, $submac:ident ! ($($rest:tt)*)) => ($submac!(9, $($rest)*)); + (9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*)); + (10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*)); + (11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*)); + (12, $submac:ident ! 
($($rest:tt)*)) => ($submac!(13, $($rest)*)); + (13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*)); + (14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*)); + (15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*)); + (16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*)); + (17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*)); + (18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*)); + (19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*)); + (20, $submac:ident ! ($($rest:tt)*)) => ($submac!(21, $($rest)*)); +); diff --git a/src/multi/mod.rs b/src/multi/mod.rs index a1191346d..ce23ae993 100644 --- a/src/multi/mod.rs +++ b/src/multi/mod.rs @@ -11,15 +11,28 @@ use crate::lib::std::vec::Vec; use crate::traits::{InputLength, InputTake, ToUsize}; use core::num::NonZeroUsize; -/// Repeats the embedded parser until it fails -/// and returns the results in a `Vec`. +/// Don't pre-allocate more than 64KiB when calling `Vec::with_capacity`. +/// +/// Pre-allocating memory is a nice optimization but count fields can't +/// always be trusted. We should clamp initial capacities to some reasonable +/// amount. This reduces the risk of a bogus count value triggering a panic +/// due to an OOM error. +/// +/// This does not affect correctness. Nom will always read the full number +/// of elements regardless of the capacity cap. +#[cfg(feature = "alloc")] +const MAX_INITIAL_CAPACITY_BYTES: usize = 65536; + +/// Repeats the embedded parser, gathering the results in a `Vec`. +/// +/// This stops on [`Err::Error`] and returns the results that were accumulated. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. /// /// # Arguments /// * `f` The parser to apply. 
/// -/// *Note*: if the parser passed to `many0` accepts empty inputs -/// (like `alpha0` or `digit0`), `many0` will return an error, -/// to prevent going into an infinite loop +/// *Note*: if the parser passed in accepts empty inputs (like `alpha0` or `digit0`), `many0` will +/// return an error, to prevent going into an infinite loop /// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed, IResult}; @@ -64,10 +77,10 @@ where } } -/// Runs the embedded parser until it fails and -/// returns the results in a `Vec`. Fails if -/// the embedded parser does not produce at least -/// one result. +/// Runs the embedded parser, gathering the results in a `Vec`. +/// +/// This stops on [`Err::Error`] if there is at least one result, and returns the results that were accumulated. To instead chain an error up, +/// see [`cut`][crate::combinator::cut]. /// /// # Arguments /// * `f` The parser to apply. @@ -126,9 +139,12 @@ where } } -/// Applies the parser `f` until the parser `g` produces -/// a result. Returns a pair consisting of the results of -/// `f` in a `Vec` and the result of `g`. +/// Applies the parser `f` until the parser `g` produces a result. +/// +/// Returns a tuple of the results of `f` in a `Vec` and the result of `g`. +/// +/// `f` keeps going so long as `g` produces [`Err::Error`]. To instead chain an error up, see [`cut`][crate::combinator::cut]. +/// /// ```rust /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; /// use nom::multi::many_till; @@ -183,8 +199,11 @@ where } } -/// Alternates between two parsers to produce -/// a list of elements. +/// Alternates between two parsers to produce a list of elements. +/// +/// This stops when either parser returns [`Err::Error`] and returns the results that were accumulated. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// /// # Arguments /// * `sep` Parses the separator between list elements. /// * `f` Parses the elements of the list. 
@@ -253,9 +272,13 @@ where } } -/// Alternates between two parsers to produce -/// a list of elements. Fails if the element -/// parser does not produce at least one element. +/// Alternates between two parsers to produce a list of elements until [`Err::Error`]. +/// +/// Fails if the element parser does not produce at least one element. +/// +/// This stops when either parser returns [`Err::Error`] and returns the results that were accumulated. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// /// # Arguments /// * `sep` Parses the separator between list elements. /// * `f` Parses the elements of the list. @@ -323,13 +346,20 @@ where } } -/// Repeats the embedded parser `n` times or until it fails -/// and returns the results in a `Vec`. Fails if the -/// embedded parser does not succeed at least `m` times. +/// Repeats the embedded parser `m..=n` times +/// +/// This stops before `n` when the parser returns [`Err::Error`] and returns the results that were accumulated. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// /// # Arguments /// * `m` The minimum number of iterations. /// * `n` The maximum number of iterations. /// * `f` The parser to apply. +/// +/// *Note*: If the parser passed to `many_m_n` accepts empty inputs +/// (like `alpha0` or `digit0`), `many_m_n` will return an error, +/// to prevent going into an infinite loop. 
+/// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed, IResult}; /// use nom::multi::many_m_n; @@ -362,7 +392,8 @@ where return Err(Err::Failure(E::from_error_kind(input, ErrorKind::ManyMN))); } - let mut res = crate::lib::std::vec::Vec::with_capacity(min); + let max_initial_capacity = MAX_INITIAL_CAPACITY_BYTES / crate::lib::std::mem::size_of::(); + let mut res = crate::lib::std::vec::Vec::with_capacity(min.min(max_initial_capacity)); for count in 0..max { let len = input.input_len(); match parse.parse(input.clone()) { @@ -392,10 +423,17 @@ where } } -/// Repeats the embedded parser until it fails -/// and returns the number of successful iterations. +/// Repeats the embedded parser, counting the results +/// +/// This stops on [`Err::Error`]. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// /// # Arguments /// * `f` The parser to apply. +/// +/// *Note*: if the parser passed in accepts empty inputs (like `alpha0` or `digit0`), `many0_count` will +/// return an error, to prevent going into an infinite loop +/// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed, IResult}; /// use nom::multi::many0_count; @@ -442,12 +480,18 @@ where } } -/// Repeats the embedded parser until it fails -/// and returns the number of successful iterations. -/// Fails if the embedded parser does not succeed -/// at least once. +/// Runs the embedded parser, counting the results. +/// +/// This stops on [`Err::Error`] if there is at least one result. To instead chain an error up, +/// see [`cut`][crate::combinator::cut]. +/// /// # Arguments /// * `f` The parser to apply. +/// +/// *Note*: If the parser passed to `many1_count` accepts empty inputs +/// (like `alpha0` or `digit0`), `many1_count` will return an error, +/// to prevent going into an infinite loop. 
+/// /// ```rust /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; /// use nom::multi::many1_count; @@ -499,8 +543,8 @@ where } } -/// Runs the embedded parser a specified number -/// of times. Returns the results in a `Vec`. +/// Runs the embedded parser `count` times, gathering the results in a `Vec` +/// /// # Arguments /// * `f` The parser to apply. /// * `count` How often to apply the parser. @@ -529,7 +573,8 @@ where { move |i: I| { let mut input = i.clone(); - let mut res = crate::lib::std::vec::Vec::with_capacity(count); + let max_initial_capacity = MAX_INITIAL_CAPACITY_BYTES / crate::lib::std::mem::size_of::(); + let mut res = crate::lib::std::vec::Vec::with_capacity(count.min(max_initial_capacity)); for _ in 0..count { let input_ = input.clone(); @@ -551,8 +596,10 @@ where } } -/// Runs the embedded parser repeatedly, filling the given slice with results. This parser fails if -/// the input runs out before the given slice is full. +/// Runs the embedded parser repeatedly, filling the given slice with results. +/// +/// This parser fails if the input runs out before the given slice is full. +/// /// # Arguments /// * `f` The parser to apply. /// * `buf` The slice to fill @@ -573,10 +620,10 @@ where /// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag)))); /// assert_eq!(parser("abcabcabc"), Ok(("abc", ["abc", "abc"]))); /// ``` -pub fn fill<'a, I, O, E, F>(f: F, buf: &'a mut [O]) -> impl FnMut(I) -> IResult + 'a +pub fn fill<'a, I, O, E, F>(mut f: F, buf: &'a mut [O]) -> impl FnMut(I) -> IResult + 'a where I: Clone + PartialEq, - F: Fn(I) -> IResult + 'a, + F: Parser + 'a, E: ParseError, { move |i: I| { @@ -584,7 +631,7 @@ where for elem in buf.iter_mut() { let input_ = input.clone(); - match f(input_) { + match f.parse(input_) { Ok((i, o)) => { *elem = o; input = i; @@ -602,13 +649,20 @@ where } } -/// Applies a parser until it fails and accumulates -/// the results using a given function and initial value. 
+/// Repeats the embedded parser, calling `g` to gather the results. +/// +/// This stops on [`Err::Error`]. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// /// # Arguments /// * `f` The parser to apply. /// * `init` A function returning the initial value. /// * `g` The function that combines a result of `f` with /// the current accumulator. +/// +/// *Note*: if the parser passed in accepts empty inputs (like `alpha0` or `digit0`), `fold_many0` will +/// return an error, to prevent going into an infinite loop +/// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed, IResult}; /// use nom::multi::fold_many0; @@ -670,15 +724,21 @@ where } } -/// Applies a parser until it fails and accumulates -/// the results using a given function and initial value. -/// Fails if the embedded parser does not succeed at least -/// once. +/// Repeats the embedded parser, calling `g` to gather the results. +/// +/// This stops on [`Err::Error`] if there is at least one result. To instead chain an error up, +/// see [`cut`][crate::combinator::cut]. +/// /// # Arguments /// * `f` The parser to apply. /// * `init` A function returning the initial value. /// * `g` The function that combines a result of `f` with /// the current accumulator. +/// +/// *Note*: If the parser passed to `fold_many1` accepts empty inputs +/// (like `alpha0` or `digit0`), `fold_many1` will return an error, +/// to prevent going into an infinite loop. +/// /// ```rust /// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; /// use nom::multi::fold_many1; @@ -748,10 +808,11 @@ where } } -/// Applies a parser `n` times or until it fails and accumulates -/// the results using a given function and initial value. -/// Fails if the embedded parser does not succeed at least `m` -/// times. +/// Repeats the embedded parser `m..=n` times, calling `g` to gather the results +/// +/// This stops before `n` when the parser returns [`Err::Error`]. 
To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// /// # Arguments /// * `m` The minimum number of iterations. /// * `n` The maximum number of iterations. @@ -759,6 +820,11 @@ where /// * `init` A function returning the initial value. /// * `g` The function that combines a result of `f` with /// the current accumulator. +/// +/// *Note*: If the parser passed to `fold_many_m_n` accepts empty inputs +/// (like `alpha0` or `digit0`), `fold_many_m_n` will return an error, +/// to prevent going into an infinite loop. +/// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed, IResult}; /// use nom::multi::fold_many_m_n; diff --git a/src/multi/tests.rs b/src/multi/tests.rs index 090c5f5f3..39918c806 100644 --- a/src/multi/tests.rs +++ b/src/multi/tests.rs @@ -129,16 +129,6 @@ fn many0_test() { ); } -#[cfg(nightly)] -use test::Bencher; - -#[cfg(nightly)] -#[bench] -fn many0_bench(b: &mut Bencher) { - named!(multi<&[u8],Vec<&[u8]> >, many0!(tag!("abcd"))); - b.iter(|| multi(&b"abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"[..])); -} - #[test] #[cfg(feature = "alloc")] fn many1_test() { @@ -165,6 +155,7 @@ fn many1_test() { #[test] #[cfg(feature = "alloc")] fn many_till_test() { + #[allow(clippy::type_complexity)] fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> { many_till(tag("abcd"), tag("efgh"))(i) } @@ -311,7 +302,7 @@ fn count_zero() { assert_eq!(counter_2(error_2), Ok((error_2_remain, parsed_err_2))); } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq)] pub struct NilError; impl From<(I, ErrorKind)> for NilError { diff --git a/src/number/complete.rs b/src/number/complete.rs index 35289955e..f8cbcb740 100644 --- a/src/number/complete.rs +++ b/src/number/complete.rs @@ -7,32 +7,12 @@ use crate::combinator::{cut, map, opt, recognize}; use crate::error::ParseError; use crate::error::{make_error, ErrorKind}; use crate::internal::*; -use crate::lib::std::ops::{Range, RangeFrom, RangeTo}; +use
crate::lib::std::ops::{Add, Range, RangeFrom, RangeTo, Shl}; use crate::sequence::pair; use crate::traits::{ AsBytes, AsChar, Compare, InputIter, InputLength, InputTake, InputTakeAtPosition, Offset, Slice, }; -#[doc(hidden)] -macro_rules! map( - // Internal parser, do not use directly - (__impl $i:expr, $submac:ident!( $($args:tt)* ), $g:expr) => ( - $crate::combinator::map(move |i| {$submac!(i, $($args)*)}, $g).parse($i) - ); - ($i:expr, $submac:ident!( $($args:tt)* ), $g:expr) => ( - map!(__impl $i, $submac!($($args)*), $g) - ); - ($i:expr, $f:expr, $g:expr) => ( - map!(__impl $i, call!($f), $g) - ); -); - -#[doc(hidden)] -macro_rules! call ( - ($i:expr, $fun:expr) => ( $fun( $i ) ); - ($i:expr, $fun:expr, $($args:expr),* ) => ( $fun( $i, $($args),* ) ); -); - /// Recognizes an unsigned 1 byte integer. /// /// *Complete version*: Returns an error if there is not enough input data. @@ -53,14 +33,7 @@ pub fn be_u8>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 1; - if input.input_len() < bound { - Err(Err::Error(make_error(input, ErrorKind::Eof))) - } else { - let res = input.iter_elements().next().unwrap(); - - Ok((input.slice(bound..), res)) - } + be_uint(input, 1) } /// Recognizes a big endian unsigned 2 bytes integer. @@ -83,17 +56,7 @@ pub fn be_u16>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 2; - if input.input_len() < bound { - Err(Err::Error(make_error(input, ErrorKind::Eof))) - } else { - let mut res = 0u16; - for byte in input.iter_elements().take(bound) { - res = (res << 8) + byte as u16; - } - - Ok((input.slice(bound..), res)) - } + be_uint(input, 2) } /// Recognizes a big endian unsigned 3 byte integer. 
@@ -116,17 +79,7 @@ pub fn be_u24>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 3; - if input.input_len() < bound { - Err(Err::Error(make_error(input, ErrorKind::Eof))) - } else { - let mut res = 0u32; - for byte in input.iter_elements().take(bound) { - res = (res << 8) + byte as u32; - } - - Ok((input.slice(bound..), res)) - } + be_uint(input, 3) } /// Recognizes a big endian unsigned 4 bytes integer. @@ -149,17 +102,7 @@ pub fn be_u32>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 4; - if input.input_len() < bound { - Err(Err::Error(make_error(input, ErrorKind::Eof))) - } else { - let mut res = 0u32; - for byte in input.iter_elements().take(bound) { - res = (res << 8) + byte as u32; - } - - Ok((input.slice(bound..), res)) - } + be_uint(input, 4) } /// Recognizes a big endian unsigned 8 bytes integer. @@ -182,17 +125,7 @@ pub fn be_u64>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 8; - if input.input_len() < bound { - Err(Err::Error(make_error(input, ErrorKind::Eof))) - } else { - let mut res = 0u64; - for byte in input.iter_elements().take(bound) { - res = (res << 8) + byte as u64; - } - - Ok((input.slice(bound..), res)) - } + be_uint(input, 8) } /// Recognizes a big endian unsigned 16 bytes integer. 
@@ -211,18 +144,33 @@ where /// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn be_u128>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 16; + be_uint(input, 16) +} + +#[inline] +fn be_uint>(input: I, bound: usize) -> IResult +where + I: Slice> + InputIter + InputLength, + Uint: Default + Shl + Add + From, +{ if input.input_len() < bound { Err(Err::Error(make_error(input, ErrorKind::Eof))) } else { - let mut res = 0u128; - for byte in input.iter_elements().take(bound) { - res = (res << 8) + byte as u128; + let mut res = Uint::default(); + + // special case to avoid shift a byte with overflow + if bound > 1 { + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte.into(); + } + } else { + for byte in input.iter_elements().take(bound) { + res = byte.into(); + } } Ok((input.slice(bound..), res)) @@ -249,7 +197,7 @@ pub fn be_i8>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u8, |x| x as i8) + be_u8.map(|x| x as i8).parse(input) } /// Recognizes a big endian signed 2 bytes integer. @@ -272,7 +220,7 @@ pub fn be_i16>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u16, |x| x as i16) + be_u16.map(|x| x as i16).parse(input) } /// Recognizes a big endian signed 3 bytes integer. @@ -296,16 +244,20 @@ where I: Slice> + InputIter + InputLength, { // Same as the unsigned version but we need to sign-extend manually here - map!(input, be_u24, |x| if x & 0x80_00_00 != 0 { - (x | 0xff_00_00_00) as i32 - } else { - x as i32 - }) + be_u24 + .map(|x| { + if x & 0x80_00_00 != 0 { + (x | 0xff_00_00_00) as i32 + } else { + x as i32 + } + }) + .parse(input) } /// Recognizes a big endian signed 4 bytes integer. /// -/// *Complete version*: Teturns an error if there is not enough input data. +/// *Complete version*: Returns an error if there is not enough input data. 
/// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; @@ -323,7 +275,7 @@ pub fn be_i32>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u32, |x| x as i32) + be_u32.map(|x| x as i32).parse(input) } /// Recognizes a big endian signed 8 bytes integer. @@ -346,7 +298,7 @@ pub fn be_i64>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u64, |x| x as i64) + be_u64.map(|x| x as i64).parse(input) } /// Recognizes a big endian signed 16 bytes integer. @@ -365,12 +317,11 @@ where /// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn be_i128>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u128, |x| x as i128) + be_u128.map(|x| x as i128).parse(input) } /// Recognizes an unsigned 1 byte integer. @@ -393,14 +344,7 @@ pub fn le_u8>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 1; - if input.input_len() < bound { - Err(Err::Error(make_error(input, ErrorKind::Eof))) - } else { - let res = input.iter_elements().next().unwrap(); - - Ok((input.slice(bound..), res)) - } + le_uint(input, 1) } /// Recognizes a little endian unsigned 2 bytes integer. @@ -423,17 +367,7 @@ pub fn le_u16>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 2; - if input.input_len() < bound { - Err(Err::Error(make_error(input, ErrorKind::Eof))) - } else { - let mut res = 0u16; - for (index, byte) in input.iter_indices().take(bound) { - res += (byte as u16) << (8 * index); - } - - Ok((input.slice(bound..), res)) - } + le_uint(input, 2) } /// Recognizes a little endian unsigned 3 byte integer. 
@@ -456,17 +390,7 @@ pub fn le_u24>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 3; - if input.input_len() < bound { - Err(Err::Error(make_error(input, ErrorKind::Eof))) - } else { - let mut res = 0u32; - for (index, byte) in input.iter_indices().take(bound) { - res += (byte as u32) << (8 * index); - } - - Ok((input.slice(bound..), res)) - } + le_uint(input, 3) } /// Recognizes a little endian unsigned 4 bytes integer. @@ -489,17 +413,7 @@ pub fn le_u32>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 4; - if input.input_len() < bound { - Err(Err::Error(make_error(input, ErrorKind::Eof))) - } else { - let mut res = 0u32; - for (index, byte) in input.iter_indices().take(bound) { - res += (byte as u32) << (8 * index); - } - - Ok((input.slice(bound..), res)) - } + le_uint(input, 4) } /// Recognizes a little endian unsigned 8 bytes integer. @@ -522,17 +436,7 @@ pub fn le_u64>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 8; - if input.input_len() < bound { - Err(Err::Error(make_error(input, ErrorKind::Eof))) - } else { - let mut res = 0u64; - for (index, byte) in input.iter_indices().take(bound) { - res += (byte as u64) << (8 * index); - } - - Ok((input.slice(bound..), res)) - } + le_uint(input, 8) } /// Recognizes a little endian unsigned 16 bytes integer. 
@@ -551,18 +455,25 @@ where /// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn le_u128>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 16; + le_uint(input, 16) +} + +#[inline] +fn le_uint>(input: I, bound: usize) -> IResult +where + I: Slice> + InputIter + InputLength, + Uint: Default + Shl + Add + From, +{ if input.input_len() < bound { Err(Err::Error(make_error(input, ErrorKind::Eof))) } else { - let mut res = 0u128; + let mut res = Uint::default(); for (index, byte) in input.iter_indices().take(bound) { - res += (byte as u128) << (8 * index); + res = res + (Uint::from(byte) << (8 * index as u8)); } Ok((input.slice(bound..), res)) @@ -589,7 +500,7 @@ pub fn le_i8>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u8, |x| x as i8) + be_u8.map(|x| x as i8).parse(input) } /// Recognizes a little endian signed 2 bytes integer. @@ -612,7 +523,7 @@ pub fn le_i16>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, le_u16, |x| x as i16) + le_u16.map(|x| x as i16).parse(input) } /// Recognizes a little endian signed 3 bytes integer. @@ -636,11 +547,15 @@ where I: Slice> + InputIter + InputLength, { // Same as the unsigned version but we need to sign-extend manually here - map!(input, le_u24, |x| if x & 0x80_00_00 != 0 { - (x | 0xff_00_00_00) as i32 - } else { - x as i32 - }) + le_u24 + .map(|x| { + if x & 0x80_00_00 != 0 { + (x | 0xff_00_00_00) as i32 + } else { + x as i32 + } + }) + .parse(input) } /// Recognizes a little endian signed 4 bytes integer. @@ -663,7 +578,7 @@ pub fn le_i32>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, le_u32, |x| x as i32) + le_u32.map(|x| x as i32).parse(input) } /// Recognizes a little endian signed 8 bytes integer. 
@@ -686,7 +601,7 @@ pub fn le_i64>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, le_u64, |x| x as i64) + le_u64.map(|x| x as i64).parse(input) } /// Recognizes a little endian signed 16 bytes integer. @@ -705,12 +620,11 @@ where /// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn le_i128>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, le_u128, |x| x as i128) + le_u128.map(|x| x as i128).parse(input) } /// Recognizes an unsigned 1 byte integer @@ -926,7 +840,6 @@ where /// assert_eq!(le_u128(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn u128>(endian: crate::number::Endianness) -> fn(I) -> IResult where I: Slice> + InputIter + InputLength, @@ -962,7 +875,7 @@ pub fn i8>(i: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(i, u8, |x| x as i8) + u8.map(|x| x as i8).parse(i) } /// Recognizes a signed 2 byte integer @@ -1146,7 +1059,6 @@ where /// assert_eq!(le_i128(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn i128>(endian: crate::number::Endianness) -> fn(I) -> IResult where I: Slice> + InputIter + InputLength, @@ -1360,16 +1272,32 @@ where /// assert_eq!(parser(&b"ggg"[..]), Err(Err::Error((&b"ggg"[..], ErrorKind::IsA)))); /// ``` #[inline] -pub fn hex_u32<'a, E: ParseError<&'a [u8]>>(input: &'a [u8]) -> IResult<&'a [u8], u32, E> { - let (i, o) = crate::bytes::complete::is_a(&b"0123456789abcdefABCDEF"[..])(input)?; +pub fn hex_u32>(input: I) -> IResult +where + I: InputTakeAtPosition, + I: Slice> + Slice>, + ::Item: AsChar, + I: AsBytes, + I: InputLength, +{ + let e: ErrorKind = ErrorKind::IsA; + let (i, o) = input.split_at_position1_complete( + |c| { + let c = c.as_char(); + !"0123456789abcdefABCDEF".contains(c) + }, + e, + )?; + // Do not parse more than 8 characters for a u32 - 
let (parsed, remaining) = if o.len() <= 8 { + let (parsed, remaining) = if o.input_len() <= 8 { (o, i) } else { - (&input[..8], &input[8..]) + (input.slice(..8), input.slice(8..)) }; let res = parsed + .as_bytes() .iter() .rev() .enumerate() @@ -1424,7 +1352,44 @@ where ))(input) } -/// Recognizes a floating point number in text format and returns the integer, fraction and exponent parts of the input data +// workaround until issues with minimal-lexical are fixed +#[doc(hidden)] +pub fn recognize_float_or_exceptions>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputTake + Compare<&'static str>, + ::Item: AsChar, + T: InputTakeAtPosition, + ::Item: AsChar, +{ + alt(( + |i: T| { + recognize_float::<_, E>(i.clone()).map_err(|e| match e { + crate::Err::Error(_) => crate::Err::Error(E::from_error_kind(i, ErrorKind::Float)), + crate::Err::Failure(_) => crate::Err::Failure(E::from_error_kind(i, ErrorKind::Float)), + crate::Err::Incomplete(needed) => crate::Err::Incomplete(needed), + }) + }, + |i: T| { + crate::bytes::complete::tag_no_case::<_, _, E>("nan")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + |i: T| { + crate::bytes::complete::tag_no_case::<_, _, E>("inf")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + |i: T| { + crate::bytes::complete::tag_no_case::<_, _, E>("infinity")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + ))(input) +} + +/// Recognizes a floating point number in text format +/// +/// It returns a tuple of (`sign`, `integer part`, `fraction part` and `exponent`) of the input +/// data. /// /// *Complete version*: Can parse until the end of input. 
/// @@ -1482,6 +1447,7 @@ where } } + #[allow(clippy::or_fun_call)] let position = position.unwrap_or(i.input_len()); let index = if zero_count == 0 { @@ -1515,6 +1481,8 @@ where Ok((i, (sign, integer, fraction, exp))) } +use crate::traits::ParseTo; + /// Recognizes floating point number in text format and returns a f32. /// /// *Complete version*: Can parse until the end of input. @@ -1535,7 +1503,7 @@ where pub fn float>(input: T) -> IResult where T: Slice> + Slice> + Slice>, - T: Clone + Offset, + T: Clone + Offset + ParseTo + Compare<&'static str>, T: InputIter + InputLength + InputTake, ::Item: AsChar + Copy, ::IterElem: Clone, @@ -1544,6 +1512,7 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { + /* let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; let mut float: f32 = minimal_lexical::parse_float( @@ -1556,6 +1525,15 @@ where } Ok((i, float)) + */ + let (i, s) = recognize_float_or_exceptions(input)?; + match s.parse_to() { + Some(f) => Ok((i, f)), + None => Err(crate::Err::Error(E::from_error_kind( + i, + crate::error::ErrorKind::Float, + ))), + } } /// Recognizes floating point number in text format and returns a f64. 
@@ -1578,7 +1556,7 @@ where pub fn double>(input: T) -> IResult where T: Slice> + Slice> + Slice>, - T: Clone + Offset, + T: Clone + Offset + ParseTo + Compare<&'static str>, T: InputIter + InputLength + InputTake, ::Item: AsChar + Copy, ::IterElem: Clone, @@ -1587,6 +1565,7 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { + /* let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; let mut float: f64 = minimal_lexical::parse_float( @@ -1599,6 +1578,15 @@ where } Ok((i, float)) + */ + let (i, s) = recognize_float_or_exceptions(input)?; + match s.parse_to() { + Some(f) => Ok((i, f)), + None => Err(crate::Err::Error(E::from_error_kind( + i, + crate::error::ErrorKind::Float, + ))), + } } #[cfg(test)] @@ -1694,7 +1682,6 @@ mod tests { } #[test] - #[cfg(stable_i128)] fn be_i128_tests() { assert_parse!( be_i128( @@ -1811,7 +1798,6 @@ mod tests { } #[test] - #[cfg(stable_i128)] fn le_i128_tests() { assert_parse!( le_i128( @@ -1939,6 +1925,7 @@ mod tests { "12.34", "-1.234E-12", "-1.234e-12", + "0.00000000000000000087", ]; for test in test_cases.drain(..) 
{ @@ -1947,14 +1934,13 @@ mod tests { println!("now parsing: {} -> {}", test, expected32); - let larger = format!("{}", test); - assert_parse!(recognize_float(&larger[..]), Ok(("", test))); + assert_parse!(recognize_float(test), Ok(("", test))); - assert_parse!(float(larger.as_bytes()), Ok((&b""[..], expected32))); - assert_parse!(float(&larger[..]), Ok(("", expected32))); + assert_parse!(float(test.as_bytes()), Ok((&b""[..], expected32))); + assert_parse!(float(test), Ok(("", expected32))); - assert_parse!(double(larger.as_bytes()), Ok((&b""[..], expected64))); - assert_parse!(double(&larger[..]), Ok(("", expected64))); + assert_parse!(double(test.as_bytes()), Ok((&b""[..], expected64))); + assert_parse!(double(test), Ok(("", expected64))); } let remaining_exponent = "-1.234E-"; @@ -1962,6 +1948,14 @@ mod tests { recognize_float(remaining_exponent), Err(Err::Failure(("", ErrorKind::Digit))) ); + + let (_i, nan) = float::<_, ()>("NaN").unwrap(); + assert!(nan.is_nan()); + + let (_i, inf) = float::<_, ()>("inf").unwrap(); + assert!(inf.is_infinite()); + let (_i, inf) = float::<_, ()>("infinite").unwrap(); + assert!(inf.is_infinite()); } #[test] @@ -2049,8 +2043,7 @@ mod tests { #[cfg(feature = "std")] fn parse_f64(i: &str) -> IResult<&str, f64, ()> { - use crate::traits::ParseTo; - match recognize_float(i) { + match recognize_float_or_exceptions(i) { Err(e) => Err(e), Ok((i, s)) => { if s.is_empty() { diff --git a/src/number/streaming.rs b/src/number/streaming.rs index 134bc2483..0d80e1f98 100644 --- a/src/number/streaming.rs +++ b/src/number/streaming.rs @@ -6,32 +6,12 @@ use crate::character::streaming::{char, digit1, sign}; use crate::combinator::{cut, map, opt, recognize}; use crate::error::{ErrorKind, ParseError}; use crate::internal::*; -use crate::lib::std::ops::{RangeFrom, RangeTo}; +use crate::lib::std::ops::{Add, RangeFrom, RangeTo, Shl}; use crate::sequence::pair; use crate::traits::{ AsBytes, AsChar, Compare, InputIter, InputLength, InputTake, 
InputTakeAtPosition, Offset, Slice, }; -#[doc(hidden)] -macro_rules! map( - // Internal parser, do not use directly - (__impl $i:expr, $submac:ident!( $($args:tt)* ), $g:expr) => ( - $crate::combinator::map(move |i| {$submac!(i, $($args)*)}, $g).parse($i) - ); - ($i:expr, $submac:ident!( $($args:tt)* ), $g:expr) => ( - map!(__impl $i, $submac!($($args)*), $g) - ); - ($i:expr, $f:expr, $g:expr) => ( - map!(__impl $i, call!($f), $g) - ); -); - -#[doc(hidden)] -macro_rules! call ( - ($i:expr, $fun:expr) => ( $fun( $i ) ); - ($i:expr, $fun:expr, $($args:expr),* ) => ( $fun( $i, $($args),* ) ); -); - /// Recognizes an unsigned 1 byte integer. /// /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. @@ -51,14 +31,7 @@ pub fn be_u8>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 1; - if input.input_len() < bound { - Err(Err::Incomplete(Needed::new(1))) - } else { - let res = input.iter_elements().next().unwrap(); - - Ok((input.slice(bound..), res)) - } + be_uint(input, 1) } /// Recognizes a big endian unsigned 2 bytes integer. @@ -81,17 +54,7 @@ pub fn be_u16>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 2; - if input.input_len() < bound { - Err(Err::Incomplete(Needed::new(bound - input.input_len()))) - } else { - let mut res = 0u16; - for byte in input.iter_elements().take(bound) { - res = (res << 8) + byte as u16; - } - - Ok((input.slice(bound..), res)) - } + be_uint(input, 2) } /// Recognizes a big endian unsigned 3 byte integer. 
@@ -114,17 +77,7 @@ pub fn be_u24>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 3; - if input.input_len() < bound { - Err(Err::Incomplete(Needed::new(bound - input.input_len()))) - } else { - let mut res = 0u32; - for byte in input.iter_elements().take(bound) { - res = (res << 8) + byte as u32; - } - - Ok((input.slice(bound..), res)) - } + be_uint(input, 3) } /// Recognizes a big endian unsigned 4 bytes integer. @@ -147,17 +100,7 @@ pub fn be_u32>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 4; - if input.input_len() < bound { - Err(Err::Incomplete(Needed::new(bound - input.input_len()))) - } else { - let mut res = 0u32; - for byte in input.iter_elements().take(bound) { - res = (res << 8) + byte as u32; - } - - Ok((input.slice(bound..), res)) - } + be_uint(input, 4) } /// Recognizes a big endian unsigned 8 bytes integer. @@ -180,17 +123,7 @@ pub fn be_u64>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 8; - if input.input_len() < bound { - Err(Err::Incomplete(Needed::new(bound - input.input_len()))) - } else { - let mut res = 0u64; - for byte in input.iter_elements().take(bound) { - res = (res << 8) + byte as u64; - } - - Ok((input.slice(bound..), res)) - } + be_uint(input, 8) } /// Recognizes a big endian unsigned 16 bytes integer. 
@@ -208,18 +141,33 @@ where /// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn be_u128>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 16; + be_uint(input, 16) +} + +#[inline] +fn be_uint>(input: I, bound: usize) -> IResult +where + I: Slice> + InputIter + InputLength, + Uint: Default + Shl + Add + From, +{ if input.input_len() < bound { Err(Err::Incomplete(Needed::new(bound - input.input_len()))) } else { - let mut res = 0u128; - for byte in input.iter_elements().take(bound) { - res = (res << 8) + byte as u128; + let mut res = Uint::default(); + + // special case to avoid shift a byte with overflow + if bound > 1 { + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte.into(); + } + } else { + for byte in input.iter_elements().take(bound) { + res = byte.into(); + } } Ok((input.slice(bound..), res)) @@ -243,7 +191,7 @@ pub fn be_i8>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u8, |x| x as i8) + be_u8.map(|x| x as i8).parse(input) } /// Recognizes a big endian signed 2 bytes integer. @@ -263,7 +211,7 @@ pub fn be_i16>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u16, |x| x as i16) + be_u16.map(|x| x as i16).parse(input) } /// Recognizes a big endian signed 3 bytes integer. @@ -284,11 +232,15 @@ where I: Slice> + InputIter + InputLength, { // Same as the unsigned version but we need to sign-extend manually here - map!(input, be_u24, |x| if x & 0x80_00_00 != 0 { - (x | 0xff_00_00_00) as i32 - } else { - x as i32 - }) + be_u24 + .map(|x| { + if x & 0x80_00_00 != 0 { + (x | 0xff_00_00_00) as i32 + } else { + x as i32 + } + }) + .parse(input) } /// Recognizes a big endian signed 4 bytes integer. 
@@ -308,7 +260,7 @@ pub fn be_i32>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u32, |x| x as i32) + be_u32.map(|x| x as i32).parse(input) } /// Recognizes a big endian signed 8 bytes integer. @@ -329,7 +281,7 @@ pub fn be_i64>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u64, |x| x as i64) + be_u64.map(|x| x as i64).parse(input) } /// Recognizes a big endian signed 16 bytes integer. @@ -345,12 +297,11 @@ where /// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn be_i128>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, be_u128, |x| x as i128) + be_u128.map(|x| x as i128).parse(input) } /// Recognizes an unsigned 1 byte integer. @@ -370,14 +321,7 @@ pub fn le_u8>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 1; - if input.input_len() < bound { - Err(Err::Incomplete(Needed::new(1))) - } else { - let res = input.iter_elements().next().unwrap(); - - Ok((input.slice(bound..), res)) - } + le_uint(input, 1) } /// Recognizes a little endian unsigned 2 bytes integer. @@ -400,17 +344,7 @@ pub fn le_u16>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 2; - if input.input_len() < bound { - Err(Err::Incomplete(Needed::new(bound - input.input_len()))) - } else { - let mut res = 0u16; - for (index, byte) in input.iter_indices().take(bound) { - res += (byte as u16) << (8 * index); - } - - Ok((input.slice(bound..), res)) - } + le_uint(input, 2) } /// Recognizes a little endian unsigned 3 bytes integer. 
@@ -433,17 +367,7 @@ pub fn le_u24>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 3; - if input.input_len() < bound { - Err(Err::Incomplete(Needed::new(bound - input.input_len()))) - } else { - let mut res = 0u32; - for (index, byte) in input.iter_indices().take(bound) { - res += (byte as u32) << (8 * index); - } - - Ok((input.slice(bound..), res)) - } + le_uint(input, 3) } /// Recognizes a little endian unsigned 4 bytes integer. @@ -466,17 +390,7 @@ pub fn le_u32>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 4; - if input.input_len() < bound { - Err(Err::Incomplete(Needed::new(bound - input.input_len()))) - } else { - let mut res = 0u32; - for (index, byte) in input.iter_indices().take(bound) { - res += (byte as u32) << (8 * index); - } - - Ok((input.slice(bound..), res)) - } + le_uint(input, 4) } /// Recognizes a little endian unsigned 8 bytes integer. @@ -499,17 +413,7 @@ pub fn le_u64>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 8; - if input.input_len() < bound { - Err(Err::Incomplete(Needed::new(bound - input.input_len()))) - } else { - let mut res = 0u64; - for (index, byte) in input.iter_indices().take(bound) { - res += (byte as u64) << (8 * index); - } - - Ok((input.slice(bound..), res)) - } + le_uint(input, 8) } /// Recognizes a little endian unsigned 16 bytes integer. 
@@ -528,18 +432,25 @@ where /// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn le_u128>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - let bound: usize = 16; + le_uint(input, 16) +} + +#[inline] +fn le_uint>(input: I, bound: usize) -> IResult +where + I: Slice> + InputIter + InputLength, + Uint: Default + Shl + Add + From, +{ if input.input_len() < bound { Err(Err::Incomplete(Needed::new(bound - input.input_len()))) } else { - let mut res = 0u128; + let mut res = Uint::default(); for (index, byte) in input.iter_indices().take(bound) { - res += (byte as u128) << (8 * index); + res = res + (Uint::from(byte) << (8 * index as u8)); } Ok((input.slice(bound..), res)) @@ -563,7 +474,7 @@ pub fn le_i8>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, le_u8, |x| x as i8) + le_u8.map(|x| x as i8).parse(input) } /// Recognizes a little endian signed 2 bytes integer. @@ -586,7 +497,7 @@ pub fn le_i16>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, le_u16, |x| x as i16) + le_u16.map(|x| x as i16).parse(input) } /// Recognizes a little endian signed 3 bytes integer. @@ -610,11 +521,15 @@ where I: Slice> + InputIter + InputLength, { // Same as the unsigned version but we need to sign-extend manually here - map!(input, le_u24, |x| if x & 0x80_00_00 != 0 { - (x | 0xff_00_00_00) as i32 - } else { - x as i32 - }) + le_u24 + .map(|x| { + if x & 0x80_00_00 != 0 { + (x | 0xff_00_00_00) as i32 + } else { + x as i32 + } + }) + .parse(input) } /// Recognizes a little endian signed 4 bytes integer. @@ -637,7 +552,7 @@ pub fn le_i32>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, le_u32, |x| x as i32) + le_u32.map(|x| x as i32).parse(input) } /// Recognizes a little endian signed 8 bytes integer. 
@@ -660,7 +575,7 @@ pub fn le_i64>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, le_u64, |x| x as i64) + le_u64.map(|x| x as i64).parse(input) } /// Recognizes a little endian signed 16 bytes integer. @@ -679,12 +594,11 @@ where /// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn le_i128>(input: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(input, le_u128, |x| x as i128) + le_u128.map(|x| x as i128).parse(input) } /// Recognizes an unsigned 1 byte integer @@ -900,7 +814,6 @@ where /// assert_eq!(le_u128(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn u128>(endian: crate::number::Endianness) -> fn(I) -> IResult where I: Slice> + InputIter + InputLength, @@ -936,7 +849,7 @@ pub fn i8>(i: I) -> IResult where I: Slice> + InputIter + InputLength, { - map!(i, u8, |x| x as i8) + u8.map(|x| x as i8).parse(i) } /// Recognizes a signed 2 byte integer @@ -1120,7 +1033,6 @@ where /// assert_eq!(le_i128(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); /// ``` #[inline] -#[cfg(stable_i128)] pub fn i128>(endian: crate::number::Endianness) -> fn(I) -> IResult where I: Slice> + InputIter + InputLength, @@ -1324,22 +1236,37 @@ where /// hex_u32(s) /// }; /// -/// assert_eq!(parser(b"01AE;"), Ok((&b";"[..], 0x01AE))); -/// assert_eq!(parser(b"abc"), Err(Err::Incomplete(Needed::new(1)))); -/// assert_eq!(parser(b"ggg"), Err(Err::Error((&b"ggg"[..], ErrorKind::IsA)))); +/// assert_eq!(parser(&b"01AE;"[..]), Ok((&b";"[..], 0x01AE))); +/// assert_eq!(parser(&b"abc"[..]), Err(Err::Incomplete(Needed::new(1)))); +/// assert_eq!(parser(&b"ggg"[..]), Err(Err::Error((&b"ggg"[..], ErrorKind::IsA)))); /// ``` #[inline] -pub fn hex_u32<'a, E: ParseError<&'a [u8]>>(input: &'a [u8]) -> IResult<&'a [u8], u32, E> { - let (i, o) = crate::bytes::streaming::is_a(&b"0123456789abcdefABCDEF"[..])(input)?; +pub fn 
hex_u32>(input: I) -> IResult +where + I: InputTakeAtPosition, + I: Slice> + Slice>, + ::Item: AsChar, + I: AsBytes, + I: InputLength, +{ + let e: ErrorKind = ErrorKind::IsA; + let (i, o) = input.split_at_position1( + |c| { + let c = c.as_char(); + !"0123456789abcdefABCDEF".contains(c) + }, + e, + )?; // Do not parse more than 8 characters for a u32 - let (parsed, remaining) = if o.len() <= 8 { + let (parsed, remaining) = if o.input_len() <= 8 { (o, i) } else { - (&input[..8], &input[8..]) + (input.slice(..8), input.slice(8..)) }; let res = parsed + .as_bytes() .iter() .rev() .enumerate() @@ -1393,7 +1320,44 @@ where ))(input) } -/// Recognizes a floating point number in text format and returns the integer, fraction and exponent parts of the input data +// workaround until issues with minimal-lexical are fixed +#[doc(hidden)] +pub fn recognize_float_or_exceptions>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputTake + InputLength + Compare<&'static str>, + ::Item: AsChar, + T: InputTakeAtPosition, + ::Item: AsChar, +{ + alt(( + |i: T| { + recognize_float::<_, E>(i.clone()).map_err(|e| match e { + crate::Err::Error(_) => crate::Err::Error(E::from_error_kind(i, ErrorKind::Float)), + crate::Err::Failure(_) => crate::Err::Failure(E::from_error_kind(i, ErrorKind::Float)), + crate::Err::Incomplete(needed) => crate::Err::Incomplete(needed), + }) + }, + |i: T| { + crate::bytes::streaming::tag_no_case::<_, _, E>("nan")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + |i: T| { + crate::bytes::streaming::tag_no_case::<_, _, E>("inf")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + |i: T| { + crate::bytes::streaming::tag_no_case::<_, _, E>("infinity")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + ))(input) +} + +/// Recognizes a floating point number in text format +/// +/// It returns a tuple of 
(`sign`, `integer part`, `fraction part` and `exponent`) of the input +/// data. /// /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. /// @@ -1401,7 +1365,7 @@ pub fn recognize_float_parts>(input: T) -> IResult> + Slice>, T: Clone + Offset, - T: InputIter + crate::traits::ParseTo, + T: InputIter, ::Item: AsChar, T: InputTakeAtPosition + InputTake + InputLength, ::Item: AsChar, @@ -1510,7 +1474,7 @@ pub fn float>(input: T) -> IResult where T: Slice> + Slice>, T: Clone + Offset, - T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo + Compare<&'static str>, ::Item: AsChar, ::IterElem: Clone, T: InputTakeAtPosition, @@ -1518,6 +1482,7 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { + /* let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; let mut float: f32 = minimal_lexical::parse_float( @@ -1530,6 +1495,15 @@ where } Ok((i, float)) + */ + let (i, s) = recognize_float_or_exceptions(input)?; + match s.parse_to() { + Some(f) => Ok((i, f)), + None => Err(crate::Err::Error(E::from_error_kind( + i, + crate::error::ErrorKind::Float, + ))), + } } /// Recognizes floating point number in text format and returns a f64. 
@@ -1554,7 +1528,7 @@ pub fn double>(input: T) -> IResult where T: Slice> + Slice>, T: Clone + Offset, - T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo + Compare<&'static str>, ::Item: AsChar, ::IterElem: Clone, T: InputTakeAtPosition, @@ -1562,6 +1536,7 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { + /* let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; let mut float: f64 = minimal_lexical::parse_float( @@ -1574,6 +1549,15 @@ where } Ok((i, float)) + */ + let (i, s) = recognize_float_or_exceptions(input)?; + match s.parse_to() { + Some(f) => Ok((i, f)), + None => Err(crate::Err::Error(E::from_error_kind( + i, + crate::error::ErrorKind::Float, + ))), + } } #[cfg(test)] @@ -1712,7 +1696,6 @@ mod tests { } #[test] - #[cfg(stable_i128)] fn i128_tests() { assert_parse!( be_i128( @@ -1892,7 +1875,6 @@ mod tests { } #[test] - #[cfg(stable_i128)] fn le_i128_tests() { assert_parse!( le_i128( @@ -2020,6 +2002,7 @@ mod tests { "12.34", "-1.234E-12", "-1.234e-12", + "0.00000000000000000087", ]; for test in test_cases.drain(..) 
{ @@ -2043,6 +2026,14 @@ mod tests { recognize_float(remaining_exponent), Err(Err::Incomplete(Needed::new(1))) ); + + let (_i, nan) = float::<_, ()>("NaN").unwrap(); + assert!(nan.is_nan()); + + let (_i, inf) = float::<_, ()>("inf").unwrap(); + assert!(inf.is_infinite()); + let (_i, inf) = float::<_, ()>("infinite").unwrap(); + assert!(inf.is_infinite()); } #[test] @@ -2131,7 +2122,7 @@ mod tests { #[cfg(feature = "std")] fn parse_f64(i: &str) -> IResult<&str, f64, ()> { use crate::traits::ParseTo; - match recognize_float(i) { + match recognize_float_or_exceptions(i) { Err(e) => Err(e), Ok((i, s)) => { if s.is_empty() { diff --git a/src/sequence/mod.rs b/src/sequence/mod.rs index 0708fd4ec..b55e33f05 100644 --- a/src/sequence/mod.rs +++ b/src/sequence/mod.rs @@ -12,6 +12,7 @@ use crate::internal::{IResult, Parser}; /// # Arguments /// * `first` The first parser to apply. /// * `second` The second parser to apply. +/// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; @@ -45,6 +46,7 @@ where /// # Arguments /// * `first` The opening parser. /// * `second` The second parser to get object. +/// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; @@ -78,6 +80,7 @@ where /// # Arguments /// * `first` The first parser to apply. /// * `second` The second parser to match an object. +/// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; @@ -113,6 +116,7 @@ where /// * `first` The first parser to apply. /// * `sep` The separator parser to apply. /// * `second` The second parser to apply. +/// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; @@ -151,6 +155,7 @@ where /// * `first` The first parser to apply and discard. /// * `second` The second parser to apply. /// * `third` The third parser to apply and discard. 
+/// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; @@ -181,4 +186,95 @@ where } } +/// Helper trait for the tuple combinator. +/// +/// This trait is implemented for tuples of parsers of up to 21 elements. +pub trait Tuple { + /// Parses the input and returns a tuple of results of each parser. + fn parse(&mut self, input: I) -> IResult; +} + +impl, F: Parser> + Tuple for (F,) +{ + fn parse(&mut self, input: Input) -> IResult { + self.0.parse(input).map(|(i, o)| (i, (o,))) + } +} + +macro_rules! tuple_trait( + ($name1:ident $ty1:ident, $name2: ident $ty2:ident, $($name:ident $ty:ident),*) => ( + tuple_trait!(__impl $name1 $ty1, $name2 $ty2; $($name $ty),*); + ); + (__impl $($name:ident $ty: ident),+; $name1:ident $ty1:ident, $($name2:ident $ty2:ident),*) => ( + tuple_trait_impl!($($name $ty),+); + tuple_trait!(__impl $($name $ty),+ , $name1 $ty1; $($name2 $ty2),*); + ); + (__impl $($name:ident $ty: ident),+; $name1:ident $ty1:ident) => ( + tuple_trait_impl!($($name $ty),+); + tuple_trait_impl!($($name $ty),+, $name1 $ty1); + ); +); + +macro_rules! tuple_trait_impl( + ($($name:ident $ty: ident),+) => ( + impl< + Input: Clone, $($ty),+ , Error: ParseError, + $($name: Parser),+ + > Tuple for ( $($name),+ ) { + + fn parse(&mut self, input: Input) -> IResult { + tuple_trait_inner!(0, self, input, (), $($name)+) + } + } + ); +); + +macro_rules! 
tuple_trait_inner( + ($it:tt, $self:expr, $input:expr, (), $head:ident $($id:ident)+) => ({ + let (i, o) = $self.$it.parse($input.clone())?; + + succ!($it, tuple_trait_inner!($self, i, ( o ), $($id)+)) + }); + ($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $head:ident $($id:ident)+) => ({ + let (i, o) = $self.$it.parse($input.clone())?; + + succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $($id)+)) + }); + ($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $head:ident) => ({ + let (i, o) = $self.$it.parse($input.clone())?; + + Ok((i, ($($parsed)* , o))) + }); +); + +tuple_trait!(FnA A, FnB B, FnC C, FnD D, FnE E, FnF F, FnG G, FnH H, FnI I, FnJ J, FnK K, FnL L, + FnM M, FnN N, FnO O, FnP P, FnQ Q, FnR R, FnS S, FnT T, FnU U); + +// Special case: implement `Tuple` for `()`, the unit type. +// This can come up in macros which accept a variable number of arguments. +// Literally, `()` is an empty tuple, so it should simply parse nothing. +impl> Tuple for () { + fn parse(&mut self, input: I) -> IResult { + Ok((input, ())) + } +} + +///Applies a tuple of parsers one by one and returns their results as a tuple. 
+///There is a maximum of 21 parsers +/// ```rust +/// # use nom::{Err, error::ErrorKind}; +/// use nom::sequence::tuple; +/// use nom::character::complete::{alpha1, digit1}; +/// let mut parser = tuple((alpha1, digit1, alpha1)); +/// +/// assert_eq!(parser("abc123def"), Ok(("", ("abc", "123", "def")))); +/// assert_eq!(parser("123def"), Err(Err::Error(("123def", ErrorKind::Alpha)))); +/// ``` +#[deprecated(since = "8.0.0", note = "`Parser` is directly implemented for tuples")] +pub fn tuple, List: Tuple>( + mut l: List, +) -> impl FnMut(I) -> IResult { + move |i: I| l.parse(i) +} diff --git a/src/sequence/tests.rs b/src/sequence/tests.rs index 576726c81..0d226de5f 100644 --- a/src/sequence/tests.rs +++ b/src/sequence/tests.rs @@ -1,6 +1,6 @@ use super::*; use crate::bytes::streaming::{tag, take}; -use crate::error::ErrorKind; +use crate::error::{Error, ErrorKind}; use crate::internal::{Err, IResult, Needed}; use crate::number::streaming::be_u16; use crate::combinator::parse; @@ -258,6 +258,7 @@ fn delimited_test() { #[test] fn tuple_test() { + #[allow(clippy::type_complexity)] fn tuple_3(i: &[u8]) -> IResult<&[u8], (u16, &[u8], &[u8])> { parse((be_u16, take(3u8), tag("fg")))(i) } @@ -273,3 +274,19 @@ fn tuple_test() { Err(Err::Error(error_position!(&b"jk"[..], ErrorKind::Tag))) ); } + +#[test] +fn unit_type() { + assert_eq!( + tuple::<&'static str, (), Error<&'static str>, ()>(())("abxsbsh"), + Ok(("abxsbsh", ())) + ); + assert_eq!( + tuple::<&'static str, (), Error<&'static str>, ()>(())("sdfjakdsas"), + Ok(("sdfjakdsas", ())) + ); + assert_eq!( + tuple::<&'static str, (), Error<&'static str>, ()>(())(""), + Ok(("", ())) + ); +} diff --git a/src/str.rs b/src/str.rs index e8d38c6c2..575a7eab3 100644 --- a/src/str.rs +++ b/src/str.rs @@ -88,7 +88,7 @@ mod test { ); assert!( output == CONSUMED, - "Parser `take_s` doens't return the string it consumed on success. Expected `{}`, got `{}`.", + "Parser `take_s` doesn't return the string it consumed on success. 
Expected `{}`, got `{}`.", CONSUMED, output ); @@ -120,7 +120,7 @@ mod test { assert!( output == CONSUMED, "Parser `take_until`\ - doens't return the string it consumed on success. Expected `{}`, got `{}`.", + doesn't return the string it consumed on success. Expected `{}`, got `{}`.", CONSUMED, output ); @@ -168,10 +168,10 @@ mod test { let c = "abcd123"; let d = "123"; - assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1)))); - assert_eq!(f(&b[..]), Err(Err::Incomplete(Needed::new(1)))); - assert_eq!(f(&c[..]), Ok((&d[..], &b[..]))); - assert_eq!(f(&d[..]), Ok((&d[..], &a[..]))); + assert_eq!(f(a), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(b), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(c), Ok((d, b))); + assert_eq!(f(d), Ok((d, a))); } #[test] @@ -186,12 +186,12 @@ mod test { let c = "abcd123"; let d = "123"; - assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1)))); - assert_eq!(f(&b[..]), Err(Err::Incomplete(Needed::new(1)))); - assert_eq!(f(&c[..]), Ok((&"123"[..], &b[..]))); + assert_eq!(f(a), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(b), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(c), Ok(("123", b))); assert_eq!( - f(&d[..]), - Err(Err::Error(error_position!(&d[..], ErrorKind::TakeWhile1))) + f(d), + Err(Err::Error(error_position!(d, ErrorKind::TakeWhile1))) ); } @@ -281,7 +281,7 @@ mod test { ); assert!( output == CONSUMED, - "Parser `is_not` doens't return the string it consumed on success. Expected `{}`, got `{}`.", + "Parser `is_not` doesn't return the string it consumed on success. 
Expected `{}`, got `{}`.", CONSUMED, output ); @@ -302,15 +302,7 @@ mod test { const CONSUMED: &str = "βèƒôřèÂßÇ"; const LEFTOVER: &str = "áƒƭèř"; fn while_s(c: char) -> bool { - c == 'β' - || c == 'è' - || c == 'ƒ' - || c == 'ô' - || c == 'ř' - || c == 'è' - || c == 'Â' - || c == 'ß' - || c == 'Ç' + matches!(c, 'β' | 'è' | 'ƒ' | 'ô' | 'ř' | 'Â' | 'ß' | 'Ç') } fn test(input: &str) -> IResult<&str, &str> { take_while(while_s)(input) @@ -361,15 +353,7 @@ mod test { const CONSUMED: &str = "βèƒôřèÂßÇ"; const LEFTOVER: &str = "áƒƭèř"; fn while1_s(c: char) -> bool { - c == 'β' - || c == 'è' - || c == 'ƒ' - || c == 'ô' - || c == 'ř' - || c == 'è' - || c == 'Â' - || c == 'ß' - || c == 'Ç' + matches!(c, 'β' | 'è' | 'ƒ' | 'ô' | 'ř' | 'Â' | 'ß' | 'Ç') } fn test(input: &str) -> IResult<&str, &str> { take_while1(while1_s)(input) @@ -432,7 +416,7 @@ mod test { ); assert!( output == CONSUMED, - "Parser `is_a` doens't return the string it consumed on success. Expected `{}`, got `{}`.", + "Parser `is_a` doesn't return the string it consumed on success. Expected `{}`, got `{}`.", CONSUMED, output ); @@ -510,8 +494,8 @@ mod test { recognize(many1(alt((tag("a"), tag("b")))))(i) } - assert_eq!(f(&a[..]), Ok((&a[6..], &a[..]))); - assert_eq!(f(&b[..]), Ok((&b[4..], &b[..4]))); + assert_eq!(f(a), Ok((&a[6..], a))); + assert_eq!(f(b), Ok((&b[4..], &b[..4]))); } #[test] diff --git a/src/traits.rs b/src/traits.rs index a073fd3da..43d175c42 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -109,7 +109,7 @@ impl AsBytes for str { impl<'a> AsBytes for &'a [u8] { #[inline(always)] fn as_bytes(&self) -> &[u8] { - *self + self } } @@ -148,7 +148,8 @@ as_bytes_array_impls! 
{ } /// Transforms common types to a char for basic token parsing -pub trait AsChar { +#[allow(clippy::len_without_is_empty)] +pub trait AsChar: Copy { /// makes a char from self fn as_char(self) -> char; @@ -178,7 +179,7 @@ impl AsChar for u8 { } #[inline] fn is_alpha(self) -> bool { - (self >= 0x41 && self <= 0x5A) || (self >= 0x61 && self <= 0x7A) + matches!(self, 0x41..=0x5A | 0x61..=0x7A) } #[inline] fn is_alphanum(self) -> bool { @@ -186,17 +187,15 @@ impl AsChar for u8 { } #[inline] fn is_dec_digit(self) -> bool { - self >= 0x30 && self <= 0x39 + matches!(self, 0x30..=0x39) } #[inline] fn is_hex_digit(self) -> bool { - (self >= 0x30 && self <= 0x39) - || (self >= 0x41 && self <= 0x46) - || (self >= 0x61 && self <= 0x66) + matches!(self, 0x30..=0x39 | 0x41..=0x46 | 0x61..=0x66) } #[inline] fn is_oct_digit(self) -> bool { - self >= 0x30 && self <= 0x37 + matches!(self, 0x30..=0x37) } #[inline] fn len(self) -> usize { @@ -210,7 +209,7 @@ impl<'a> AsChar for &'a u8 { } #[inline] fn is_alpha(self) -> bool { - (*self >= 0x41 && *self <= 0x5A) || (*self >= 0x61 && *self <= 0x7A) + matches!(*self, 0x41..=0x5A | 0x61..=0x7A) } #[inline] fn is_alphanum(self) -> bool { @@ -218,17 +217,15 @@ impl<'a> AsChar for &'a u8 { } #[inline] fn is_dec_digit(self) -> bool { - *self >= 0x30 && *self <= 0x39 + matches!(*self, 0x30..=0x39) } #[inline] fn is_hex_digit(self) -> bool { - (*self >= 0x30 && *self <= 0x39) - || (*self >= 0x41 && *self <= 0x46) - || (*self >= 0x61 && *self <= 0x66) + matches!(*self, 0x30..=0x39 | 0x41..=0x46 | 0x61..=0x66) } #[inline] fn is_oct_digit(self) -> bool { - *self >= 0x30 && *self <= 0x37 + matches!(*self, 0x30..=0x37) } #[inline] fn len(self) -> usize { @@ -701,9 +698,9 @@ impl<'a> InputTakeAtPosition for &'a str { } } -/// Indicates wether a comparison was successful, an error, or +/// Indicates whether a comparison was successful, an error, or /// if more data was needed -#[derive(Debug, PartialEq)] +#[derive(Debug, Eq, PartialEq)] pub enum 
CompareResult { /// Comparison was successful Ok, @@ -914,6 +911,18 @@ impl<'a> FindToken for &'a str { } } +impl<'a> FindToken for &'a [char] { + fn find_token(&self, token: char) -> bool { + self.iter().any(|i| *i == token) + } +} + +impl<'a, 'b> FindToken<&'a char> for &'b [char] { + fn find_token(&self, token: &char) -> bool { + self.find_token(*token) + } +} + /// Look for a substring in self pub trait FindSubstring { /// Returns the byte position of the substring if it is found @@ -1318,6 +1327,10 @@ impl ErrorConvert> for error::VerboseError } } +impl ErrorConvert<()> for () { + fn convert(self) {} +} + #[cfg(feature = "std")] #[cfg_attr(feature = "docsrs", doc(cfg(feature = "std")))] /// Helper trait to show a byte slice as a hex dump @@ -1369,7 +1382,7 @@ impl HexDisplay for [u8] { v.push(b'\t'); for &byte in chunk { - if (byte >= 32 && byte <= 126) || byte >= 128 { + if matches!(byte, 32..=126 | 128..=255) { v.push(byte); } else { v.push(b'.'); @@ -1413,8 +1426,7 @@ mod tests { #[test] fn test_offset_str() { - let s = "abcřèÂßÇd123"; - let a = &s[..]; + let a = "abcřèÂßÇd123"; let b = &a[7..]; let c = &a[..5]; let d = &a[5..9]; diff --git a/tests/blockbuf-arithmetic.rs b/tests/blockbuf-arithmetic.rs deleted file mode 100644 index 27a295fc7..000000000 --- a/tests/blockbuf-arithmetic.rs +++ /dev/null @@ -1,321 +0,0 @@ -/* -#[macro_use] -extern crate nom; -extern crate bytes; - -use nom::{Compare,CompareResult,InputLength,InputIter,Slice,HexDisplay}; - -use std::str; -use std::str::FromStr; -use bytes::{Buf,MutBuf}; -use bytes::buf::{BlockBuf,BlockBufCursor}; -use std::ops::{Range,RangeTo,RangeFrom,RangeFull}; -use std::iter::{Enumerate,Iterator}; -use std::fmt; -use std::cmp::{min,PartialEq}; - -#[derive(Clone,Copy)] -#[repr(C)] -pub struct BlockSlice<'a> { - buf: &'a BlockBuf, - start: usize, - end: usize, -} - -impl<'a> BlockSlice<'a> { - fn cursor(&self) -> WrapCursor<'a> { - let mut cur = self.buf.buf(); - cur.advance(self.start); - WrapCursor { - 
cursor: cur, - length: self.end - self.start, - } - } -} - -impl<'a> fmt::Debug for BlockSlice<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "BlockSlice {{ start: {}, end: {}, data:\n{}\n}}", self.start, self.end, self.buf.bytes().unwrap_or(&b""[..]).to_hex(16)) - } -} - -impl<'a> PartialEq for BlockSlice<'a> { - fn eq(&self, other: &BlockSlice<'a>) -> bool { - let bufs = (self.buf as *const BlockBuf) == (other.buf as *const BlockBuf); - self.start == other.start && self.end == other.end && bufs - } -} - -impl<'a> Slice> for BlockSlice<'a> { - fn slice(&self, range:Range) -> Self { - BlockSlice { - buf: self.buf, - start: self.start + range.start, - //FIXME: check for valid end here - end: self.start + range.end, - } - } -} - -impl<'a> Slice> for BlockSlice<'a> { - fn slice(&self, range:RangeTo) -> Self { - self.slice(0..range.end) - } -} - -impl<'a> Slice> for BlockSlice<'a> { - fn slice(&self, range:RangeFrom) -> Self { - self.slice(range.start..self.end - self.start) - } -} - -impl<'a> Slice for BlockSlice<'a> { - fn slice(&self, _:RangeFull) -> Self { - BlockSlice { - buf: self.buf, - start: self.start, - end: self.end, - } - } -} - - -impl<'a> InputIter for BlockSlice<'a> { - type Item = u8; - type RawItem = u8; - type Iter = Enumerate>; - type IterElem = WrapCursor<'a>; - - fn iter_indices(&self) -> Self::Iter { - self.cursor().enumerate() - } - fn iter_elements(&self) -> Self::IterElem { - self.cursor() - } - fn position

(&self, predicate: P) -> Option where P: Fn(Self::RawItem) -> bool { - self.cursor().position(|b| predicate(b)) - } - fn slice_index(&self, count:usize) -> Option { - if self.end - self.start >= count { - Some(count) - } else { - None - } - } -} - - -impl<'a> InputLength for BlockSlice<'a> { - fn input_len(&self) -> usize { - self.end - self.start - } -} - -impl<'a,'b> Compare<&'b[u8]> for BlockSlice<'a> { - fn compare(&self, t: &'b[u8]) -> CompareResult { - let len = self.end - self.start; - let blen = t.len(); - let m = if len < blen { len } else { blen }; - let reduced = self.slice(..m); - let b = &t[..m]; - - for (a,b) in reduced.cursor().zip(b.iter()) { - if a != *b { - return CompareResult::Error; - } - } - if m < blen { - CompareResult::Incomplete - } else { - CompareResult::Ok - } - } - - - #[inline(always)] - fn compare_no_case(&self, t: &'b[u8]) -> CompareResult { - let len = self.end - self.start; - let blen = t.len(); - let m = if len < blen { len } else { blen }; - let reduced = self.slice(..m); - let other = &t[..m]; - - if !reduced.cursor().zip(other).all(|(a, b)| { - match (a,*b) { - (0...64, 0...64) | (91...96, 91...96) | (123...255, 123...255) => a == *b, - (65...90, 65...90) | (97...122, 97...122) | (65...90, 97...122 ) |(97...122, 65...90) => { - a & 0b01000000 == *b & 0b01000000 - } - _ => false - } - }) { - CompareResult::Error - } else if m < blen { - CompareResult::Incomplete - } else { - CompareResult::Ok - } - } -} - -impl<'a,'b> Compare<&'b str> for BlockSlice<'a> { - fn compare(&self, t: &'b str) -> CompareResult { - self.compare(str::as_bytes(t)) - } - fn compare_no_case(&self, t: &'b str) -> CompareResult { - self.compare_no_case(str::as_bytes(t)) - } -} - -//Wrapper to implement Iterator on BlockBufCursor -pub struct WrapCursor<'a> { - pub cursor: BlockBufCursor<'a>, - pub length: usize, -} - -impl<'a> Iterator for WrapCursor<'a> { - type Item = u8; - fn next(&mut self) -> Option { - //println!("NEXT: length={}, remaining={}", 
self.length, self.cursor.remaining()); - if min(self.length, self.cursor.remaining()) > 0 { - self.length -=1; - Some(self.cursor.read_u8()) - } else { - None - } - } -} - -//Reimplement eat_separator instead of fixing iterators -#[macro_export] -macro_rules! block_eat_separator ( - ($i:expr, $arr:expr) => ( - { - use nom::{InputLength,InputIter,Slice}; - if ($i).input_len() == 0 { - Ok(($i, ($i).slice(0..0))) - } else { - match ($i).iter_indices().position(|(_, item)| { - for (_,c) in ($arr).iter_indices() { - if *c == item { return false; } - } - true - }) { - Some(index) => { - Ok((($i).slice(index..), ($i).slice(..index))) - }, - None => { - Ok((($i).slice(($i).input_len()..), $i)) - } - } - } - } - ) -); - -#[macro_export] -macro_rules! block_named ( - ($name:ident, $submac:ident!( $($args:tt)* )) => ( - fn $name<'a>( i: BlockSlice<'a> ) -> nom::IResult, BlockSlice<'a>, u32> { - $submac!(i, $($args)*) - } - ); - ($name:ident<$o:ty>, $submac:ident!( $($args:tt)* )) => ( - fn $name<'a>( i: BlockSlice<'a> ) -> nom::IResult, $o, u32> { - $submac!(i, $($args)*) - } - ); -); - -block_named!(sp, block_eat_separator!(&b" \t\r\n"[..])); - -macro_rules! 
block_ws ( - ($i:expr, $($args:tt)*) => ( - { - sep!($i, sp, $($args)*) - } - ) -); - -block_named!(digit, is_a!("0123456789")); - -block_named!(parens, block_ws!(delimited!( tag!("("), expr, tag!(")") )) ); - - -block_named!(factor, alt!( - map_res!( - block_ws!(digit), - to_i64 - ) - | parens - ) -); - -block_named!(term , do_parse!( - init: factor >> - res: fold_many0!( - pair!(alt!(tag!("*") | tag!("/")), factor), - init, - |acc, (op, val): (BlockSlice, i64)| { - if (op.cursor().next().unwrap() as char) == '*' { acc * val } else { acc / val } - } - ) >> - (res) - ) -); - -block_named!(expr , do_parse!( - init: term >> - res: fold_many0!( - pair!(alt!(tag!("+") | tag!("-")), term), - init, - |acc, (op, val): (BlockSlice, i64)| { - if (op.cursor().next().unwrap() as char) == '+' { acc + val } else { acc - val } - } - ) >> - (res) - ) -); - - -fn blockbuf_from(input: &[u8]) -> BlockBuf { - let mut b = BlockBuf::new(2, 100); - b.copy_from(input); - b -} - - -fn sl<'a>(input: &'a BlockBuf) -> BlockSlice<'a> { - BlockSlice { - buf: input, - start: 0, - end: input.len(), - } -} - -fn to_i64<'a>(input: BlockSlice<'a>) -> Result { - let v: Vec = input.cursor().collect(); - - match str::from_utf8(&v) { - Err(_) => Err(()), - Ok(s) => match FromStr::from_str(s) { - Err(_) => Err(()), - Ok(i) => Ok(i) - } - } -} - -#[test] -fn factor_test() { - let a = blockbuf_from(&b"3"[..]); - println!("calculated: {:?}", factor(sl(&a))); -} - -#[test] -fn parens_test() { - let input1 = blockbuf_from(&b" 2* ( 3 + 4 ) "[..]); - println!("calculated 1: {:?}", expr(sl(&input1))); - let input2 = blockbuf_from(&b" 2*2 / ( 5 - 1) + 3"[..]); - println!("calculated 2: {:?}", expr(sl(&input2))); -} -*/ diff --git a/tests/css.rs b/tests/css.rs index 4f34255c7..60729be2e 100644 --- a/tests/css.rs +++ b/tests/css.rs @@ -2,7 +2,7 @@ use nom::bytes::complete::{tag, take_while_m_n}; use nom::combinator::{map_res, parse}; use nom::IResult; -#[derive(Debug, PartialEq)] +#[derive(Debug, Eq, PartialEq)] 
pub struct Color { pub red: u8, pub green: u8, @@ -14,7 +14,7 @@ fn from_hex(input: &str) -> Result { } fn is_hex_digit(c: char) -> bool { - c.is_digit(16) + c.is_ascii_hexdigit() } fn hex_primary(input: &str) -> IResult<&str, u8> { diff --git a/tests/ini_str.rs b/tests/ini_str.rs index 543040134..17df34339 100644 --- a/tests/ini_str.rs +++ b/tests/ini_str.rs @@ -54,6 +54,7 @@ fn category_and_keys(i: &str) -> IResult<&str, (&str, HashMap<&str, &str>)> { pair(category, keys_and_values)(i) } +#[allow(clippy::type_complexity)] fn categories_aggregator(i: &str) -> IResult<&str, Vec<(&str, HashMap<&str, &str>)>> { many0(category_and_keys)(i) } diff --git a/tests/issues.rs b/tests/issues.rs index 278d14c34..0de6b42a8 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -1,6 +1,6 @@ //#![feature(trace_macros)] #![allow(dead_code)] -#![cfg_attr(feature = "cargo-clippy", allow(redundant_closure))] +#![allow(clippy::redundant_closure)] use nom::{error::ErrorKind, Err, IResult, Needed}; @@ -120,6 +120,7 @@ mod issue_647 { v: Vec, } + #[allow(clippy::type_complexity)] fn list<'a, 'b>( input: Input<'a>, _cs: &'b f64, @@ -172,7 +173,7 @@ fn issue_many_m_n_with_zeros() { use nom::character::complete::char; use nom::multi::many_m_n; let mut parser = many_m_n::<_, _, (), _>(0, 0, char('a')); - assert_eq!(parser("aaa"), Ok(("aaa", vec!()))); + assert_eq!(parser("aaa"), Ok(("aaa", vec![]))); } #[test] @@ -205,3 +206,52 @@ fn issue_1231_bits_expect_fn_closure() { } assert_eq!(example(&[0xff]), Ok((&b""[..], (1, 1)))); } + +#[test] +fn issue_1282_findtoken_char() { + use nom::character::complete::one_of; + use nom::error::Error; + let parser = one_of::<_, _, Error<_>>(&['a', 'b', 'c'][..]); + assert_eq!(parser("aaa"), Ok(("aa", 'a'))); +} + +#[test] +fn issue_x_looser_fill_bounds() { + use nom::{ + bytes::streaming::tag, character::streaming::digit1, error_position, multi::fill, + sequence::terminated, + }; + + fn fill_pair(i: &[u8]) -> IResult<&[u8], [&[u8]; 2]> { + let mut buf = 
[&[][..], &[][..]]; + let (i, _) = fill(terminated(digit1, tag(",")), &mut buf)(i)?; + Ok((i, buf)) + } + + assert_eq!( + fill_pair(b"123,456,"), + Ok((&b""[..], [&b"123"[..], &b"456"[..]])) + ); + assert_eq!( + fill_pair(b"123,456,789"), + Ok((&b"789"[..], [&b"123"[..], &b"456"[..]])) + ); + assert_eq!( + fill_pair(b"123,,"), + Err(Err::Error(error_position!(&b","[..], ErrorKind::Digit))) + ); +} + +fn issue_1459_clamp_capacity() { + use nom::character::complete::char; + + // shouldn't panic + use nom::multi::many_m_n; + let mut parser = many_m_n::<_, _, (), _>(usize::MAX, usize::MAX, char('a')); + assert_eq!(parser("a"), Err(nom::Err::Error(()))); + + // shouldn't panic + use nom::multi::count; + let mut parser = count::<_, _, (), _>(char('a'), usize::MAX); + assert_eq!(parser("a"), Err(nom::Err::Error(()))); +} diff --git a/tests/mp4.rs b/tests/mp4.rs index 852bf2955..79dbc7c65 100644 --- a/tests/mp4.rs +++ b/tests/mp4.rs @@ -26,7 +26,7 @@ fn mp4_box(input: &[u8]) -> IResult<&[u8], &[u8]> { } } -#[cfg_attr(rustfmt, rustfmt_skip)] +#[rustfmt::skip] #[derive(PartialEq,Eq,Debug)] struct FileType<'a> { major_brand: &'a str, @@ -34,7 +34,7 @@ struct FileType<'a> { compatible_brands: Vec<&'a str> } -#[cfg_attr(rustfmt, rustfmt_skip)] +#[rustfmt::skip] #[allow(non_snake_case)] #[derive(Debug,Clone)] pub struct Mvhd32 { @@ -64,7 +64,7 @@ pub struct Mvhd32 { track_id: u32 } -#[cfg_attr(rustfmt, rustfmt_skip)] +#[rustfmt::skip] #[allow(non_snake_case)] #[derive(Debug,Clone)] pub struct Mvhd64 { @@ -94,7 +94,7 @@ pub struct Mvhd64 { track_id: u32 } -#[cfg_attr(rustfmt, rustfmt_skip)] +#[rustfmt::skip] fn mvhd32(i: &[u8]) -> IResult<&[u8], MvhdBox> { let (i, version_flags) = be_u32(i)?; let (i, created_date) = be_u32(i)?; @@ -146,7 +146,7 @@ fn mvhd32(i: &[u8]) -> IResult<&[u8], MvhdBox> { Ok((i, mvhd_box)) } -#[cfg_attr(rustfmt, rustfmt_skip)] +#[rustfmt::skip] fn mvhd64(i: &[u8]) -> IResult<&[u8], MvhdBox> { let (i, version_flags) = be_u32(i)?; let (i, created_date) = 
be_u64(i)?; diff --git a/tests/overflow.rs b/tests/overflow.rs index d177886f3..23d820b6b 100644 --- a/tests/overflow.rs +++ b/tests/overflow.rs @@ -1,4 +1,4 @@ -#![cfg_attr(feature = "cargo-clippy", allow(unreadable_literal))] +#![allow(clippy::unreadable_literal)] #![cfg(target_pointer_width = "64")] use nom::bytes::streaming::take; @@ -73,6 +73,7 @@ fn overflow_incomplete_many1() { fn overflow_incomplete_many_till() { use nom::{bytes::complete::tag, multi::many_till}; + #[allow(clippy::type_complexity)] fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> { many_till(length_data(be_u64), tag("abc"))(i) } diff --git a/tests/reborrow_fold.rs b/tests/reborrow_fold.rs index 486617e42..ba3df092d 100644 --- a/tests/reborrow_fold.rs +++ b/tests/reborrow_fold.rs @@ -10,7 +10,7 @@ use nom::multi::fold_many0; use nom::sequence::delimited; use nom::IResult; -fn atom<'a>(_tomb: &'a mut ()) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], String> { +fn atom(_tomb: &mut ()) -> impl for<'a> FnMut(&'a [u8]) -> IResult<&'a [u8], String> { move |input| { map( map_res(is_not(" \t\r\n"), str::from_utf8), @@ -20,7 +20,7 @@ fn atom<'a>(_tomb: &'a mut ()) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Stri } // FIXME: should we support the use case of borrowing data mutably in a parser? -fn list<'a>(i: &'a [u8], tomb: &'a mut ()) -> IResult<&'a [u8], String> { +fn list<'a>(i: &'a [u8], tomb: &mut ()) -> IResult<&'a [u8], String> { delimited( char('('), fold_many0(atom(tomb), String::new, |acc: String, next: String| {