diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2024-02-08 01:24:05 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2024-02-08 01:24:05 +0000 |
commit | 065ff25d23062974f764c6190d68c80a67a6bfb1 (patch) | |
tree | 919294d605d7361ac40c3deea144cdbcc5b9e114 | |
parent | 61bd3bd1cae12eae74ef4882fea0011807d78f8f (diff) | |
parent | 0c65325da3ac3393f0f225cb4d19c3f289fa3519 (diff) | |
download | winnow-simpleperf-release.tar.gz |
Snap for 11421525 from 0c65325da3ac3393f0f225cb4d19c3f289fa3519 to simpleperf-releasesimpleperf-release
Change-Id: I0d8d5442ec91ed40017f619420322b1b801cc585
74 files changed, 5298 insertions, 1184 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 57ad92d..8f6a0d4 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,6 +1,6 @@ { "git": { - "sha1": "e01b1a91a876616fcbcf0f1caff25523898cba2a" + "sha1": "f674e29118c832ec09ea692e2fc20e891614b263" }, "path_in_vcs": "" }
\ No newline at end of file @@ -5,7 +5,7 @@ rust_library_host { name: "libwinnow", crate_name: "winnow", cargo_env_compat: true, - cargo_pkg_version: "0.5.14", + cargo_pkg_version: "0.5.37", srcs: ["src/lib.rs"], edition: "2021", features: [ @@ -863,6 +863,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" [[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] name = "rustix" version = "0.37.20" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1391,7 +1397,7 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.5.14" +version = "0.5.37" dependencies = [ "anstream", "anstyle", @@ -1403,6 +1409,7 @@ dependencies = [ "lexopt", "memchr", "proptest", + "rustc-hash", "snapbox", "term-transcript", "terminal_size", @@ -13,7 +13,7 @@ edition = "2021" rust-version = "1.64.0" name = "winnow" -version = "0.5.14" +version = "0.5.37" include = [ "build.rs", "src/**/*", @@ -84,6 +84,12 @@ replace = """ [Unreleased]: https://github.com/winnow-rs/winnow/compare/{{tag_name}}...HEAD""" search = "<!-- next-url -->" +[[package.metadata.release.pre-release-replacements]] +exactly = 1 +file = "src/lib.rs" +replace = "blob/v{{version}}/CHANGELOG.md" +search = 'blob/v.+\..+\..+/CHANGELOG.md' + [profile.bench] lto = true codegen-units = 1 @@ -142,12 +148,25 @@ required-features = ["alloc"] name = "arithmetic" path = "examples/arithmetic/bench.rs" harness = false +required-features = ["alloc"] [[bench]] name = "contains_token" harness = false [[bench]] +name = "find_slice" +harness = false + +[[bench]] +name = "iter" +harness = false + +[[bench]] +name = "next_slice" +harness = false + +[[bench]] name = "number" harness = false @@ -208,6 +227,9 @@ 
version = "0.3.0" [dev-dependencies.proptest] version = "1.2.0" +[dev-dependencies.rustc-hash] +version = "1.1.0" + [dev-dependencies.snapbox] version = "0.4.11" features = ["examples"] @@ -233,4 +255,6 @@ unstable-doc = [ "alloc", "std", "simd", + "unstable-recover", ] +unstable-recover = [] diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 3e7b219..7542fbb 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -19,7 +19,7 @@ include = [ [package] name = "winnow" -version = "0.5.14" +version = "0.5.37" description = "A byte-oriented, zero-copy, parser combinators library" repository = "https://github.com/winnow-rs/winnow" categories = ["parsing"] @@ -42,6 +42,7 @@ pre-release-replacements = [ {file="CHANGELOG.md", search="ReleaseDate", replace="{{date}}", min=1}, {file="CHANGELOG.md", search="<!-- next-header -->", replace="<!-- next-header -->\n## [Unreleased] - ReleaseDate\n", exactly=1}, {file="CHANGELOG.md", search="<!-- next-url -->", replace="<!-- next-url -->\n[Unreleased]: https://github.com/winnow-rs/winnow/compare/{{tag_name}}...HEAD", exactly=1}, + {file="src/lib.rs", search="blob/v.+\\..+\\..+/CHANGELOG.md", replace="blob/v{{version}}/CHANGELOG.md", exactly=1}, ] [features] @@ -50,8 +51,9 @@ alloc = [] std = ["alloc", "memchr?/std"] simd = ["dep:memchr"] debug = ["dep:anstream", "dep:anstyle", "dep:is-terminal", "dep:terminal_size"] +unstable-recover = [] -unstable-doc = ["alloc", "std", "simd"] +unstable-doc = ["alloc", "std", "simd", "unstable-recover"] [dependencies] anstream = { version = "0.3.2", optional = true } @@ -69,6 +71,7 @@ term-transcript = "0.2.0" escargot = "0.5.7" snapbox = { version = "0.4.11", features = ["examples"] } circular = "0.3.0" +rustc-hash = "1.1.0" [profile.bench] debug = true @@ -128,12 +131,25 @@ required-features = ["alloc"] name = "arithmetic" path = "examples/arithmetic/bench.rs" harness = false +required-features = ["alloc"] [[bench]] name = "contains_token" harness = false [[bench]] +name = "find_slice" +harness = 
false + +[[bench]] +name = "iter" +harness = false + +[[bench]] +name = "next_slice" +harness = false + +[[bench]] name = "number" harness = false @@ -1,19 +1,24 @@ +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update external/rust/crates/winnow +# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md + name: "winnow" description: "A byte-oriented, zero-copy, parser combinators library" third_party { + license_type: NOTICE + last_upgrade_date { + year: 2024 + month: 2 + day: 6 + } identifier { type: "crates.io" - value: "https://crates.io/crates/winnow" + value: "https://static.crates.io/crates/winnow/winnow-0.5.37.crate" + version: "0.5.14" } identifier { type: "Archive" value: "https://static.crates.io/crates/winnow/winnow-0.5.14.crate" - } - version: "0.5.14" - license_type: NOTICE - last_upgrade_date { - year: 2023 - month: 8 - day: 23 + version: "0.5.37" } } @@ -19,7 +19,7 @@ For more details, see: # Contributors winnow is the fruit of the work of many contributors over the years, many -thanks for your help! In particular, thanks to [Geal](https://github.com/Geal) +thanks for your help! In particular, thanks to [Geal](https://github.com/Geal) for the original [`nom` crate](https://crates.io/crates/nom). 
<a href="https://github.com/winnow-rs/winnow/graphs/contributors"> diff --git a/benches/contains_token.rs b/benches/contains_token.rs index 2980ce6..675b08e 100644 --- a/benches/contains_token.rs +++ b/benches/contains_token.rs @@ -3,7 +3,7 @@ use criterion::black_box; use winnow::combinator::alt; use winnow::combinator::repeat; use winnow::prelude::*; -use winnow::token::take_till1; +use winnow::token::take_till; use winnow::token::take_while; fn contains_token(c: &mut criterion::Criterion) { @@ -52,17 +52,29 @@ fn contains_token(c: &mut criterion::Criterion) { fn parser_slice(input: &mut &str) -> PResult<usize> { let contains = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'][..]; - repeat(0.., alt((take_while(1.., contains), take_till1(contains)))).parse_next(input) + repeat( + 0.., + alt((take_while(1.., contains), take_till(1.., contains))), + ) + .parse_next(input) } fn parser_array(input: &mut &str) -> PResult<usize> { let contains = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; - repeat(0.., alt((take_while(1.., contains), take_till1(contains)))).parse_next(input) + repeat( + 0.., + alt((take_while(1.., contains), take_till(1.., contains))), + ) + .parse_next(input) } fn parser_tuple(input: &mut &str) -> PResult<usize> { let contains = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9'); - repeat(0.., alt((take_while(1.., contains), take_till1(contains)))).parse_next(input) + repeat( + 0.., + alt((take_while(1.., contains), take_till(1.., contains))), + ) + .parse_next(input) } fn parser_closure_or(input: &mut &str) -> PResult<usize> { @@ -78,12 +90,20 @@ fn parser_closure_or(input: &mut &str) -> PResult<usize> { || c == '8' || c == '9' }; - repeat(0.., alt((take_while(1.., contains), take_till1(contains)))).parse_next(input) + repeat( + 0.., + alt((take_while(1.., contains), take_till(1.., contains))), + ) + .parse_next(input) } fn parser_closure_matches(input: &mut &str) -> PResult<usize> { let contains = |c: char| matches!(c, '0' | '1' | '2' | 
'3' | '4' | '5' | '6' | '7' | '8' | '9'); - repeat(0.., alt((take_while(1.., contains), take_till1(contains)))).parse_next(input) + repeat( + 0.., + alt((take_while(1.., contains), take_till(1.., contains))), + ) + .parse_next(input) } const CONTIGUOUS: &str = "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"; diff --git a/benches/find_slice.rs b/benches/find_slice.rs new file mode 100644 index 0000000..656c989 --- /dev/null +++ b/benches/find_slice.rs @@ -0,0 +1,50 @@ +use criterion::black_box; + +use winnow::combinator::repeat; +use winnow::prelude::*; +use winnow::token::take_until; + +fn find_slice(c: &mut criterion::Criterion) { + let empty = ""; + let start_byte = "\r".repeat(100); + let start_slice = "\r\n".repeat(100); + let small = format!("{:>10}\r\n", "").repeat(100); + let large = format!("{:>10000}\r\n", "").repeat(100); + + let data = [ + ("empty", (empty, empty)), + ("start", (&start_byte, &start_slice)), + ("medium", (&small, &small)), + ("large", (&large, &large)), + ]; + let mut group = c.benchmark_group("find_slice"); + for (name, samples) in data { + group.bench_with_input( + criterion::BenchmarkId::new("byte", name), + samples.0, + |b, sample| { + b.iter(|| black_box(parser_byte.parse_peek(black_box(sample)).unwrap())); + }, + ); + + group.bench_with_input( + criterion::BenchmarkId::new("slice", name), + samples.1, + |b, sample| { + b.iter(|| black_box(parser_slice.parse_peek(black_box(sample)).unwrap())); + }, + ); + } + group.finish(); +} + +fn parser_byte(input: &mut &str) -> PResult<usize> { + repeat(0.., (take_until(0.., "\r"), "\r")).parse_next(input) +} + +fn parser_slice(input: &mut &str) -> PResult<usize> { + repeat(0.., (take_until(0.., "\r\n"), "\r\n")).parse_next(input) +} + +criterion::criterion_group!(benches, 
find_slice); +criterion::criterion_main!(benches); diff --git a/benches/iter.rs b/benches/iter.rs new file mode 100644 index 0000000..0a0d5ff --- /dev/null +++ b/benches/iter.rs @@ -0,0 +1,120 @@ +use criterion::black_box; + +use winnow::combinator::opt; +use winnow::prelude::*; +use winnow::stream::AsChar; +use winnow::stream::Stream as _; +use winnow::token::one_of; + +fn iter(c: &mut criterion::Criterion) { + let data = [ + ("contiguous", CONTIGUOUS.as_bytes()), + ("interleaved", INTERLEAVED.as_bytes()), + ("canada", CANADA.as_bytes()), + ]; + let mut group = c.benchmark_group("iter"); + for (name, sample) in data { + let len = sample.len(); + group.throughput(criterion::Throughput::Bytes(len as u64)); + + group.bench_with_input( + criterion::BenchmarkId::new("iterate", name), + &len, + |b, _| { + b.iter(|| black_box(iterate.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("next_token", name), + &len, + |b, _| { + b.iter(|| black_box(next_token.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("opt(one_of)", name), + &len, + |b, _| { + b.iter(|| black_box(opt_one_of.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("take_while", name), + &len, + |b, _| { + b.iter(|| black_box(take_while.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input(criterion::BenchmarkId::new("repeat", name), &len, |b, _| { + b.iter(|| black_box(repeat.parse_peek(black_box(sample)).unwrap())); + }); + } + group.finish(); +} + +fn iterate(input: &mut &[u8]) -> PResult<usize> { + let mut count = 0; + for byte in input.iter() { + if byte.is_dec_digit() { + count += 1; + } + } + input.finish(); + Ok(count) +} + +fn next_token(input: &mut &[u8]) -> PResult<usize> { + let mut count = 0; + while let Some(byte) = input.next_token() { + if byte.is_dec_digit() { + count += 1; + } + } + Ok(count) +} + 
+fn opt_one_of(input: &mut &[u8]) -> PResult<usize> { + let mut count = 0; + while !input.is_empty() { + while opt(one_of(AsChar::is_dec_digit)) + .parse_next(input)? + .is_some() + { + count += 1; + } + while opt(one_of(|b: u8| !b.is_dec_digit())) + .parse_next(input)? + .is_some() + {} + } + Ok(count) +} + +fn take_while(input: &mut &[u8]) -> PResult<usize> { + let mut count = 0; + while !input.is_empty() { + count += winnow::token::take_while(0.., AsChar::is_dec_digit) + .parse_next(input)? + .len(); + let _ = winnow::token::take_while(0.., |b: u8| !b.is_dec_digit()).parse_next(input)?; + } + Ok(count) +} + +fn repeat(input: &mut &[u8]) -> PResult<usize> { + let mut count = 0; + while !input.is_empty() { + count += winnow::combinator::repeat(0.., one_of(AsChar::is_dec_digit)) + .map(|count: usize| count) + .parse_next(input)?; + winnow::combinator::repeat(0.., one_of(|b: u8| !b.is_dec_digit())).parse_next(input)?; + } + Ok(count) +} + +const CONTIGUOUS: &str = "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"; +const INTERLEAVED: &str = "0123456789abc0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab0123456789ab"; +const CANADA: &str = include_str!("../third_party/nativejson-benchmark/data/canada.json"); + +criterion::criterion_group!(benches, iter); +criterion::criterion_main!(benches); diff --git a/benches/next_slice.rs b/benches/next_slice.rs new file mode 100644 index 0000000..6c25d23 --- /dev/null +++ b/benches/next_slice.rs @@ -0,0 +1,133 @@ +use criterion::black_box; + +use winnow::combinator::repeat; +use winnow::prelude::*; +use 
winnow::token::one_of; +use winnow::token::tag; + +fn next_slice(c: &mut criterion::Criterion) { + let mut group = c.benchmark_group("next_slice"); + + let name = "ascii"; + let sample = "h".repeat(100); + let sample = sample.as_str(); + group.bench_with_input( + criterion::BenchmarkId::new("char", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_ascii_char.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("str", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_ascii_str.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("one_of", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_ascii_one_of.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("tag_char", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_ascii_tag_char.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("tag_str", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_ascii_tag_str.parse_peek(black_box(sample)).unwrap())); + }, + ); + + let name = "utf8"; + let sample = "🧑".repeat(100); + let sample = sample.as_str(); + group.bench_with_input( + criterion::BenchmarkId::new("char", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_utf8_char.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("str", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_utf8_str.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("one_of", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_utf8_one_of.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("tag_char", name), + sample, + |b, sample| { + b.iter(|| 
black_box(parser_utf8_tag_char.parse_peek(black_box(sample)).unwrap())); + }, + ); + group.bench_with_input( + criterion::BenchmarkId::new("tag_str", name), + sample, + |b, sample| { + b.iter(|| black_box(parser_utf8_tag_str.parse_peek(black_box(sample)).unwrap())); + }, + ); + + group.finish(); +} + +fn parser_ascii_char(input: &mut &str) -> PResult<usize> { + repeat(0.., 'h').parse_next(input) +} + +fn parser_ascii_str(input: &mut &str) -> PResult<usize> { + repeat(0.., "h").parse_next(input) +} + +fn parser_ascii_one_of(input: &mut &str) -> PResult<usize> { + repeat(0.., one_of('h')).parse_next(input) +} + +fn parser_ascii_tag_char(input: &mut &str) -> PResult<usize> { + repeat(0.., tag('h')).parse_next(input) +} + +fn parser_ascii_tag_str(input: &mut &str) -> PResult<usize> { + repeat(0.., tag("h")).parse_next(input) +} + +fn parser_utf8_char(input: &mut &str) -> PResult<usize> { + repeat(0.., '🧑').parse_next(input) +} + +fn parser_utf8_str(input: &mut &str) -> PResult<usize> { + repeat(0.., "🧑").parse_next(input) +} + +fn parser_utf8_one_of(input: &mut &str) -> PResult<usize> { + repeat(0.., one_of('🧑')).parse_next(input) +} + +fn parser_utf8_tag_char(input: &mut &str) -> PResult<usize> { + repeat(0.., tag('🧑')).parse_next(input) +} + +fn parser_utf8_tag_str(input: &mut &str) -> PResult<usize> { + repeat(0.., tag("🧑")).parse_next(input) +} + +criterion::criterion_group!(benches, next_slice); +criterion::criterion_main!(benches); diff --git a/examples/arithmetic/bench.rs b/examples/arithmetic/bench.rs index 6504454..692ac22 100644 --- a/examples/arithmetic/bench.rs +++ b/examples/arithmetic/bench.rs @@ -1,19 +1,31 @@ mod parser; +mod parser_ast; +mod parser_lexer; use winnow::prelude::*; -use parser::expr; - #[allow(clippy::eq_op, clippy::erasing_op)] fn arithmetic(c: &mut criterion::Criterion) { - let data = " 2*2 / ( 5 - 1) + 3 / 4 * (2 - 7 + 567 *12 /2) + 3*(1+2*( 45 /2));"; + let data = " 2*2 / ( 5 - 1) + 3 / 4 * (2 - 7 + 567 *12 /2) + 3*(1+2*( 45 /2))"; + 
let expected = 2 * 2 / (5 - 1) + 3 * (1 + 2 * (45 / 2)); + assert_eq!(parser::expr.parse(data), Ok(expected)); + assert_eq!( + parser_ast::expr.parse(data).map(|ast| ast.eval()), + Ok(expected) + ); assert_eq!( - expr.parse_peek(data), - Ok((";", 2 * 2 / (5 - 1) + 3 * (1 + 2 * (45 / 2)),)) + parser_lexer::expr2.parse(data).map(|ast| ast.eval()), + Ok(expected) ); - c.bench_function("arithmetic", |b| { - b.iter(|| expr.parse_peek(data).unwrap()); + c.bench_function("direct", |b| { + b.iter(|| parser::expr.parse(data).unwrap()); + }); + c.bench_function("ast", |b| { + b.iter(|| parser_ast::expr.parse(data).unwrap().eval()); + }); + c.bench_function("lexer", |b| { + b.iter(|| parser_lexer::expr2.parse_peek(data).unwrap()); }); } diff --git a/examples/arithmetic/main.rs b/examples/arithmetic/main.rs index 94a17d8..e46cf2f 100644 --- a/examples/arithmetic/main.rs +++ b/examples/arithmetic/main.rs @@ -2,32 +2,41 @@ use winnow::prelude::*; mod parser; mod parser_ast; +mod parser_lexer; fn main() -> Result<(), lexopt::Error> { let args = Args::parse()?; let input = args.input.as_deref().unwrap_or("1 + 1"); + if let Err(err) = calc(input, args.implementation) { + println!("FAILED"); + println!("{}", err); + } + + Ok(()) +} +fn calc( + input: &str, + imp: Impl, +) -> Result<(), winnow::error::ParseError<&str, winnow::error::ContextError>> { println!("{} =", input); - match args.implementation { - Impl::Eval => match parser::expr.parse(input) { - Ok(result) => { - println!(" {}", result); - } - Err(err) => { - println!(" {}", err); - } - }, - Impl::Ast => match parser_ast::expr.parse(input) { - Ok(result) => { - println!(" {:#?}", result); - } - Err(err) => { - println!(" {}", err); - } - }, + match imp { + Impl::Eval => { + let result = parser::expr.parse(input)?; + println!(" {}", result); + } + Impl::Ast => { + let result = parser_ast::expr.parse(input)?; + println!(" {:#?}={}", result, result.eval()); + } + Impl::Lexer => { + let tokens = parser_lexer::lex.parse(input)?; 
+ println!(" {:#?}", tokens); + let result = parser_lexer::expr.parse(tokens.as_slice()).unwrap(); + println!(" {:#?}={}", result, result.eval()); + } } - Ok(()) } @@ -40,6 +49,7 @@ struct Args { enum Impl { Eval, Ast, + Lexer, } impl Default for Impl { @@ -61,6 +71,7 @@ impl Args { res.implementation = args.value()?.parse_with(|s| match s { "eval" => Ok(Impl::Eval), "ast" => Ok(Impl::Ast), + "lexer" => Ok(Impl::Lexer), _ => Err("expected `eval`, `ast`"), })?; } diff --git a/examples/arithmetic/parser.rs b/examples/arithmetic/parser.rs index 50ffbdb..d51007a 100644 --- a/examples/arithmetic/parser.rs +++ b/examples/arithmetic/parser.rs @@ -2,10 +2,10 @@ use std::str::FromStr; use winnow::prelude::*; use winnow::{ - ascii::{digit1 as digits, space0 as spaces}, + ascii::{digit1 as digits, multispace0 as multispaces}, combinator::alt, combinator::delimited, - combinator::fold_repeat, + combinator::repeat, token::one_of, }; @@ -14,19 +14,18 @@ use winnow::{ pub fn expr(i: &mut &str) -> PResult<i64> { let init = term.parse_next(i)?; - fold_repeat( - 0.., - (one_of(['+', '-']), term), - move || init, - |acc, (op, val): (char, i64)| { - if op == '+' { - acc + val - } else { - acc - val - } - }, - ) - .parse_next(i) + repeat(0.., (one_of(['+', '-']), term)) + .fold( + move || init, + |acc, (op, val): (char, i64)| { + if op == '+' { + acc + val + } else { + acc - val + } + }, + ) + .parse_next(i) } // We read an initial factor and for each time we find @@ -35,68 +34,102 @@ pub fn expr(i: &mut &str) -> PResult<i64> { fn term(i: &mut &str) -> PResult<i64> { let init = factor.parse_next(i)?; - fold_repeat( - 0.., - (one_of(['*', '/']), factor), - move || init, - |acc, (op, val): (char, i64)| { - if op == '*' { - acc * val - } else { - acc / val - } - }, - ) - .parse_next(i) + repeat(0.., (one_of(['*', '/']), factor)) + .fold( + move || init, + |acc, (op, val): (char, i64)| { + if op == '*' { + acc * val + } else { + acc / val + } + }, + ) + .parse_next(i) } -// We transform an 
integer string into a i64, ignoring surrounding whitespaces +// We transform an integer string into a i64, ignoring surrounding whitespace // We look for a digit suite, and try to convert it. // If either str::from_utf8 or FromStr::from_str fail, // we fallback to the parens parser defined above fn factor(i: &mut &str) -> PResult<i64> { delimited( - spaces, - alt(( - digits.try_map(FromStr::from_str), - delimited('(', expr, ')'), - parens, - )), - spaces, + multispaces, + alt((digits.try_map(FromStr::from_str), parens)), + multispaces, ) .parse_next(i) } -// We parse any expr surrounded by parens, ignoring all whitespaces around those +// We parse any expr surrounded by parens, ignoring all whitespace around those fn parens(i: &mut &str) -> PResult<i64> { delimited('(', expr, ')').parse_next(i) } #[test] fn factor_test() { - assert_eq!(factor.parse_peek("3"), Ok(("", 3))); - assert_eq!(factor.parse_peek(" 12"), Ok(("", 12))); - assert_eq!(factor.parse_peek("537 "), Ok(("", 537))); - assert_eq!(factor.parse_peek(" 24 "), Ok(("", 24))); + let input = "3"; + let expected = Ok(("", 3)); + assert_eq!(factor.parse_peek(input), expected); + + let input = " 12"; + let expected = Ok(("", 12)); + assert_eq!(factor.parse_peek(input), expected); + + let input = "537 "; + let expected = Ok(("", 537)); + assert_eq!(factor.parse_peek(input), expected); + + let input = " 24 "; + let expected = Ok(("", 24)); + assert_eq!(factor.parse_peek(input), expected); } #[test] fn term_test() { - assert_eq!(term.parse_peek(" 12 *2 / 3"), Ok(("", 8))); - assert_eq!(term.parse_peek(" 2* 3 *2 *2 / 3"), Ok(("", 8))); - assert_eq!(term.parse_peek(" 48 / 3/2"), Ok(("", 8))); + let input = " 12 *2 / 3"; + let expected = Ok(("", 8)); + assert_eq!(term.parse_peek(input), expected); + + let input = " 12 *2 / 3"; + let expected = Ok(("", 8)); + assert_eq!(term.parse_peek(input), expected); + + let input = " 2* 3 *2 *2 / 3"; + let expected = Ok(("", 8)); + assert_eq!(term.parse_peek(input), expected); + 
+ let input = " 48 / 3/2"; + let expected = Ok(("", 8)); + assert_eq!(term.parse_peek(input), expected); } #[test] fn expr_test() { - assert_eq!(expr.parse_peek(" 1 + 2 "), Ok(("", 3))); - assert_eq!(expr.parse_peek(" 12 + 6 - 4+ 3"), Ok(("", 17))); - assert_eq!(expr.parse_peek(" 1 + 2*3 + 4"), Ok(("", 11))); + let input = " 1 + 2 "; + let expected = Ok(("", 3)); + assert_eq!(expr.parse_peek(input), expected); + + let input = " 12 + 6 - 4+ 3"; + let expected = Ok(("", 17)); + assert_eq!(expr.parse_peek(input), expected); + + let input = " 1 + 2*3 + 4"; + let expected = Ok(("", 11)); + assert_eq!(expr.parse_peek(input), expected); } #[test] fn parens_test() { - assert_eq!(expr.parse_peek(" ( 2 )"), Ok(("", 2))); - assert_eq!(expr.parse_peek(" 2* ( 3 + 4 ) "), Ok(("", 14))); - assert_eq!(expr.parse_peek(" 2*2 / ( 5 - 1) + 3"), Ok(("", 4))); + let input = " ( 2 )"; + let expected = Ok(("", 2)); + assert_eq!(expr.parse_peek(input), expected); + + let input = " 2* ( 3 + 4 ) "; + let expected = Ok(("", 14)); + assert_eq!(expr.parse_peek(input), expected); + + let input = " 2*2 / ( 5 - 1) + 3"; + let expected = Ok(("", 4)); + assert_eq!(expr.parse_peek(input), expected); } diff --git a/examples/arithmetic/parser_ast.rs b/examples/arithmetic/parser_ast.rs index 5fb9847..20feb26 100644 --- a/examples/arithmetic/parser_ast.rs +++ b/examples/arithmetic/parser_ast.rs @@ -5,13 +5,14 @@ use std::str::FromStr; use winnow::prelude::*; use winnow::{ - ascii::{digit1 as digit, multispace0 as multispace}, + ascii::{digit1 as digits, multispace0 as multispaces}, combinator::alt, + combinator::delimited, combinator::repeat, - combinator::{delimited, preceded}, + token::one_of, }; -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum Expr { Value(i64), Add(Box<Expr>, Box<Expr>), @@ -21,12 +22,17 @@ pub enum Expr { Paren(Box<Expr>), } -#[derive(Debug)] -pub enum Oper { - Add, - Sub, - Mul, - Div, +impl Expr { + pub fn eval(&self) -> i64 { + match self { + Self::Value(v) => *v, + 
Self::Add(lhs, rhs) => lhs.eval() + rhs.eval(), + Self::Sub(lhs, rhs) => lhs.eval() - rhs.eval(), + Self::Mul(lhs, rhs) => lhs.eval() * rhs.eval(), + Self::Div(lhs, rhs) => lhs.eval() / rhs.eval(), + Self::Paren(expr) => expr.eval(), + } + } } impl Display for Expr { @@ -44,125 +50,133 @@ impl Display for Expr { } pub fn expr(i: &mut &str) -> PResult<Expr> { - let initial = term(i)?; - let remainder = repeat( - 0.., - alt(( - |i: &mut &str| { - let add = preceded("+", term).parse_next(i)?; - Ok((Oper::Add, add)) - }, - |i: &mut &str| { - let sub = preceded("-", term).parse_next(i)?; - Ok((Oper::Sub, sub)) + let init = term.parse_next(i)?; + + repeat(0.., (one_of(['+', '-']), term)) + .fold( + move || init.clone(), + |acc, (op, val): (char, Expr)| { + if op == '+' { + Expr::Add(Box::new(acc), Box::new(val)) + } else { + Expr::Sub(Box::new(acc), Box::new(val)) + } }, - )), - ) - .parse_next(i)?; - - Ok(fold_exprs(initial, remainder)) + ) + .parse_next(i) } fn term(i: &mut &str) -> PResult<Expr> { - let initial = factor(i)?; - let remainder = repeat( - 0.., - alt(( - |i: &mut &str| { - let mul = preceded("*", factor).parse_next(i)?; - Ok((Oper::Mul, mul)) + let init = factor.parse_next(i)?; + + repeat(0.., (one_of(['*', '/']), factor)) + .fold( + move || init.clone(), + |acc, (op, val): (char, Expr)| { + if op == '*' { + Expr::Mul(Box::new(acc), Box::new(val)) + } else { + Expr::Div(Box::new(acc), Box::new(val)) + } }, - |i: &mut &str| { - let div = preceded("/", factor).parse_next(i)?; - Ok((Oper::Div, div)) - }, - )), - ) - .parse_next(i)?; - - Ok(fold_exprs(initial, remainder)) + ) + .parse_next(i) } fn factor(i: &mut &str) -> PResult<Expr> { - alt(( - delimited(multispace, digit, multispace) - .try_map(FromStr::from_str) - .map(Expr::Value), - parens, - )) - .parse_next(i) -} - -fn parens(i: &mut &str) -> PResult<Expr> { delimited( - multispace, - delimited("(", expr.map(|e| Expr::Paren(Box::new(e))), ")"), - multispace, + multispaces, + 
alt((digits.try_map(FromStr::from_str).map(Expr::Value), parens)), + multispaces, ) .parse_next(i) } -fn fold_exprs(initial: Expr, remainder: Vec<(Oper, Expr)>) -> Expr { - remainder.into_iter().fold(initial, |acc, pair| { - let (oper, expr) = pair; - match oper { - Oper::Add => Expr::Add(Box::new(acc), Box::new(expr)), - Oper::Sub => Expr::Sub(Box::new(acc), Box::new(expr)), - Oper::Mul => Expr::Mul(Box::new(acc), Box::new(expr)), - Oper::Div => Expr::Div(Box::new(acc), Box::new(expr)), - } - }) +fn parens(i: &mut &str) -> PResult<Expr> { + delimited("(", expr, ")") + .map(|e| Expr::Paren(Box::new(e))) + .parse_next(i) } #[test] fn factor_test() { - assert_eq!( - factor - .parse_peek(" 3 ") - .map(|(i, x)| (i, format!("{:?}", x))), - Ok(("", String::from("Value(3)"))) - ); + let input = "3"; + let expected = Ok(("", String::from("Value(3)"))); + assert_eq!(factor.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 12"; + let expected = Ok(("", String::from("Value(12)"))); + assert_eq!(factor.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = "537 "; + let expected = Ok(("", String::from("Value(537)"))); + assert_eq!(factor.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 24 "; + let expected = Ok(("", String::from("Value(24)"))); + assert_eq!(factor.map(|e| format!("{e:?}")).parse_peek(input), expected); } #[test] fn term_test() { - assert_eq!( - term.parse_peek(" 3 * 5 ") - .map(|(i, x)| (i, format!("{:?}", x))), - Ok(("", String::from("Mul(Value(3), Value(5))"))) - ); + let input = " 12 *2 / 3"; + let expected = Ok(("", String::from("Div(Mul(Value(12), Value(2)), Value(3))"))); + assert_eq!(term.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 12 *2 / 3"; + let expected = Ok(("", String::from("Div(Mul(Value(12), Value(2)), Value(3))"))); + assert_eq!(term.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 2* 3 *2 *2 / 3"; + let expected = Ok(( + 
"", + String::from("Div(Mul(Mul(Mul(Value(2), Value(3)), Value(2)), Value(2)), Value(3))"), + )); + assert_eq!(term.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 48 / 3/2"; + let expected = Ok(("", String::from("Div(Div(Value(48), Value(3)), Value(2))"))); + assert_eq!(term.map(|e| format!("{e:?}")).parse_peek(input), expected); } #[test] fn expr_test() { - assert_eq!( - expr.parse_peek(" 1 + 2 * 3 ") - .map(|(i, x)| (i, format!("{:?}", x))), - Ok(("", String::from("Add(Value(1), Mul(Value(2), Value(3)))"))) - ); - assert_eq!( - expr.parse_peek(" 1 + 2 * 3 / 4 - 5 ") - .map(|(i, x)| (i, format!("{:?}", x))), - Ok(( - "", - String::from("Sub(Add(Value(1), Div(Mul(Value(2), Value(3)), Value(4))), Value(5))") - )) - ); - assert_eq!( - expr.parse_peek(" 72 / 2 / 3 ") - .map(|(i, x)| (i, format!("{:?}", x))), - Ok(("", String::from("Div(Div(Value(72), Value(2)), Value(3))"))) - ); + let input = " 1 + 2 "; + let expected = Ok(("", String::from("Add(Value(1), Value(2))"))); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 12 + 6 - 4+ 3"; + let expected = Ok(( + "", + String::from("Add(Sub(Add(Value(12), Value(6)), Value(4)), Value(3))"), + )); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 1 + 2*3 + 4"; + let expected = Ok(( + "", + String::from("Add(Add(Value(1), Mul(Value(2), Value(3))), Value(4))"), + )); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); } #[test] fn parens_test() { - assert_eq!( - expr.parse_peek(" ( 1 + 2 ) * 3 ") - .map(|(i, x)| (i, format!("{:?}", x))), - Ok(( - "", - String::from("Mul(Paren(Add(Value(1), Value(2))), Value(3))") - )) - ); + let input = " ( 2 )"; + let expected = Ok(("", String::from("Paren(Value(2))"))); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 2* ( 3 + 4 ) "; + let expected = Ok(( + "", + String::from("Mul(Value(2), Paren(Add(Value(3), 
Value(4))))"), + )); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); + + let input = " 2*2 / ( 5 - 1) + 3"; + let expected = Ok(( + "", + String::from("Add(Div(Mul(Value(2), Value(2)), Paren(Sub(Value(5), Value(1)))), Value(3))"), + )); + assert_eq!(expr.map(|e| format!("{e:?}")).parse_peek(input), expected); } diff --git a/examples/arithmetic/parser_lexer.rs b/examples/arithmetic/parser_lexer.rs new file mode 100644 index 0000000..d6b7422 --- /dev/null +++ b/examples/arithmetic/parser_lexer.rs @@ -0,0 +1,300 @@ +use std::fmt; +use std::fmt::{Debug, Display, Formatter}; + +use std::str::FromStr; + +use winnow::prelude::*; +use winnow::{ + ascii::{digit1 as digits, multispace0 as multispaces}, + combinator::alt, + combinator::dispatch, + combinator::fail, + combinator::peek, + combinator::repeat, + combinator::{delimited, preceded, terminated}, + token::any, + token::one_of, +}; + +#[derive(Debug, Clone)] +pub enum Expr { + Value(i64), + Add(Box<Expr>, Box<Expr>), + Sub(Box<Expr>, Box<Expr>), + Mul(Box<Expr>, Box<Expr>), + Div(Box<Expr>, Box<Expr>), + Paren(Box<Expr>), +} + +impl Expr { + pub fn eval(&self) -> i64 { + match self { + Self::Value(v) => *v, + Self::Add(lhs, rhs) => lhs.eval() + rhs.eval(), + Self::Sub(lhs, rhs) => lhs.eval() - rhs.eval(), + Self::Mul(lhs, rhs) => lhs.eval() * rhs.eval(), + Self::Div(lhs, rhs) => lhs.eval() / rhs.eval(), + Self::Paren(expr) => expr.eval(), + } + } +} + +impl Display for Expr { + fn fmt(&self, format: &mut Formatter<'_>) -> fmt::Result { + use Expr::{Add, Div, Mul, Paren, Sub, Value}; + match *self { + Value(val) => write!(format, "{}", val), + Add(ref left, ref right) => write!(format, "{} + {}", left, right), + Sub(ref left, ref right) => write!(format, "{} - {}", left, right), + Mul(ref left, ref right) => write!(format, "{} * {}", left, right), + Div(ref left, ref right) => write!(format, "{} / {}", left, right), + Paren(ref expr) => write!(format, "({})", expr), + } + } +} + +#[derive(Copy, 
Clone, Debug, PartialEq, Eq)] +pub enum Token { + Value(i64), + Oper(Oper), + OpenParen, + CloseParen, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Oper { + Add, + Sub, + Mul, + Div, +} + +impl winnow::stream::ContainsToken<Token> for Token { + #[inline(always)] + fn contains_token(&self, token: Token) -> bool { + *self == token + } +} + +impl winnow::stream::ContainsToken<Token> for &'_ [Token] { + #[inline] + fn contains_token(&self, token: Token) -> bool { + self.iter().any(|t| *t == token) + } +} + +impl<const LEN: usize> winnow::stream::ContainsToken<Token> for &'_ [Token; LEN] { + #[inline] + fn contains_token(&self, token: Token) -> bool { + self.iter().any(|t| *t == token) + } +} + +impl<const LEN: usize> winnow::stream::ContainsToken<Token> for [Token; LEN] { + #[inline] + fn contains_token(&self, token: Token) -> bool { + self.iter().any(|t| *t == token) + } +} + +#[allow(dead_code)] +pub fn expr2(i: &mut &str) -> PResult<Expr> { + let tokens = lex.parse_next(i)?; + expr.parse_next(&mut tokens.as_slice()) +} + +pub fn lex(i: &mut &str) -> PResult<Vec<Token>> { + preceded(multispaces, repeat(1.., terminated(token, multispaces))).parse_next(i) +} + +fn token(i: &mut &str) -> PResult<Token> { + dispatch! 
{peek(any); + '0'..='9' => digits.try_map(FromStr::from_str).map(Token::Value), + '(' => '('.value(Token::OpenParen), + ')' => ')'.value(Token::CloseParen), + '+' => '+'.value(Token::Oper(Oper::Add)), + '-' => '-'.value(Token::Oper(Oper::Sub)), + '*' => '*'.value(Token::Oper(Oper::Mul)), + '/' => '/'.value(Token::Oper(Oper::Div)), + _ => fail, + } + .parse_next(i) +} + +pub fn expr(i: &mut &[Token]) -> PResult<Expr> { + let init = term.parse_next(i)?; + + repeat( + 0.., + ( + one_of([Token::Oper(Oper::Add), Token::Oper(Oper::Sub)]), + term, + ), + ) + .fold( + move || init.clone(), + |acc, (op, val): (Token, Expr)| { + if op == Token::Oper(Oper::Add) { + Expr::Add(Box::new(acc), Box::new(val)) + } else { + Expr::Sub(Box::new(acc), Box::new(val)) + } + }, + ) + .parse_next(i) +} + +fn term(i: &mut &[Token]) -> PResult<Expr> { + let init = factor.parse_next(i)?; + + repeat( + 0.., + ( + one_of([Token::Oper(Oper::Mul), Token::Oper(Oper::Div)]), + factor, + ), + ) + .fold( + move || init.clone(), + |acc, (op, val): (Token, Expr)| { + if op == Token::Oper(Oper::Mul) { + Expr::Mul(Box::new(acc), Box::new(val)) + } else { + Expr::Div(Box::new(acc), Box::new(val)) + } + }, + ) + .parse_next(i) +} + +fn factor(i: &mut &[Token]) -> PResult<Expr> { + alt(( + one_of(|t| matches!(t, Token::Value(_))).map(|t| match t { + Token::Value(v) => Expr::Value(v), + _ => unreachable!(), + }), + parens, + )) + .parse_next(i) +} + +fn parens(i: &mut &[Token]) -> PResult<Expr> { + delimited(one_of(Token::OpenParen), expr, one_of(Token::CloseParen)) + .map(|e| Expr::Paren(Box::new(e))) + .parse_next(i) +} + +#[test] +fn lex_test() { + let input = "3"; + let expected = Ok(String::from(r#"("", [Value(3)])"#)); + assert_eq!(lex.parse_peek(input).map(|e| format!("{e:?}")), expected); + + let input = " 24 "; + let expected = Ok(String::from(r#"("", [Value(24)])"#)); + assert_eq!(lex.parse_peek(input).map(|e| format!("{e:?}")), expected); + + let input = " 12 *2 / 3"; + let expected = 
Ok(String::from( + r#"("", [Value(12), Oper(Mul), Value(2), Oper(Div), Value(3)])"#, + )); + assert_eq!(lex.parse_peek(input).map(|e| format!("{e:?}")), expected); + + let input = " 2*2 / ( 5 - 1) + 3"; + let expected = Ok(String::from( + r#"("", [Value(2), Oper(Mul), Value(2), Oper(Div), OpenParen, Value(5), Oper(Sub), Value(1), CloseParen, Oper(Add), Value(3)])"#, + )); + assert_eq!(lex.parse_peek(input).map(|e| format!("{e:?}")), expected); +} + +#[test] +fn factor_test() { + let input = "3"; + let expected = Ok(String::from("Value(3)")); + let input = lex.parse(input).unwrap(); + assert_eq!(factor.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 12"; + let expected = Ok(String::from("Value(12)")); + let input = lex.parse(input).unwrap(); + assert_eq!(factor.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = "537 "; + let expected = Ok(String::from("Value(537)")); + let input = lex.parse(input).unwrap(); + assert_eq!(factor.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 24 "; + let expected = Ok(String::from("Value(24)")); + let input = lex.parse(input).unwrap(); + assert_eq!(factor.map(|e| format!("{e:?}")).parse(&input), expected); +} + +#[test] +fn term_test() { + let input = " 12 *2 / 3"; + let expected = Ok(String::from("Div(Mul(Value(12), Value(2)), Value(3))")); + let input = lex.parse(input).unwrap(); + assert_eq!(term.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 12 *2 / 3"; + let expected = Ok(String::from("Div(Mul(Value(12), Value(2)), Value(3))")); + let input = lex.parse(input).unwrap(); + assert_eq!(term.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 2* 3 *2 *2 / 3"; + let expected = Ok(String::from( + "Div(Mul(Mul(Mul(Value(2), Value(3)), Value(2)), Value(2)), Value(3))", + )); + let input = lex.parse(input).unwrap(); + assert_eq!(term.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 48 / 3/2"; + let expected = 
Ok(String::from("Div(Div(Value(48), Value(3)), Value(2))")); + let input = lex.parse(input).unwrap(); + assert_eq!(term.map(|e| format!("{e:?}")).parse(&input), expected); +} + +#[test] +fn expr_test() { + let input = " 1 + 2 "; + let expected = Ok(String::from("Add(Value(1), Value(2))")); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 12 + 6 - 4+ 3"; + let expected = Ok(String::from( + "Add(Sub(Add(Value(12), Value(6)), Value(4)), Value(3))", + )); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 1 + 2*3 + 4"; + let expected = Ok(String::from( + "Add(Add(Value(1), Mul(Value(2), Value(3))), Value(4))", + )); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); +} + +#[test] +fn parens_test() { + let input = " ( 2 )"; + let expected = Ok(String::from("Paren(Value(2))")); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 2* ( 3 + 4 ) "; + let expected = Ok(String::from( + "Mul(Value(2), Paren(Add(Value(3), Value(4))))", + )); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); + + let input = " 2*2 / ( 5 - 1) + 3"; + let expected = Ok(String::from( + "Add(Div(Mul(Value(2), Value(2)), Paren(Sub(Value(5), Value(1)))), Value(3))", + )); + let input = lex.parse(input).unwrap(); + assert_eq!(expr.map(|e| format!("{e:?}")).parse(&input), expected); +} diff --git a/examples/css/parser.rs b/examples/css/parser.rs index d31ed0b..fa13078 100644 --- a/examples/css/parser.rs +++ b/examples/css/parser.rs @@ -1,3 +1,4 @@ +use winnow::combinator::seq; use winnow::prelude::*; use winnow::token::take_while; @@ -18,10 +19,13 @@ impl std::str::FromStr for Color { } pub fn hex_color(input: &mut &str) -> PResult<Color> { - let _ = 
"#".parse_next(input)?; - let (red, green, blue) = (hex_primary, hex_primary, hex_primary).parse_next(input)?; - - Ok(Color { red, green, blue }) + seq!(Color { + _: '#', + red: hex_primary, + green: hex_primary, + blue: hex_primary + }) + .parse_next(input) } fn hex_primary(input: &mut &str) -> PResult<u8> { diff --git a/examples/http/parser.rs b/examples/http/parser.rs index 7f62c44..0afe33b 100644 --- a/examples/http/parser.rs +++ b/examples/http/parser.rs @@ -1,3 +1,4 @@ +use winnow::combinator::seq; use winnow::prelude::*; use winnow::{ascii::line_ending, combinator::repeat, token::take_while}; @@ -51,18 +52,15 @@ fn request<'s>(input: &mut Stream<'s>) -> PResult<(Request<'s>, Vec<Header<'s>>) } fn request_line<'s>(input: &mut Stream<'s>) -> PResult<Request<'s>> { - let method = take_while(1.., is_token).parse_next(input)?; - let _ = take_while(1.., is_space).parse_next(input)?; - let uri = take_while(1.., is_not_space).parse_next(input)?; - let _ = take_while(1.., is_space).parse_next(input)?; - let version = http_version(input)?; - let _ = line_ending.parse_next(input)?; - - Ok(Request { - method, - uri, - version, + seq!( Request { + method: take_while(1.., is_token), + _: take_while(1.., is_space), + uri: take_while(1.., is_not_space), + _: take_while(1.., is_space), + version: http_version, + _: line_ending, }) + .parse_next(input) } fn http_version<'s>(input: &mut Stream<'s>) -> PResult<&'s [u8]> { @@ -74,18 +72,19 @@ fn http_version<'s>(input: &mut Stream<'s>) -> PResult<&'s [u8]> { fn message_header_value<'s>(input: &mut Stream<'s>) -> PResult<&'s [u8]> { let _ = take_while(1.., is_horizontal_space).parse_next(input)?; - let data = take_while(1.., not_line_ending).parse_next(input)?; + let data = take_while(1.., till_line_ending).parse_next(input)?; let _ = line_ending.parse_next(input)?; Ok(data) } fn message_header<'s>(input: &mut Stream<'s>) -> PResult<Header<'s>> { - let name = take_while(1.., is_token).parse_next(input)?; - let _ = 
':'.parse_next(input)?; - let value = repeat(1.., message_header_value).parse_next(input)?; - - Ok(Header { name, value }) + seq!(Header { + name: take_while(1.., is_token), + _: ':', + value: repeat(1.., message_header_value), + }) + .parse_next(input) } #[rustfmt::skip] @@ -118,10 +117,10 @@ fn is_token(c: u8) -> bool { } fn is_version(c: u8) -> bool { - (b'0'..=b'9').contains(&c) || c == b'.' + c.is_ascii_digit() || c == b'.' } -fn not_line_ending(c: u8) -> bool { +fn till_line_ending(c: u8) -> bool { c != b'\r' && c != b'\n' } diff --git a/examples/http/parser_streaming.rs b/examples/http/parser_streaming.rs index d59e6f8..1079bc0 100644 --- a/examples/http/parser_streaming.rs +++ b/examples/http/parser_streaming.rs @@ -1,3 +1,4 @@ +use winnow::combinator::seq; use winnow::{ ascii::line_ending, combinator::repeat, prelude::*, stream::Partial, token::take_while, }; @@ -52,18 +53,15 @@ fn request<'s>(input: &mut Stream<'s>) -> PResult<(Request<'s>, Vec<Header<'s>>) } fn request_line<'s>(input: &mut Stream<'s>) -> PResult<Request<'s>> { - let method = take_while(1.., is_token).parse_next(input)?; - let _ = take_while(1.., is_space).parse_next(input)?; - let uri = take_while(1.., is_not_space).parse_next(input)?; - let _ = take_while(1.., is_space).parse_next(input)?; - let version = http_version(input)?; - let _ = line_ending.parse_next(input)?; - - Ok(Request { - method, - uri, - version, + seq!( Request { + method: take_while(1.., is_token), + _: take_while(1.., is_space), + uri: take_while(1.., is_not_space), + _: take_while(1.., is_space), + version: http_version, + _: line_ending, }) + .parse_next(input) } fn http_version<'s>(input: &mut Stream<'s>) -> PResult<&'s [u8]> { @@ -75,18 +73,19 @@ fn http_version<'s>(input: &mut Stream<'s>) -> PResult<&'s [u8]> { fn message_header_value<'s>(input: &mut Stream<'s>) -> PResult<&'s [u8]> { let _ = take_while(1.., is_horizontal_space).parse_next(input)?; - let data = take_while(1.., not_line_ending).parse_next(input)?; 
+ let data = take_while(1.., till_line_ending).parse_next(input)?; let _ = line_ending.parse_next(input)?; Ok(data) } fn message_header<'s>(input: &mut Stream<'s>) -> PResult<Header<'s>> { - let name = take_while(1.., is_token).parse_next(input)?; - let _ = ':'.parse_next(input)?; - let value = repeat(1.., message_header_value).parse_next(input)?; - - Ok(Header { name, value }) + seq!(Header { + name: take_while(1.., is_token), + _: ':', + value: repeat(1.., message_header_value), + }) + .parse_next(input) } #[rustfmt::skip] @@ -119,10 +118,10 @@ fn is_token(c: u8) -> bool { } fn is_version(c: u8) -> bool { - (b'0'..=b'9').contains(&c) || c == b'.' + c.is_ascii_digit() || c == b'.' } -fn not_line_ending(c: u8) -> bool { +fn till_line_ending(c: u8) -> bool { c != b'\r' && c != b'\n' } diff --git a/examples/ini/parser_str.rs b/examples/ini/parser_str.rs index 8f7b9ce..7c3603c 100644 --- a/examples/ini/parser_str.rs +++ b/examples/ini/parser_str.rs @@ -6,7 +6,7 @@ use winnow::{ combinator::opt, combinator::repeat, combinator::{delimited, terminated}, - token::{take_till0, take_while}, + token::{take_till, take_while}, }; pub type Stream<'i> = &'i str; @@ -36,9 +36,9 @@ fn keys_and_values<'s>(input: &mut Stream<'s>) -> PResult<HashMap<&'s str, &'s s fn key_value<'s>(i: &mut Stream<'s>) -> PResult<(&'s str, &'s str)> { let key = alphanumeric.parse_next(i)?; let _ = (opt(space), "=", opt(space)).parse_next(i)?; - let val = take_till0(is_line_ending_or_comment).parse_next(i)?; + let val = take_till(0.., is_line_ending_or_comment).parse_next(i)?; let _ = opt(space).parse_next(i)?; - let _ = opt((";", not_line_ending)).parse_next(i)?; + let _ = opt((";", till_line_ending)).parse_next(i)?; let _ = opt(space_or_line_ending).parse_next(i)?; Ok((key, val)) @@ -48,7 +48,7 @@ fn is_line_ending_or_comment(chr: char) -> bool { chr == ';' || chr == '\n' } -fn not_line_ending<'s>(i: &mut Stream<'s>) -> PResult<&'s str> { +fn till_line_ending<'s>(i: &mut Stream<'s>) -> PResult<&'s 
str> { take_while(0.., |c| c != '\r' && c != '\n').parse_next(i) } diff --git a/examples/json/parser.rs b/examples/json/parser.rs index 8aa3bd3..e8d9c8a 100644 --- a/examples/json/parser.rs +++ b/examples/json/parser.rs @@ -7,7 +7,7 @@ use winnow::{ combinator::alt, combinator::cut_err, combinator::{delimited, preceded, separated_pair, terminated}, - combinator::{fold_repeat, separated0}, + combinator::{repeat, separated}, error::{AddContext, ParserError}, token::{any, none_of, take, take_while}, }; @@ -87,7 +87,7 @@ fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>> // right branch (since we found the `"` character) but encountered an error when // parsing the string cut_err(terminated( - fold_repeat(0.., character, String::new, |mut string, c| { + repeat(0.., character).fold(String::new, |mut string, c| { string.push(c); string }), @@ -153,7 +153,7 @@ fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u1 .parse_next(input) } -/// Some combinators, like `separated0` or `many0`, will call a parser repeatedly, +/// Some combinators, like `separated` or `repeat`, will call a parser repeatedly, /// accumulating results in a `Vec`, until it encounters an error. 
/// If you want more control on the parser application, check out the `iterator` /// combinator (cf `examples/iterator.rs`) @@ -162,7 +162,10 @@ fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( ) -> PResult<Vec<JsonValue>, E> { preceded( ('[', ws), - cut_err(terminated(separated0(json_value, (ws, ',', ws)), (ws, ']'))), + cut_err(terminated( + separated(0.., json_value, (ws, ',', ws)), + (ws, ']'), + )), ) .context("array") .parse_next(input) @@ -173,7 +176,10 @@ fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>> ) -> PResult<HashMap<String, JsonValue>, E> { preceded( ('{', ws), - cut_err(terminated(separated0(key_value, (ws, ',', ws)), (ws, '}'))), + cut_err(terminated( + separated(0.., key_value, (ws, ',', ws)), + (ws, '}'), + )), ) .context("object") .parse_next(input) diff --git a/examples/json/parser_dispatch.rs b/examples/json/parser_dispatch.rs index 6fa722b..11bda4f 100644 --- a/examples/json/parser_dispatch.rs +++ b/examples/json/parser_dispatch.rs @@ -5,12 +5,12 @@ use winnow::prelude::*; use winnow::{ ascii::float, combinator::cut_err, + combinator::empty, combinator::fail, combinator::peek, - combinator::success, combinator::{alt, dispatch}, combinator::{delimited, preceded, separated_pair, terminated}, - combinator::{fold_repeat, separated0}, + combinator::{repeat, separated}, error::{AddContext, ParserError}, token::{any, none_of, take, take_while}, }; @@ -96,7 +96,7 @@ fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>> // right branch (since we found the `"` character) but encountered an error when // parsing the string cut_err(terminated( - fold_repeat(0.., character, String::new, |mut string, c| { + repeat(0.., character).fold(String::new, |mut string, c| { string.push(c); string }), @@ -115,14 +115,14 @@ fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult< let c = none_of('\"').parse_next(input)?; if c == '\\' { dispatch!(any; - 
'"' => success('"'), - '\\' => success('\\'), - '/' => success('/'), - 'b' => success('\x08'), - 'f' => success('\x0C'), - 'n' => success('\n'), - 'r' => success('\r'), - 't' => success('\t'), + '"' => empty.value('"'), + '\\' => empty.value('\\'), + '/' => empty.value('/'), + 'b' => empty.value('\x08'), + 'f' => empty.value('\x0C'), + 'n' => empty.value('\n'), + 'r' => empty.value('\r'), + 't' => empty.value('\t'), 'u' => unicode_escape, _ => fail, ) @@ -160,7 +160,7 @@ fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u1 .parse_next(input) } -/// Some combinators, like `separated0` or `many0`, will call a parser repeatedly, +/// Some combinators, like `separated` or `repeat`, will call a parser repeatedly, /// accumulating results in a `Vec`, until it encounters an error. /// If you want more control on the parser application, check out the `iterator` /// combinator (cf `examples/iterator.rs`) @@ -169,7 +169,10 @@ fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( ) -> PResult<Vec<JsonValue>, E> { preceded( ('[', ws), - cut_err(terminated(separated0(json_value, (ws, ',', ws)), (ws, ']'))), + cut_err(terminated( + separated(0.., json_value, (ws, ',', ws)), + (ws, ']'), + )), ) .context("array") .parse_next(input) @@ -180,7 +183,10 @@ fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>> ) -> PResult<HashMap<String, JsonValue>, E> { preceded( ('{', ws), - cut_err(terminated(separated0(key_value, (ws, ',', ws)), (ws, '}'))), + cut_err(terminated( + separated(0.., key_value, (ws, ',', ws)), + (ws, '}'), + )), ) .context("object") .parse_next(input) diff --git a/examples/json/parser_partial.rs b/examples/json/parser_partial.rs index 3538d8e..31aba00 100644 --- a/examples/json/parser_partial.rs +++ b/examples/json/parser_partial.rs @@ -7,7 +7,7 @@ use winnow::{ combinator::alt, combinator::{cut_err, rest}, combinator::{delimited, preceded, separated_pair, terminated}, - 
combinator::{fold_repeat, separated0}, + combinator::{repeat, separated}, error::{AddContext, ParserError}, stream::Partial, token::{any, none_of, take, take_while}, @@ -88,7 +88,7 @@ fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>> // right branch (since we found the `"` character) but encountered an error when // parsing the string cut_err(terminated( - fold_repeat(0.., character, String::new, |mut string, c| { + repeat(0.., character).fold(String::new, |mut string, c| { string.push(c); string }), @@ -154,7 +154,7 @@ fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u1 .parse_next(input) } -/// Some combinators, like `separated0` or `many0`, will call a parser repeatedly, +/// Some combinators, like `separated` or `repeat`, will call a parser repeatedly, /// accumulating results in a `Vec`, until it encounters an error. /// If you want more control on the parser application, check out the `iterator` /// combinator (cf `examples/iterator.rs`) @@ -163,7 +163,10 @@ fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( ) -> PResult<Vec<JsonValue>, E> { preceded( ('[', ws), - cut_err(terminated(separated0(json_value, (ws, ',', ws)), (ws, ']'))), + cut_err(terminated( + separated(0.., json_value, (ws, ',', ws)), + (ws, ']'), + )), ) .context("array") .parse_next(input) @@ -174,7 +177,10 @@ fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>> ) -> PResult<HashMap<String, JsonValue>, E> { preceded( ('{', ws), - cut_err(terminated(separated0(key_value, (ws, ',', ws)), (ws, '}'))), + cut_err(terminated( + separated(0.., key_value, (ws, ',', ws)), + (ws, '}'), + )), ) .context("object") .parse_next(input) diff --git a/examples/json_iterator.rs b/examples/json_iterator.rs index b8b46f3..9c21ae3 100644 --- a/examples/json_iterator.rs +++ b/examples/json_iterator.rs @@ -5,7 +5,7 @@ use winnow::{ ascii::{alphanumeric1 as alphanumeric, escaped, float}, 
combinator::alt, combinator::cut_err, - combinator::separated0, + combinator::separated, combinator::{preceded, separated_pair, terminated}, error::ParserError, error::StrContext, @@ -233,7 +233,7 @@ fn array(i: &mut &str) -> PResult<()> { preceded( '[', cut_err(terminated( - separated0(value, preceded(sp, ',')), + separated(0.., value, preceded(sp, ',')), preceded(sp, ']'), )), ) @@ -249,7 +249,7 @@ fn hash(i: &mut &str) -> PResult<()> { preceded( '{', cut_err(terminated( - separated0(key_value, preceded(sp, ',')), + separated(0.., key_value, preceded(sp, ',')), preceded(sp, '}'), )), ) diff --git a/examples/ndjson/parser.rs b/examples/ndjson/parser.rs index aaa5c93..101391e 100644 --- a/examples/ndjson/parser.rs +++ b/examples/ndjson/parser.rs @@ -8,7 +8,7 @@ use winnow::{ combinator::alt, combinator::cut_err, combinator::{delimited, preceded, separated_pair, terminated}, - combinator::{fold_repeat, separated0}, + combinator::{repeat, separated}, error::{AddContext, ParserError}, stream::Partial, token::{any, none_of, take, take_while}, @@ -92,7 +92,7 @@ fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>> // right branch (since we found the `"` character) but encountered an error when // parsing the string cut_err(terminated( - fold_repeat(0.., character, String::new, |mut string, c| { + repeat(0.., character).fold(String::new, |mut string, c| { string.push(c); string }), @@ -158,7 +158,7 @@ fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u1 .parse_next(input) } -/// Some combinators, like `separated0` or `many0`, will call a parser repeatedly, +/// Some combinators, like `separated` or `repeat`, will call a parser repeatedly, /// accumulating results in a `Vec`, until it encounters an error. 
/// If you want more control on the parser application, check out the `iterator` /// combinator (cf `examples/iterator.rs`) @@ -167,7 +167,10 @@ fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( ) -> PResult<Vec<JsonValue>, E> { preceded( ('[', ws), - cut_err(terminated(separated0(json_value, (ws, ',', ws)), (ws, ']'))), + cut_err(terminated( + separated(0.., json_value, (ws, ',', ws)), + (ws, ']'), + )), ) .context("array") .parse_next(input) @@ -178,7 +181,10 @@ fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>> ) -> PResult<HashMap<String, JsonValue>, E> { preceded( ('{', ws), - cut_err(terminated(separated0(key_value, (ws, ',', ws)), (ws, '}'))), + cut_err(terminated( + separated(0.., key_value, (ws, ',', ws)), + (ws, '}'), + )), ) .context("object") .parse_next(input) diff --git a/examples/s_expression/parser.rs b/examples/s_expression/parser.rs index 919dcf4..c445053 100644 --- a/examples/s_expression/parser.rs +++ b/examples/s_expression/parser.rs @@ -58,7 +58,7 @@ pub enum Atom { BuiltIn(BuiltIn), } -/// Now, the most basic type. We define some built-in functions that our lisp has +/// Now, the most basic type. We define some built-in functions that our lisp has #[derive(Debug, Eq, PartialEq, Clone, Copy)] pub enum BuiltIn { Plus, diff --git a/examples/string/parser.rs b/examples/string/parser.rs index 6b63458..7701335 100644 --- a/examples/string/parser.rs +++ b/examples/string/parser.rs @@ -11,11 +11,11 @@ use winnow::ascii::multispace1; use winnow::combinator::alt; -use winnow::combinator::fold_repeat; +use winnow::combinator::repeat; use winnow::combinator::{delimited, preceded}; use winnow::error::{FromExternalError, ParserError}; use winnow::prelude::*; -use winnow::token::{take_till1, take_while}; +use winnow::token::{take_till, take_while}; /// Parse a string. Use a loop of `parse_fragment` and push all of the fragments /// into an output string. 
@@ -23,12 +23,14 @@ pub fn parse_string<'a, E>(input: &mut &'a str) -> PResult<String, E> where E: ParserError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, { - // fold_repeat is the equivalent of iterator::fold. It runs a parser in a loop, + // Repeat::fold is the equivalent of iterator::fold. It runs a parser in a loop, // and for each output value, calls a folding function on each output value. - let build_string = fold_repeat( + let build_string = repeat( 0.., // Our parser function – parses a single string fragment parse_fragment, + ) + .fold( // Our init value, an empty string String::new, // Our folding function. For each fragment, append the fragment to the @@ -45,7 +47,7 @@ where // Finally, parse the string. Note that, if `build_string` could accept a raw // " character, the closing delimiter " would never match. When using - // `delimited` with a looping parser (like fold_repeat), be sure that the + // `delimited` with a looping parser (like Repeat::fold), be sure that the // loop won't accidentally match your closing delimiter! delimited('"', build_string, '"').parse_next(input) } @@ -78,13 +80,13 @@ where /// Parse a non-empty block of text that doesn't include \ or " fn parse_literal<'a, E: ParserError<&'a str>>(input: &mut &'a str) -> PResult<&'a str, E> { - // `take_till1` parses a string of 0 or more characters that aren't one of the + // `take_till` parses a string of 0 or more characters that aren't one of the // given characters. - let not_quote_slash = take_till1(['"', '\\']); + let not_quote_slash = take_till(1.., ['"', '\\']); // `verify` runs a parser, then runs a verification function on the output of // the parser. The verification function accepts the output only if it - // returns true. In this case, we want to ensure that the output of take_till1 + // returns true. In this case, we want to ensure that the output of take_till // is non-empty. 
not_quote_slash .verify(|s: &str| !s.is_empty()) diff --git a/src/_topic/arithmetic.rs b/src/_topic/arithmetic.rs index 1a6eddc..d94b4fa 100644 --- a/src/_topic/arithmetic.rs +++ b/src/_topic/arithmetic.rs @@ -11,3 +11,9 @@ //! ```rust #![doc = include_str!("../../examples/arithmetic/parser_ast.rs")] //! ``` +//! +//! ## Parse to Tokens then AST +//! +//! ```rust +#![doc = include_str!("../../examples/arithmetic/parser_lexer.rs")] +//! ``` diff --git a/src/_topic/error.rs b/src/_topic/error.rs index c5374b4..8a401b4 100644 --- a/src/_topic/error.rs +++ b/src/_topic/error.rs @@ -1,13 +1,27 @@ //! # Custom Errors //! -//! The most basic error type is [`ParserError`][crate::error::ParserError] +//! Between [`ContextError`], [`Parser::context`], and [`cut_err`], +//! most error needs will likely be met +//! (see [tutorial][chapter_6]). +//! When that isn't the case, you can implement your own error type. +//! +//! The most basic error trait is [`ParserError`]. //! //! Optional traits include: -//! - [`AddContext`][crate::error::AddContext] -//! - [`FromExternalError`][crate::error::FromExternalError] +//! - [`AddContext`] +//! - [`FromExternalError`] //! //! # Example //! //!```rust #![doc = include_str!("../../examples/custom_error.rs")] //!``` + +#![allow(unused_imports)] +use crate::combinator::cut_err; +use crate::error::ContextError; +use crate::Parser; +use crate::_tutorial::chapter_6; +use crate::error::AddContext; +use crate::error::FromExternalError; +use crate::error::ParserError; diff --git a/src/_topic/language.rs b/src/_topic/language.rs index 0cebc99..c6a713a 100644 --- a/src/_topic/language.rs +++ b/src/_topic/language.rs @@ -82,13 +82,13 @@ //! use winnow::prelude::*; //! use winnow::{ //! error::ParserError, -//! token::{tag, take_until0}, +//! token::{tag, take_until}, //! }; //! //! pub fn pinline_comment<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> PResult<(), E> { //! ( //! "(*", -//! take_until0("*)"), +//! take_until(0.., "*)"), //! "*)" //! 
) //! .void() // Output is thrown away. diff --git a/src/_topic/mod.rs b/src/_topic/mod.rs index 72c8145..c7bcc53 100644 --- a/src/_topic/mod.rs +++ b/src/_topic/mod.rs @@ -3,6 +3,7 @@ //! These are short recipes for accomplishing common tasks. //! //! - [Why `winnow`?][why] +//! - [Migrating from `nom`][nom] //! - Formats: //! - [Elements of Programming Languages][language] //! - [Arithmetic][arithmetic] @@ -14,13 +15,15 @@ //! - [Implementing `FromStr`][fromstr] //! - [Performance][performance] //! - [Parsing Partial Input][partial] -//! - [Custom stream][stream] +//! - [Custom stream or token][stream] //! - [Custom errors][error] +//! - [Debugging][crate::_tutorial::chapter_8] //! //! See also parsers written with `winnow`: //! //! - [`toml_edit`](https://crates.io/crates/toml_edit) //! - [`hcl-edit`](https://crates.io/crates/hcl-edit) +#![allow(clippy::std_instead_of_core)] pub mod arithmetic; pub mod error; @@ -29,6 +32,7 @@ pub mod http; pub mod ini; pub mod json; pub mod language; +pub mod nom; pub mod partial; pub mod performance; pub mod s_expression; diff --git a/src/_topic/nom.rs b/src/_topic/nom.rs new file mode 100644 index 0000000..f3cb74e --- /dev/null +++ b/src/_topic/nom.rs @@ -0,0 +1,49 @@ +//! # Migrating from `nom` +//! +//! For comparisons with `nom`, see +//! - [Why `winnow`][super::why] +//! - [parse-rosetta-rs](https://github.com/rosetta-rs/parse-rosetta-rs/) +//! +//! What approach you take depends on the size and complexity of your parser. +//! For small, simple parsers, it's likely easiest to directly port from `nom`. +//! When trying to look for the equivalent of a `nom` combinator, search in the docs for the name +//! of the `nom` combinator. It is expected that, where names diverge, a doc alias exists. +//! See also the [List of combinators][crate::combinator]. +//! +//! For larger parsers, it is likely best to take smaller steps +//! - Easier to debug when something goes wrong +//! 
- Deprecation messages will help assist through the process +//! +//! The workflow goes something like: +//! 1. Run `cargo rm nom && cargo add winnow@0.3` +//! 1. Ensure everything compiles and tests pass, ignoring deprecation messages (see [migration +//! notes](https://github.com/winnow-rs/winnow/blob/v0.3-main/CHANGELOG.md#nom-migration-guide)) +//! 1. Commit +//! 1. Switch any `impl FnMut(I) -> IResult<I, O, E>` to `impl Parser<I, O, E>` +//! 1. Resolve deprecation messages +//! 1. Commit +//! 1. Run `cargo add winnow@0.4` +//! 1. Ensure everything compiles and tests pass, ignoring deprecation messages (see [changelog](https://github.com/winnow-rs/winnow/blob/v0.4-main/CHANGELOG.md#compatibility-2) for more details) +//! 1. Commit +//! 1. Resolve deprecation messages +//! 1. Commit +//! 1. Run `cargo add winnow@0.5` +//! 1. Ensure everything compiles and tests pass, ignoring deprecation messages (see [migration +//! notes](https://github.com/winnow-rs/winnow/blob/v0.5.0/CHANGELOG.md)) +//! 1. Commit +//! 1. Resolve deprecation messages +//! 1. Commit +//! +//! For example migrations, see +//! - [git-config-env](https://github.com/gitext-rs/git-config-env/pull/11) (nom to winnow 0.3) +//! - [git-conventional](https://github.com/crate-ci/git-conventional/pull/37) (nom to winnow 0.3, +//! adds explicit tracing for easier debugging) +//! - [typos](https://github.com/crate-ci/typos/pull/664) (nom to winnow 0.3) +//! - [cargo-smart-release](https://github.com/Byron/gitoxide/pull/948) (gradual migration from nom +//! to winnow 0.5) +//! - [gix-config](https://github.com/Byron/gitoxide/pull/951) (gradual migration from nom +//! to winnow 0.5) +//! - [gix-protocol](https://github.com/Byron/gitoxide/pull/1009) (gradual migration from nom +//! to winnow 0.5) +//! - [gitoxide](https://github.com/Byron/gitoxide/pull/956) (gradual migration from nom +//! 
to winnow 0.5) diff --git a/src/_topic/partial.rs b/src/_topic/partial.rs index 19895d3..e4747f1 100644 --- a/src/_topic/partial.rs +++ b/src/_topic/partial.rs @@ -1,25 +1,25 @@ //! # Parsing Partial Input //! -//! Typically, the input being parsed is all in-memory, or is complete. Some data sources are too +//! Typically, the input being parsed is all in-memory, or is complete. Some data sources are too //! large to fit into memory, only allowing parsing an incomplete or [`Partial`] subset of the //! data, requiring incrementally parsing. //! //! By wrapping a stream, like `&[u8]`, with [`Partial`], parsers will report when the data is //! [`Incomplete`] and more input is [`Needed`], allowing the caller to stream-in additional data -//! to be parsed. The data is then parsed a chunk at a time. +//! to be parsed. The data is then parsed a chunk at a time. //! //! Chunks are typically defined by either: -//! - A header reporting the number of bytes, like with [`length_value`] +//! - A header reporting the number of bytes, like with [`length_and_then`] //! - [`Partial`] can explicitly be changed to being complete once the specified bytes are //! acquired via [`StreamIsPartial::complete`]. //! - A delimiter, like with [ndjson](http://ndjson.org/) -//! - You can parse up-to the delimiter or do a `take_until0(delim).and_then(parser)` +//! - You can parse up-to the delimiter or do a `take_until(0.., delim).and_then(parser)` //! //! If the chunks are not homogeneous, a state machine will be needed to track what the expected //! parser is for the next chunk. //! //! Caveats: -//! - `winnow` takes the approach of re-parsing from scratch. Chunks should be relatively small to +//! - `winnow` takes the approach of re-parsing from scratch. Chunks should be relatively small to //! prevent the re-parsing overhead from dominating. //! - Parsers like [`repeat`] do not know when an `eof` is from insufficient data or the end of the //! 
stream, causing them to always report [`Incomplete`]. @@ -38,7 +38,7 @@ #![allow(unused_imports)] // Used for intra-doc links -use crate::binary::length_value; +use crate::binary::length_and_then; use crate::combinator::repeat; use crate::error::ErrMode::Incomplete; use crate::error::Needed; diff --git a/src/_topic/performance.rs b/src/_topic/performance.rs index 8a6555a..2b53038 100644 --- a/src/_topic/performance.rs +++ b/src/_topic/performance.rs @@ -5,19 +5,19 @@ //! See also the general Rust [Performance Book](https://nnethercote.github.io/perf-book/) //! //! Tips -//! - Try `cargo add winnow -F simd`. For some it offers significant performance improvements +//! - Try `cargo add winnow -F simd`. For some it offers significant performance improvements //! - When enough cases of an [`alt`] have unique prefixes, prefer [`dispatch`] //! - When parsing text, try to parse as bytes (`u8`) rather than `char`s ([`BStr`] can make //! debugging easier) //! - Find simplified subsets of the grammar to parse, falling back to the full grammar when it //! doesn't work. For example, when parsing json strings, parse them without support for escapes, //! falling back to escape support if it fails. -//! - Watch for large return types. A surprising place these can show up is when chaining parsers +//! - Watch for large return types. A surprising place these can show up is when chaining parsers //! with a tuple. //! //! ## Build-time Performance //! -//! Returning complex types as `impl Trait` can negatively impact build times. This can hit in +//! Returning complex types as `impl Trait` can negatively impact build times. This can hit in //! surprising cases like: //! ```rust //! # use winnow::prelude::*; diff --git a/src/_topic/stream.rs b/src/_topic/stream.rs index 4f94a94..2254f87 100644 --- a/src/_topic/stream.rs +++ b/src/_topic/stream.rs @@ -1,20 +1,21 @@ -//! # Custom [`Stream`][crate::stream::Stream] +//! # Custom [`Stream`] //! //! 
`winnow` is batteries included with support for //! - Basic inputs like `&str`, newtypes with -//! - Improved debug output like [`Bytes`][crate::Bytes] -//! - [`Stateful`][crate::Stateful] for passing state through your parser, like tracking recursion +//! - Improved debug output like [`Bytes`] +//! - [`Stateful`] for passing state through your parser, like tracking recursion //! depth -//! - [`Located`][crate::Located] for looking up the absolute position of a token +//! - [`Located`] for looking up the absolute position of a token //! -//! But that won't always cut it for your parser. For example, you might lex `&str` into +//! But that won't always cut it for your parser. For example, you might lex `&str` into //! a series of tokens and then want to parse a `TokenStream`. //! //! ## Implementing a custom stream //! -//! Let's assume we have an input type we'll call `MyStream`. `MyStream` is a sequence of `MyItem` type. -//! The goal is to define parsers with this signature: `&mut MyStream -> PResult<Output>`. +//! Let's assume we have an input type we'll call `MyStream`. +//! `MyStream` is a sequence of `MyItem` type. //! +//! The goal is to define parsers with this signature: `&mut MyStream -> PResult<Output>`. //! ```rust //! # use winnow::prelude::*; //! # use winnow::token::tag; @@ -25,7 +26,7 @@ //! } //! ``` //! -//! Here are the traits we have to implement for `MyStream`: +//! Here are the traits you may have to implement for `MyStream`: //! //! | trait | usage | //! |---|---| @@ -38,17 +39,28 @@ //! | [`Location`] |Calculate location within initial input| //! | [`Offset`] |Calculate the offset between slices| //! -//! Here are the traits we have to implement for `MyItem`: +//! And for `MyItem`: //! //! | trait | usage | //! |---|---| //! | [`AsChar`] |Transforms common types to a char for basic token parsing| //! | [`ContainsToken`] |Look for the token in the given set| //! -//! And traits for slices of `MyItem`: +//! And traits for `&[MyItem]`: //! +//! 
| trait | usage | +//! |---|---| //! | [`SliceLen`] |Calculate the input length| //! | [`ParseSlice`] |Used to integrate `&str`'s `parse()` method| +//! +//! ## Implementing a custom token +//! +//! If you are parsing `&[Myitem]`, leaving just the `MyItem` traits. +//! +//! For example: +//! ```rust +#![doc = include_str!("../../examples/arithmetic/parser_lexer.rs")] +//! ``` #[allow(unused_imports)] // Here for intra-dock links use crate::stream::*; diff --git a/src/_topic/why.rs b/src/_topic/why.rs index e0328f1..fc1716a 100644 --- a/src/_topic/why.rs +++ b/src/_topic/why.rs @@ -2,7 +2,7 @@ //! //! To answer this question, it will be useful to contrast this with other approaches to parsing. //! -//! **Note:** This will focus on principles and priorities. For a deeper and wider wider +//! **Note:** This will focus on principles and priorities. For a deeper and wider wider //! comparison with other Rust parser libraries, see //! [parse-rosetta-rs](https://github.com/rosetta-rs/parse-rosetta-rs). //! @@ -42,7 +42,7 @@ //! //! For binary formats, `winnow` includes: //! - [A hexadecimal view][crate::Bytes] in [traces][crate::trace] -//! - [TLV](https://en.wikipedia.org/wiki/Type-length-value) +//! - [TLV](https://en.wikipedia.org/wiki/Type-length-value) (e.g. [`length_take`]) //! - Some common parsers to help get started, like numbers //! //! For text formats, `winnow` includes: @@ -58,7 +58,7 @@ //! //! ## `nom` //! -//! `winnow` is a fork of the venerable [`nom`](https://crates.io/crates/nom). The difference +//! `winnow` is a fork of the venerable [`nom`](https://crates.io/crates/nom). The difference //! between them is largely in priorities. `nom` prioritizes: //! - Lower churn for existing users while `winnow` is trying to find ways to make things better //! for the parsers yet to be written. @@ -68,6 +68,8 @@ //! and to not block users on new features being merged while `winnow` aims to include all the //! 
fundamentals for parsing to ensure the experience is cohesive and high quality. //! +//! See also our [nom migration guide][super::nom] +//! //! ## `chumsky` //! //! [`chumsky`](https://crates.io/crates/chumsky) is an up and coming parser-combinator library @@ -77,11 +79,11 @@ //! //! > "If you need to implement either `Parser` or `Strategy` by hand, that's a problem that needs fixing". //! -//! This is under "batteries included" but it also ties into the feeling that `chumksy` acts more like -//! a framework. Instead of composing together helpers, you are expected to do everything through +//! This is under "batteries included" but it also ties into the feeling that `chumsky` acts more like +//! a framework. Instead of composing together helpers, you are expected to do everything through //! their system to the point that it is non-trivial to implement their `Parser` trait and are //! encouraged to use the -//! [`custom`](https://docs.rs/chumsky/0.9.0/chumsky/primitive/fn.custom.html) helper. This +//! [`custom`](https://docs.rs/chumsky/0.9.0/chumsky/primitive/fn.custom.html) helper. This //! requires re-framing everything to fit within their model and makes the code harder to understand //! and debug as you are working with abstract operations that will eventually be applied //! rather than directly with the parsers. @@ -90,9 +92,10 @@ //! Probably the biggest thing that `winnow` loses out on is optimizations from ["parse modes" via //! GATs](https://github.com/zesterer/chumsky/pull/82) which allows downstream parsers to tell //! upstream parsers when information will be discarded, allowing bypassing expensive operations, -//! like allocations. This requires a lot more complex interaction with parsers that isn't as +//! like allocations. This requires a lot more complex interaction with parsers that isn't as //! trivial to do with bare functions which would lose out on any of that side-band information. //! 
Instead, we work around this with things like the [`Accumulate`] trait. #![allow(unused_imports)] +use crate::binary::length_take; use crate::stream::Accumulate; diff --git a/src/_tutorial/chapter_0.rs b/src/_tutorial/chapter_0.rs index 4be64c4..35a2d14 100644 --- a/src/_tutorial/chapter_0.rs +++ b/src/_tutorial/chapter_0.rs @@ -10,7 +10,7 @@ //! ## About //! //! `winnow` is a parser-combinator library. In other words, it gives you tools to define: -//! - "parsers", or functions that takes an input and gives back an output +//! - "parsers", or functions that take an input and give back an output //! - "combinators", or functions that take parsers and _combine_ them together! //! //! While "combinator" might be an unfamiliar word, you are likely using them in your rust code @@ -36,4 +36,4 @@ use crate::_topic; use std::iter::Iterator; pub use super::chapter_1 as next; -pub use crate::_tutorial as table_of_content; +pub use crate::_tutorial as table_of_contents; diff --git a/src/_tutorial/chapter_1.rs b/src/_tutorial/chapter_1.rs index a2af005..2d94418 100644 --- a/src/_tutorial/chapter_1.rs +++ b/src/_tutorial/chapter_1.rs @@ -10,17 +10,17 @@ //! - `Err` indicates the parser could not find what it was looking for. //! //! Parsers do more than just return a binary "success"/"failure" code. -//! On success, the parser will return the processed data. The input will be left pointing to +//! On success, the parser will return the processed data. The input will be left pointing to //! data that still needs processing //! //! If the parser failed, then there are multiple errors that could be returned. //! For simplicity, however, in the next chapters we will leave these unexplored. //! //! ```text -//! ┌─► Ok(what matched the parser) -//! ┌─────────┐ │ -//! my input───►│my parser├──►either──┤ -//! └─────────┘ └─► Err(...) +//! ┌─► Ok(what matched the parser) +//! ┌─────────┐ │ +//! my input───►│my parser├──►either──┤ +//! └─────────┘ └─► Err(...) //! ``` //! //! 
@@ -53,7 +53,7 @@ //! //! This parser function should take in a `&str`: //! -//! - Since it is supposed to succeed, we know it will return the Ok Variant. +//! - Since it is supposed to succeed, we know it will return the `Ok` variant. //! - Since it does nothing to our input, the remaining input is the same as the input. //! - Since it doesn't parse anything, it also should just return an empty string. //! @@ -83,4 +83,4 @@ use crate::Parser; pub use super::chapter_0 as previous; pub use super::chapter_2 as next; -pub use crate::_tutorial as table_of_content; +pub use crate::_tutorial as table_of_contents; diff --git a/src/_tutorial/chapter_2.rs b/src/_tutorial/chapter_2.rs index d3593b9..c27b719 100644 --- a/src/_tutorial/chapter_2.rs +++ b/src/_tutorial/chapter_2.rs @@ -4,7 +4,7 @@ //! //! ## Tokens //! -//! [`Stream`] provides some core operations to help with parsing. For example, to process a +//! [`Stream`] provides some core operations to help with parsing. For example, to process a //! single token, you can do: //! ```rust //! # use winnow::Parser; @@ -135,13 +135,13 @@ //! # } //! ``` //! -//! In `winnow`, we call this type of parser a [`tag`]. See [`token`] for additional individual +//! In `winnow`, we call this type of parser a [`tag`]. See [`token`] for additional individual //! and token-slice parsers. //! //! ## Character Classes //! -//! Selecting a single `char` or a [`tag`] is fairly limited. Sometimes, you will want to select one of several -//! `chars` of a specific class, like digits. For this, we use the [`one_of`] parer: +//! Selecting a single `char` or a [`tag`] is fairly limited. Sometimes, you will want to select one of several +//! `chars` of a specific class, like digits. For this, we use the [`one_of`] parser: //! //! ```rust //! # use winnow::Parser; @@ -207,7 +207,7 @@ //! } //! ``` //! -//! We could simplify this further with by using one of the built-in character classes, [`hex_digit1`]: +//! 
We could simplify this further by using one of the built-in character classes, [`hex_digit1`]: //! ```rust //! # use winnow::Parser; //! # use winnow::PResult; @@ -245,4 +245,4 @@ use std::ops::RangeInclusive; pub use super::chapter_1 as previous; pub use super::chapter_3 as next; -pub use crate::_tutorial as table_of_content; +pub use crate::_tutorial as table_of_contents; diff --git a/src/_tutorial/chapter_3.rs b/src/_tutorial/chapter_3.rs index 4cbe487..2ad0124 100644 --- a/src/_tutorial/chapter_3.rs +++ b/src/_tutorial/chapter_3.rs @@ -179,7 +179,7 @@ //! ``` //! //! > **Warning:** the above example is for illustrative purposes and relying on `Result::Ok` or -//! > `Result::Err` can lead to incorrect behavior. This will be clarified in later when covering +//! > `Result::Err` can lead to incorrect behavior. This will be clarified in later when covering //! > [error handling][`chapter_6`#errmode] //! //! [`opt`] is a basic building block for correctly handling retrying parsing: @@ -293,11 +293,11 @@ //! # } //! ``` //! -//! > **Note:** [`success`] and [`fail`] are parsers that might be useful in the `else` case. +//! > **Note:** [`empty`] and [`fail`] are parsers that might be useful in the `else` case. //! //! Sometimes a giant if/else-if ladder can be slow and you'd rather have a `match` statement for -//! branches of your parser that have unique prefixes. In this case, you can use the -//! [`dispatch`][crate::combinator::dispatch] macro: +//! branches of your parser that have unique prefixes. In this case, you can use the +//! [`dispatch`] macro: //! //! ```rust //! 
# use winnow::prelude::*; @@ -364,13 +364,13 @@ use super::chapter_6; use crate::combinator; use crate::combinator::alt; use crate::combinator::dispatch; +use crate::combinator::empty; use crate::combinator::fail; use crate::combinator::opt; use crate::combinator::peek; use crate::combinator::preceded; -use crate::combinator::success; use crate::stream::Stream; pub use super::chapter_2 as previous; pub use super::chapter_4 as next; -pub use crate::_tutorial as table_of_content; +pub use crate::_tutorial as table_of_contents; diff --git a/src/_tutorial/chapter_4.rs b/src/_tutorial/chapter_4.rs index e6a836b..328a648 100644 --- a/src/_tutorial/chapter_4.rs +++ b/src/_tutorial/chapter_4.rs @@ -10,9 +10,6 @@ //! All we need to do for our parser to return a different type is to change //! the type parameter of [`PResult`] to the desired return type. //! For example, to return a `usize`, return a `PResult<usize>`. -//! Recall that the type parameter of the `PResult` is the input -//! type, so even if you're returning something different, if your input -//! is a `&str`, the type argument of `PResult` should be also. //! //! One winnow-native way of doing a type conversion is to use the //! [`Parser::parse_to`] combinator @@ -107,4 +104,4 @@ use std::str::FromStr; pub use super::chapter_3 as previous; pub use super::chapter_5 as next; -pub use crate::_tutorial as table_of_content; +pub use crate::_tutorial as table_of_contents; diff --git a/src/_tutorial/chapter_5.rs b/src/_tutorial/chapter_5.rs index 2d5bac3..8aa719b 100644 --- a/src/_tutorial/chapter_5.rs +++ b/src/_tutorial/chapter_5.rs @@ -136,18 +136,18 @@ //! # } //! ``` //! -//! You'll notice that the above allows trailing `,` when we intended to not support that. We can -//! easily fix this by using [`separated0`]: +//! You'll notice that the above allows trailing `,` when we intended to not support that. We can +//! easily fix this by using [`separated`]: //! ```rust //! # use winnow::prelude::*; //! 
# use winnow::token::take_while; //! # use winnow::combinator::dispatch; //! # use winnow::token::take; //! # use winnow::combinator::fail; -//! use winnow::combinator::separated0; +//! use winnow::combinator::separated; //! //! fn parse_list(input: &mut &str) -> PResult<Vec<usize>> { -//! separated0(parse_digits, ",").parse_next(input) +//! separated(0.., parse_digits, ",").parse_next(input) //! } //! //! // ... @@ -200,7 +200,7 @@ //! ``` //! //! If you look closely at [`repeat`], it isn't collecting directly into a [`Vec`] but -//! [`Accumulate`] to gather the results. This let's us make more complex parsers than we did in +//! [`Accumulate`] to gather the results. This lets us make more complex parsers than we did in //! [`chapter_2`] by accumulating the results into a `()` and [`recognize`][Parser::recognize]-ing the captured input: //! ```rust //! # use winnow::prelude::*; @@ -208,14 +208,14 @@ //! # use winnow::combinator::dispatch; //! # use winnow::token::take; //! # use winnow::combinator::fail; -//! # use winnow::combinator::separated0; +//! # use winnow::combinator::separated; //! # //! fn recognize_list<'s>(input: &mut &'s str) -> PResult<&'s str> { //! parse_list.recognize().parse_next(input) //! } //! //! fn parse_list(input: &mut &str) -> PResult<()> { -//! separated0(parse_digits, ",").parse_next(input) +//! separated(0.., parse_digits, ",").parse_next(input) //! } //! //! 
# fn parse_digits(input: &mut &str) -> PResult<usize> { @@ -272,11 +272,11 @@ use super::chapter_2; use super::chapter_3; use crate::combinator; use crate::combinator::repeat; -use crate::combinator::separated0; +use crate::combinator::separated; use crate::stream::Accumulate; use crate::Parser; use std::vec::Vec; pub use super::chapter_4 as previous; pub use super::chapter_6 as next; -pub use crate::_tutorial as table_of_content; +pub use crate::_tutorial as table_of_contents; diff --git a/src/_tutorial/chapter_6.rs b/src/_tutorial/chapter_6.rs index ab21038..0d54e15 100644 --- a/src/_tutorial/chapter_6.rs +++ b/src/_tutorial/chapter_6.rs @@ -72,13 +72,13 @@ //! ```rust //! # use winnow::error::ErrorKind; //! # use winnow::error::ErrMode; -//! pub type OResult<O, E = ErrorKind> = Result<O, ErrMode<E>>; +//! pub type PResult<O, E = ErrorKind> = Result<O, ErrMode<E>>; //! ``` //! [`PResult`] is just a fancy wrapper around `Result` that wraps our error in an [`ErrMode`] //! type. //! //! [`ErrMode`] is an enum with [`Backtrack`] and [`Cut`] variants (ignore [`Incomplete`] as its only -//! relevant for [streaming][_topic::stream]). By default, errors are [`Backtrack`], meaning that +//! relevant for [streaming][_topic::stream]). By default, errors are [`Backtrack`], meaning that //! other parsing branches will be attempted on failure, like the next case of an [`alt`]. [`Cut`] //! shortcircuits all other branches, immediately reporting the error. //! 
@@ -153,4 +153,4 @@ use crate::_topic; pub use super::chapter_5 as previous; pub use super::chapter_7 as next; -pub use crate::_tutorial as table_of_content; +pub use crate::_tutorial as table_of_contents; diff --git a/src/_tutorial/chapter_7.rs b/src/_tutorial/chapter_7.rs index 0a20d67..659be3a 100644 --- a/src/_tutorial/chapter_7.rs +++ b/src/_tutorial/chapter_7.rs @@ -115,4 +115,5 @@ use crate::PResult; use crate::Parser; pub use super::chapter_6 as previous; -pub use crate::_tutorial as table_of_content; +pub use super::chapter_8 as next; +pub use crate::_tutorial as table_of_contents; diff --git a/src/_tutorial/chapter_8.rs b/src/_tutorial/chapter_8.rs new file mode 100644 index 0000000..6ff8f29 --- /dev/null +++ b/src/_tutorial/chapter_8.rs @@ -0,0 +1,34 @@ +//! # Chapter 8: Debugging +//! +//! When things inevitably go wrong, you can introspect the parsing state by running your test case +//! with `--features debug`: +//! ![Trace output from string example](https://raw.githubusercontent.com/winnow-rs/winnow/main/assets/trace.svg "Example output") +//! +//! You can extend your own parsers to show up by wrapping their body with +//! [`trace`][crate::combinator::trace]. Going back to [`do_nothing_parser`][super::chapter_1]. +//! ```rust +//! # use winnow::PResult; +//! # use winnow::Parser; +//! use winnow::combinator::trace; +//! +//! pub fn do_nothing_parser<'s>(input: &mut &'s str) -> PResult<&'s str> { +//! trace( +//! "do_nothing_parser", +//! |i: &mut _| Ok("") +//! ).parse_next(input) +//! } +//! # +//! # fn main() { +//! # let mut input = "0x1a2b Hello"; +//! # +//! # let output = do_nothing_parser.parse_next(&mut input).unwrap(); +//! # // Same as: +//! # // let output = do_nothing_parser(&mut input).unwrap(); +//! # +//! # assert_eq!(input, "0x1a2b Hello"); +//! # assert_eq!(output, ""); +//! # } +//! 
``` + +pub use super::chapter_7 as previous; +pub use crate::_tutorial as table_of_contents; diff --git a/src/_tutorial/mod.rs b/src/_tutorial/mod.rs index e4b8392..3b1680e 100644 --- a/src/_tutorial/mod.rs +++ b/src/_tutorial/mod.rs @@ -1,6 +1,7 @@ //! # Tutorial //! -//! Table of Content +//! Table of Contents +#![allow(clippy::std_instead_of_core)] pub mod chapter_0; pub mod chapter_1; @@ -10,3 +11,4 @@ pub mod chapter_4; pub mod chapter_5; pub mod chapter_6; pub mod chapter_7; +pub mod chapter_8; diff --git a/src/ascii/mod.rs b/src/ascii/mod.rs index 8b3119f..8e6a480 100644 --- a/src/ascii/mod.rs +++ b/src/ascii/mod.rs @@ -10,17 +10,46 @@ use crate::lib::std::ops::{Add, Shl}; use crate::combinator::alt; use crate::combinator::cut_err; use crate::combinator::opt; +use crate::combinator::trace; use crate::error::ParserError; use crate::error::{ErrMode, ErrorKind, Needed}; use crate::stream::{AsBStr, AsChar, ParseSlice, Stream, StreamIsPartial}; use crate::stream::{Compare, CompareResult}; use crate::token::one_of; -use crate::token::take_till0; +use crate::token::take_till; use crate::token::take_while; -use crate::trace::trace; use crate::PResult; use crate::Parser; +/// Mark a value as case-insensitive for ASCII characters +/// +/// # Example +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}}; +/// # use winnow::ascii::Caseless; +/// +/// fn parser<'s>(s: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// Caseless("hello").parse_next(s) +/// } +/// +/// assert_eq!(parser.parse_peek("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser.parse_peek("hello, World!"), Ok((", World!", "hello"))); +/// assert_eq!(parser.parse_peek("HeLlo, World!"), Ok((", World!", "HeLlo"))); +/// assert_eq!(parser.parse_peek("Some"), Err(ErrMode::Backtrack(InputError::new("Some", ErrorKind::Tag)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", 
ErrorKind::Tag)))); +/// ``` +#[derive(Copy, Clone, Debug)] +pub struct Caseless<T>(pub T); + +impl Caseless<&str> { + /// Get the byte-representation of this case-insensitive value + #[inline(always)] + pub fn as_bytes(&self) -> Caseless<&[u8]> { + Caseless(self.0.as_bytes()) + } +} + /// Recognizes the string `"\r\n"`. /// /// *Complete version*: Will return an error if there's not enough input data. @@ -72,9 +101,9 @@ where /// ``` /// # use winnow::prelude::*; /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; -/// # use winnow::ascii::not_line_ending; +/// # use winnow::ascii::till_line_ending; /// fn parser<'s>(input: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { -/// not_line_ending.parse_next(input) +/// till_line_ending.parse_next(input) /// } /// /// assert_eq!(parser.parse_peek("ab\r\nc"), Ok(("\r\nc", "ab"))); @@ -89,32 +118,45 @@ where /// # use winnow::prelude::*; /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::Partial; -/// # use winnow::ascii::not_line_ending; -/// assert_eq!(not_line_ending::<_, InputError<_>>.parse_peek(Partial::new("ab\r\nc")), Ok((Partial::new("\r\nc"), "ab"))); -/// assert_eq!(not_line_ending::<_, InputError<_>>.parse_peek(Partial::new("abc")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// assert_eq!(not_line_ending::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); -/// assert_eq!(not_line_ending::<_, InputError<_>>.parse_peek(Partial::new("a\rb\nc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("\rb\nc"), ErrorKind::Tag )))); -/// assert_eq!(not_line_ending::<_, InputError<_>>.parse_peek(Partial::new("a\rbc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("\rbc"), ErrorKind::Tag )))); +/// # use winnow::ascii::till_line_ending; +/// assert_eq!(till_line_ending::<_, InputError<_>>.parse_peek(Partial::new("ab\r\nc")), Ok((Partial::new("\r\nc"), "ab"))); +/// 
assert_eq!(till_line_ending::<_, InputError<_>>.parse_peek(Partial::new("abc")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(till_line_ending::<_, InputError<_>>.parse_peek(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(till_line_ending::<_, InputError<_>>.parse_peek(Partial::new("a\rb\nc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("\rb\nc"), ErrorKind::Tag )))); +/// assert_eq!(till_line_ending::<_, InputError<_>>.parse_peek(Partial::new("a\rbc")), Err(ErrMode::Backtrack(InputError::new(Partial::new("\rbc"), ErrorKind::Tag )))); /// ``` #[inline(always)] -pub fn not_line_ending<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +pub fn till_line_ending<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> where I: StreamIsPartial, I: Stream, I: Compare<&'static str>, <I as Stream>::Token: AsChar + Clone, { - trace("not_line_ending", move |input: &mut I| { + trace("till_line_ending", move |input: &mut I| { if <I as StreamIsPartial>::is_partial_supported() { - not_line_ending_::<_, _, true>(input) + till_line_ending_::<_, _, true>(input) } else { - not_line_ending_::<_, _, false>(input) + till_line_ending_::<_, _, false>(input) } }) .parse_next(input) } -fn not_line_ending_<I, E: ParserError<I>, const PARTIAL: bool>( +/// Deprecated, replaced with [`till_line_ending`] +#[deprecated(since = "0.5.35", note = "Replaced with `till_line_ending`")] +#[inline(always)] +pub fn not_line_ending<I, E: ParserError<I>>(input: &mut I) -> PResult<<I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + I: Compare<&'static str>, + <I as Stream>::Token: AsChar + Clone, +{ + till_line_ending(input) +} + +fn till_line_ending_<I, E: ParserError<I>, const PARTIAL: bool>( input: &mut I, ) -> PResult<<I as Stream>::Slice, E> where @@ -123,7 +165,7 @@ where I: Compare<&'static str>, <I as Stream>::Token: AsChar + Clone, { - let res = take_till0(('\r', '\n')).parse_next(input)?; + 
let res = take_till(0.., ('\r', '\n')).parse_next(input)?; if input.compare("\r") == CompareResult::Ok { let comp = input.compare("\r\n"); match comp { @@ -423,7 +465,7 @@ where /// /// ## Parsing an integer /// -/// You can use `digit1` in combination with [`Parser::try_map`][crate::Parser::try_map] to parse an integer: +/// You can use `digit1` in combination with [`Parser::try_map`] to parse an integer: /// /// ``` /// # use winnow::prelude::*; @@ -965,6 +1007,7 @@ impl Uint for u128 { } } +/// Deprecated since v0.5.17 impl Uint for i8 { fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { self.checked_mul(by as Self) @@ -974,6 +1017,7 @@ impl Uint for i8 { } } +/// Deprecated since v0.5.17 impl Uint for i16 { fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { self.checked_mul(by as Self) @@ -983,6 +1027,7 @@ impl Uint for i16 { } } +/// Deprecated since v0.5.17 impl Uint for i32 { fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { self.checked_mul(by as Self) @@ -992,6 +1037,7 @@ impl Uint for i32 { } } +/// Deprecated since v0.5.17 impl Uint for i64 { fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { self.checked_mul(by as Self) @@ -1001,6 +1047,7 @@ impl Uint for i64 { } } +/// Deprecated since v0.5.17 impl Uint for i128 { fn checked_mul(self, by: u8, _: sealed::SealedMarker) -> Option<Self> { self.checked_mul(by as Self) @@ -1318,6 +1365,7 @@ where } #[allow(clippy::trait_duplication_in_bounds)] // HACK: clippy 1.64.0 bug +#[allow(deprecated)] fn recognize_float_or_exceptions<I, E: ParserError<I>>( input: &mut I, ) -> PResult<<I as Stream>::Slice, E> @@ -1332,8 +1380,12 @@ where alt(( recognize_float, crate::token::tag_no_case("nan"), - crate::token::tag_no_case("infinity"), - crate::token::tag_no_case("inf"), + ( + opt(one_of(['+', '-'])), + crate::token::tag_no_case("infinity"), + ) + .recognize(), + (opt(one_of(['+', '-'])), crate::token::tag_no_case("inf")).recognize(), )) 
.parse_next(input) } diff --git a/src/ascii/tests.rs b/src/ascii/tests.rs index aacbd86..5091dc5 100644 --- a/src/ascii/tests.rs +++ b/src/ascii/tests.rs @@ -226,37 +226,37 @@ mod complete { } #[test] - fn is_not_line_ending_bytes() { + fn is_till_line_ending_bytes() { let a: &[u8] = b"ab12cd\nefgh"; assert_eq!( - not_line_ending::<_, InputError<_>>.parse_peek(a), + till_line_ending::<_, InputError<_>>.parse_peek(a), Ok((&b"\nefgh"[..], &b"ab12cd"[..])) ); let b: &[u8] = b"ab12cd\nefgh\nijkl"; assert_eq!( - not_line_ending::<_, InputError<_>>.parse_peek(b), + till_line_ending::<_, InputError<_>>.parse_peek(b), Ok((&b"\nefgh\nijkl"[..], &b"ab12cd"[..])) ); let c: &[u8] = b"ab12cd\r\nefgh\nijkl"; assert_eq!( - not_line_ending::<_, InputError<_>>.parse_peek(c), + till_line_ending::<_, InputError<_>>.parse_peek(c), Ok((&b"\r\nefgh\nijkl"[..], &b"ab12cd"[..])) ); let d: &[u8] = b"ab12cd"; assert_eq!( - not_line_ending::<_, InputError<_>>.parse_peek(d), + till_line_ending::<_, InputError<_>>.parse_peek(d), Ok((&[][..], d)) ); } #[test] - fn is_not_line_ending_str() { + fn is_till_line_ending_str() { let f = "βèƒôřè\rÂßÇáƒƭèř"; assert_eq!( - not_line_ending.parse_peek(f), + till_line_ending.parse_peek(f), Err(ErrMode::Backtrack(InputError::new( &f[12..], ErrorKind::Tag @@ -265,7 +265,7 @@ mod complete { let g2: &str = "ab12cd"; assert_eq!( - not_line_ending::<_, InputError<_>>.parse_peek(g2), + till_line_ending::<_, InputError<_>>.parse_peek(g2), Ok(("", g2)) ); } @@ -327,7 +327,7 @@ mod complete { #[test] fn full_line_windows() { fn take_full_line(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { - (not_line_ending, line_ending).parse_peek(i) + (till_line_ending, line_ending).parse_peek(i) } let input = b"abc\r\n"; let output = take_full_line(input); @@ -337,7 +337,7 @@ mod complete { #[test] fn full_line_unix() { fn take_full_line(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { - (not_line_ending, line_ending).parse_peek(i) + (till_line_ending, line_ending).parse_peek(i) } 
let input = b"abc\n"; let output = take_full_line(input); @@ -528,7 +528,7 @@ mod complete { #[test] #[cfg(feature = "std")] fn float_test() { - let mut test_cases = vec![ + let test_cases = [ "+3.14", "3.14", "-3.14", @@ -546,27 +546,37 @@ mod complete { "-1.234E-12", "-1.234e-12", "0.00000000000000000087", + "inf", + "Inf", + "infinity", + "Infinity", + "-inf", + "-Inf", + "-infinity", + "-Infinity", + "+inf", + "+Inf", + "+infinity", + "+Infinity", ]; - for test in test_cases.drain(..) { + for test in test_cases { let expected32 = str::parse::<f32>(test).unwrap(); let expected64 = str::parse::<f64>(test).unwrap(); println!("now parsing: {} -> {}", test, expected32); - let larger = test.to_string(); - assert_parse!( - float.parse_peek(larger.as_bytes()), + float.parse_peek(test.as_bytes()), Ok((&b""[..], expected32)) ); - assert_parse!(float.parse_peek(&larger[..]), Ok(("", expected32))); + assert_parse!(float.parse_peek(test), Ok(("", expected32))); assert_parse!( - float.parse_peek(larger.as_bytes()), + float.parse_peek(test.as_bytes()), Ok((&b""[..], expected64)) ); - assert_parse!(float.parse_peek(&larger[..]), Ok(("", expected64))); + assert_parse!(float.parse_peek(test), Ok(("", expected64))); } let remaining_exponent = "-1.234E-"; @@ -575,16 +585,27 @@ mod complete { Err(ErrMode::Cut(InputError::new("", ErrorKind::Slice))) ); - let (i, nan) = float::<_, f32, ()>.parse_peek("NaN").unwrap(); - assert!(nan.is_nan()); - assert_eq!(i, ""); + let nan_test_cases = ["nan", "NaN", "NAN"]; + + for test in nan_test_cases { + println!("now parsing: {}", test); + + let (remaining, parsed) = float::<_, f32, ()>.parse_peek(test.as_bytes()).unwrap(); + assert!(parsed.is_nan()); + assert!(remaining.is_empty()); - let (i, inf) = float::<_, f32, ()>.parse_peek("inf").unwrap(); - assert!(inf.is_infinite()); - assert_eq!(i, ""); - let (i, inf) = float::<_, f32, ()>.parse_peek("infinity").unwrap(); - assert!(inf.is_infinite()); - assert_eq!(i, ""); + let (remaining, parsed) = 
float::<_, f32, ()>.parse_peek(test).unwrap(); + assert!(parsed.is_nan()); + assert!(remaining.is_empty()); + + let (remaining, parsed) = float::<_, f64, ()>.parse_peek(test.as_bytes()).unwrap(); + assert!(parsed.is_nan()); + assert!(remaining.is_empty()); + + let (remaining, parsed) = float::<_, f64, ()>.parse_peek(test).unwrap(); + assert!(parsed.is_nan()); + assert!(remaining.is_empty()); + } } #[cfg(feature = "std")] @@ -1200,37 +1221,37 @@ mod partial { } #[test] - fn is_not_line_ending_bytes() { + fn is_till_line_ending_bytes() { let a: &[u8] = b"ab12cd\nefgh"; assert_eq!( - not_line_ending::<_, InputError<_>>.parse_peek(Partial::new(a)), + till_line_ending::<_, InputError<_>>.parse_peek(Partial::new(a)), Ok((Partial::new(&b"\nefgh"[..]), &b"ab12cd"[..])) ); let b: &[u8] = b"ab12cd\nefgh\nijkl"; assert_eq!( - not_line_ending::<_, InputError<_>>.parse_peek(Partial::new(b)), + till_line_ending::<_, InputError<_>>.parse_peek(Partial::new(b)), Ok((Partial::new(&b"\nefgh\nijkl"[..]), &b"ab12cd"[..])) ); let c: &[u8] = b"ab12cd\r\nefgh\nijkl"; assert_eq!( - not_line_ending::<_, InputError<_>>.parse_peek(Partial::new(c)), + till_line_ending::<_, InputError<_>>.parse_peek(Partial::new(c)), Ok((Partial::new(&b"\r\nefgh\nijkl"[..]), &b"ab12cd"[..])) ); let d: &[u8] = b"ab12cd"; assert_eq!( - not_line_ending::<_, InputError<_>>.parse_peek(Partial::new(d)), + till_line_ending::<_, InputError<_>>.parse_peek(Partial::new(d)), Err(ErrMode::Incomplete(Needed::new(1))) ); } #[test] - fn is_not_line_ending_str() { + fn is_till_line_ending_str() { let f = "βèƒôřè\rÂßÇáƒƭèř"; assert_eq!( - not_line_ending.parse_peek(Partial::new(f)), + till_line_ending.parse_peek(Partial::new(f)), Err(ErrMode::Backtrack(InputError::new( Partial::new(&f[12..]), ErrorKind::Tag @@ -1239,7 +1260,7 @@ mod partial { let g2: &str = "ab12cd"; assert_eq!( - not_line_ending::<_, InputError<_>>.parse_peek(Partial::new(g2)), + till_line_ending::<_, InputError<_>>.parse_peek(Partial::new(g2)), 
Err(ErrMode::Incomplete(Needed::new(1))) ); } @@ -1317,7 +1338,7 @@ mod partial { fn full_line_windows() { #[allow(clippy::type_complexity)] fn take_full_line(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (&[u8], &[u8])> { - (not_line_ending, line_ending).parse_peek(i) + (till_line_ending, line_ending).parse_peek(i) } let input = b"abc\r\n"; let output = take_full_line(Partial::new(input)); @@ -1331,7 +1352,7 @@ mod partial { fn full_line_unix() { #[allow(clippy::type_complexity)] fn take_full_line(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (&[u8], &[u8])> { - (not_line_ending, line_ending).parse_peek(i) + (till_line_ending, line_ending).parse_peek(i) } let input = b"abc\n"; let output = take_full_line(Partial::new(input)); diff --git a/src/binary/bits/mod.rs b/src/binary/bits/mod.rs index b11ba43..4a198c4 100644 --- a/src/binary/bits/mod.rs +++ b/src/binary/bits/mod.rs @@ -4,10 +4,10 @@ #[cfg(test)] mod tests; +use crate::combinator::trace; use crate::error::{ErrMode, ErrorConvert, ErrorKind, Needed, ParserError}; use crate::lib::std::ops::{AddAssign, Div, Shl, Shr}; use crate::stream::{AsBytes, Stream, StreamIsPartial, ToUsize}; -use crate::trace::trace; use crate::{unpeek, IResult, PResult, Parser}; /// Number of bits in a byte @@ -157,7 +157,7 @@ where /// } /// /// fn parser(input: (Stream<'_>, usize), count: usize)-> IResult<(Stream<'_>, usize), u8> { -/// take(count).parse_peek(input) +/// take(count).parse_peek(input) /// } /// /// // Consumes 0 bits, returns 0 diff --git a/src/binary/mod.rs b/src/binary/mod.rs index 8b2ee74..01053a5 100644 --- a/src/binary/mod.rs +++ b/src/binary/mod.rs @@ -8,6 +8,7 @@ pub mod bits; mod tests; use crate::combinator::repeat; +use crate::combinator::trace; use crate::error::ErrMode; use crate::error::ErrorKind; use crate::error::Needed; @@ -17,7 +18,6 @@ use crate::stream::Accumulate; use crate::stream::{AsBytes, Stream, StreamIsPartial}; use crate::stream::{ToUsize, UpdateSlice}; use crate::token::take; -use 
crate::trace::trace; use crate::PResult; use crate::Parser; @@ -2387,8 +2387,11 @@ where }(input) } -/// Gets a number from the parser and returns a -/// subslice of the input of that size. +/// Get a length-prefixed slice ([TLV](https://en.wikipedia.org/wiki/Type-length-value)) +/// +/// To apply a parser to the returned slice, see [`length_and_then`]. +/// +/// If the count is for something besides tokens, see [`length_repeat`]. /// /// *Complete version*: Returns an error if there is not enough input data. /// @@ -2404,7 +2407,7 @@ where /// # use winnow::prelude::*; /// use winnow::Bytes; /// use winnow::binary::be_u16; -/// use winnow::binary::length_data; +/// use winnow::binary::length_take; /// use winnow::token::tag; /// /// type Stream<'i> = Partial<&'i Bytes>; @@ -2414,13 +2417,13 @@ where /// } /// /// fn parser(s: Stream<'_>) -> IResult<Stream<'_>, &[u8]> { -/// length_data(be_u16).parse_peek(s) +/// length_take(be_u16).parse_peek(s) /// } /// /// assert_eq!(parser(stream(b"\x00\x03abcefg")), Ok((stream(&b"efg"[..]), &b"abc"[..]))); /// assert_eq!(parser(stream(b"\x00\x03a")), Err(ErrMode::Incomplete(Needed::new(2)))); /// ``` -pub fn length_data<I, N, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E> +pub fn length_take<I, N, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E> where I: StreamIsPartial, I: Stream, @@ -2428,18 +2431,27 @@ where F: Parser<I, N, E>, E: ParserError<I>, { - trace("length_data", move |i: &mut I| { + trace("length_take", move |i: &mut I| { let length = f.parse_next(i)?; crate::token::take(length).parse_next(i) }) } -/// Gets a number from the first parser, -/// takes a subslice of the input of that size, -/// then applies the second parser on that subslice. -/// If the second parser returns `Incomplete`, -/// `length_value` will return an error. 
+/// Deprecated since 0.5.27, replaced with [`length_take`] +#[deprecated(since = "0.5.27", note = "Replaced with `length_take`")] +pub fn length_data<I, N, E, F>(f: F) -> impl Parser<I, <I as Stream>::Slice, E> +where + I: StreamIsPartial, + I: Stream, + N: ToUsize, + F: Parser<I, N, E>, + E: ParserError<I>, +{ + length_take(f) +} + +/// Parse a length-prefixed slice ([TLV](https://en.wikipedia.org/wiki/Type-length-value)) /// /// *Complete version*: Returns an error if there is not enough input data. /// @@ -2456,7 +2468,7 @@ where /// # use winnow::prelude::*; /// use winnow::Bytes; /// use winnow::binary::be_u16; -/// use winnow::binary::length_value; +/// use winnow::binary::length_and_then; /// use winnow::token::tag; /// /// type Stream<'i> = Partial<&'i Bytes>; @@ -2472,14 +2484,14 @@ where /// } /// /// fn parser(s: Stream<'_>) -> IResult<Stream<'_>, &[u8]> { -/// length_value(be_u16, "abc").parse_peek(s) +/// length_and_then(be_u16, "abc").parse_peek(s) /// } /// /// assert_eq!(parser(stream(b"\x00\x03abcefg")), Ok((stream(&b"efg"[..]), &b"abc"[..]))); /// assert_eq!(parser(stream(b"\x00\x03123123")), Err(ErrMode::Backtrack(InputError::new(complete_stream(&b"123"[..]), ErrorKind::Tag)))); /// assert_eq!(parser(stream(b"\x00\x03a")), Err(ErrMode::Incomplete(Needed::new(2)))); /// ``` -pub fn length_value<I, O, N, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, O, E> +pub fn length_and_then<I, O, N, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, O, E> where I: StreamIsPartial, I: Stream + UpdateSlice + Clone, @@ -2488,8 +2500,8 @@ where G: Parser<I, O, E>, E: ParserError<I>, { - trace("length_value", move |i: &mut I| { - let data = length_data(f.by_ref()).parse_next(i)?; + trace("length_and_then", move |i: &mut I| { + let data = length_take(f.by_ref()).parse_next(i)?; let mut data = I::update_slice(i.clone(), data); let _ = data.complete(); let o = g.by_ref().complete_err().parse_next(&mut data)?; @@ -2497,8 +2509,23 @@ where }) } -/// Gets a number from the 
first parser, -/// then applies the second parser that many times. +/// Deprecated since 0.5.27, replaced with [`length_and_then`] +#[deprecated(since = "0.5.27", note = "Replaced with `length_and_then`")] +pub fn length_value<I, O, N, E, F, G>(f: F, g: G) -> impl Parser<I, O, E> +where + I: StreamIsPartial, + I: Stream + UpdateSlice + Clone, + N: ToUsize, + F: Parser<I, N, E>, + G: Parser<I, O, E>, + E: ParserError<I>, +{ + length_and_then(f, g) +} + +/// [`Accumulate`] a length-prefixed sequence of values ([TLV](https://en.wikipedia.org/wiki/Type-length-value)) +/// +/// If the length represents token counts, see instead [`length_take`] /// /// # Arguments /// * `f` The parser to apply to obtain the count. @@ -2533,7 +2560,7 @@ where /// assert_eq!(parser(stream(b"\x03123123123")), Err(ErrMode::Backtrack(InputError::new(stream(b"123123123"), ErrorKind::Tag)))); /// # } /// ``` -pub fn length_count<I, O, C, N, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, C, E> +pub fn length_repeat<I, O, C, N, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, C, E> where I: Stream, N: ToUsize, @@ -2542,9 +2569,23 @@ where G: Parser<I, O, E>, E: ParserError<I>, { - trace("length_count", move |i: &mut I| { + trace("length_repeat", move |i: &mut I| { let n = f.parse_next(i)?; let n = n.to_usize(); repeat(n, g.by_ref()).parse_next(i) }) } + +/// Deprecated since 0.5.27, replaced with [`length_repeat`] +#[deprecated(since = "0.5.27", note = "Replaced with `length_repeat`")] +pub fn length_count<I, O, C, N, E, F, G>(f: F, g: G) -> impl Parser<I, C, E> +where + I: Stream, + N: ToUsize, + C: Accumulate<O>, + F: Parser<I, N, E>, + G: Parser<I, O, E>, + E: ParserError<I>, +{ + length_repeat(f, g) +} diff --git a/src/binary/tests.rs b/src/binary/tests.rs index 5d92055..bc2a005 100644 --- a/src/binary/tests.rs +++ b/src/binary/tests.rs @@ -1123,7 +1123,7 @@ mod partial { #[test] #[cfg(feature = "alloc")] - fn length_count_test() { + fn length_repeat_test() { fn number(i: Partial<&[u8]>) -> 
IResult<Partial<&[u8]>, u32> { digit .try_map(str::from_utf8) @@ -1132,7 +1132,7 @@ mod partial { } fn cnt(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - length_count(unpeek(number), "abc").parse_peek(i) + length_repeat(unpeek(number), "abc").parse_peek(i) } assert_eq!( @@ -1164,7 +1164,53 @@ mod partial { } #[test] - fn length_data_test() { + fn partial_length_bytes() { + use crate::binary::le_u8; + + fn x(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { + length_take(le_u8).parse_peek(i) + } + assert_eq!( + x(Partial::new(b"\x02..>>")), + Ok((Partial::new(&b">>"[..]), &b".."[..])) + ); + assert_eq!( + x(Partial::new(b"\x02..")), + Ok((Partial::new(&[][..]), &b".."[..])) + ); + assert_eq!( + x(Partial::new(b"\x02.")), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + x(Partial::new(b"\x02")), + Err(ErrMode::Incomplete(Needed::new(2))) + ); + + fn y(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { + let (i, _) = "magic".parse_peek(i)?; + length_take(le_u8).parse_peek(i) + } + assert_eq!( + y(Partial::new(b"magic\x02..>>")), + Ok((Partial::new(&b">>"[..]), &b".."[..])) + ); + assert_eq!( + y(Partial::new(b"magic\x02..")), + Ok((Partial::new(&[][..]), &b".."[..])) + ); + assert_eq!( + y(Partial::new(b"magic\x02.")), + Err(ErrMode::Incomplete(Needed::new(1))) + ); + assert_eq!( + y(Partial::new(b"magic\x02")), + Err(ErrMode::Incomplete(Needed::new(2))) + ); + } + + #[test] + fn length_take_test() { fn number(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u32> { digit .try_map(str::from_utf8) @@ -1173,7 +1219,7 @@ mod partial { } fn take(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - length_data(unpeek(number)).parse_peek(i) + length_take(unpeek(number)).parse_peek(i) } assert_eq!( @@ -1198,14 +1244,14 @@ mod partial { } #[test] - fn length_value_test() { + fn length_and_then_test() { use crate::stream::StreamIsPartial; - fn length_value_1(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u16> { - length_value(be_u8, 
be_u16).parse_peek(i) + fn length_and_then_1(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, u16> { + length_and_then(be_u8, be_u16).parse_peek(i) } - fn length_value_2(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (u8, u8)> { - length_value(be_u8, (be_u8, be_u8)).parse_peek(i) + fn length_and_then_2(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, (u8, u8)> { + length_and_then(be_u8, (be_u8, be_u8)).parse_peek(i) } let mut empty_complete = Partial::new(&b""[..]); @@ -1213,14 +1259,14 @@ mod partial { let i1 = [0, 5, 6]; assert_eq!( - length_value_1(Partial::new(&i1)), + length_and_then_1(Partial::new(&i1)), Err(ErrMode::Backtrack(error_position!( &empty_complete, ErrorKind::Slice ))) ); assert_eq!( - length_value_2(Partial::new(&i1)), + length_and_then_2(Partial::new(&i1)), Err(ErrMode::Backtrack(error_position!( &empty_complete, ErrorKind::Token @@ -1232,14 +1278,14 @@ mod partial { let mut middle_complete = Partial::new(&i2[1..2]); let _ = middle_complete.complete(); assert_eq!( - length_value_1(Partial::new(&i2)), + length_and_then_1(Partial::new(&i2)), Err(ErrMode::Backtrack(error_position!( &middle_complete, ErrorKind::Slice ))) ); assert_eq!( - length_value_2(Partial::new(&i2)), + length_and_then_2(Partial::new(&i2)), Err(ErrMode::Backtrack(error_position!( &empty_complete, ErrorKind::Token @@ -1249,21 +1295,21 @@ mod partial { let i3 = [2, 5, 6, 3, 4, 5, 7]; assert_eq!( - length_value_1(Partial::new(&i3)), + length_and_then_1(Partial::new(&i3)), Ok((Partial::new(&i3[3..]), 1286)) ); assert_eq!( - length_value_2(Partial::new(&i3)), + length_and_then_2(Partial::new(&i3)), Ok((Partial::new(&i3[3..]), (5, 6))) ); let i4 = [3, 5, 6, 3, 4, 5]; assert_eq!( - length_value_1(Partial::new(&i4)), + length_and_then_1(Partial::new(&i4)), Ok((Partial::new(&i4[4..]), 1286)) ); assert_eq!( - length_value_2(Partial::new(&i4)), + length_and_then_2(Partial::new(&i4)), Ok((Partial::new(&i4[4..]), (5, 6))) ); } diff --git a/src/combinator/branch.rs b/src/combinator/branch.rs index 
b909ff1..7fdcf8d 100644 --- a/src/combinator/branch.rs +++ b/src/combinator/branch.rs @@ -1,6 +1,6 @@ +use crate::combinator::trace; use crate::error::{ErrMode, ErrorKind, ParserError}; use crate::stream::Stream; -use crate::trace::trace; use crate::*; #[doc(inline)] @@ -16,7 +16,10 @@ pub trait Alt<I, O, E> { /// Pick the first successful parser /// -/// For tight control over the error, add a final case using [`fail`][crate::combinator::fail]. +/// To stop on an error, rather than trying further cases, see +/// [`cut_err`][crate::combinator::cut_err] ([example][crate::_tutorial::chapter_6]). +/// +/// For tight control over the error when no match is found, add a final case using [`fail`][crate::combinator::fail]. /// Alternatively, with a [custom error type][crate::_topic::error], it is possible to track all /// errors or return the error of the parser that went the farthest in the input data. /// @@ -65,6 +68,11 @@ pub trait Permutation<I, O, E> { /// It takes as argument a tuple of parsers, and returns a /// tuple of the parser results. /// +/// To stop on an error, rather than trying further permutations, see +/// [`cut_err`][crate::combinator::cut_err] ([example][crate::_tutorial::chapter_6]). +/// +/// # Example +/// /// ```rust /// # use winnow::{error::ErrMode,error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::prelude::*; @@ -170,6 +178,30 @@ macro_rules! alt_trait_impl( ); ); +macro_rules! succ ( + (0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*)); + (1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*)); + (2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*)); + (3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*)); + (4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*)); + (5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*)); + (6, $submac:ident ! ($($rest:tt)*)) => ($submac!(7, $($rest)*)); + (7, $submac:ident ! 
($($rest:tt)*)) => ($submac!(8, $($rest)*)); + (8, $submac:ident ! ($($rest:tt)*)) => ($submac!(9, $($rest)*)); + (9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*)); + (10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*)); + (11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*)); + (12, $submac:ident ! ($($rest:tt)*)) => ($submac!(13, $($rest)*)); + (13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*)); + (14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*)); + (15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*)); + (16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*)); + (17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*)); + (18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*)); + (19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*)); + (20, $submac:ident ! ($($rest:tt)*)) => ($submac!(21, $($rest)*)); +); + macro_rules! alt_trait_inner( ($it:tt, $self:expr, $input:expr, $start:ident, $err:expr, $head:ident $($id:ident)+) => ({ $input.reset($start.clone()); diff --git a/src/combinator/core.rs b/src/combinator/core.rs index d784b4e..efd7758 100644 --- a/src/combinator/core.rs +++ b/src/combinator/core.rs @@ -1,6 +1,6 @@ +use crate::combinator::trace; use crate::error::{ErrMode, ErrorKind, Needed, ParserError}; use crate::stream::Stream; -use crate::trace::trace; use crate::*; /// Return the remaining input. @@ -225,6 +225,8 @@ where /// This commits the parse result, preventing alternative branch paths like with /// [`winnow::combinator::alt`][crate::combinator::alt]. /// +/// See the [tutorial][crate::_tutorial::chapter_6] for more details. +/// /// # Example /// /// Without `cut_err`: @@ -331,7 +333,7 @@ where /// Call the iterator's [`ParserIterator::finish`] method to get the remaining input if successful, /// or the error value if we encountered an error. /// -/// On [`ErrMode::Backtrack`], iteration will stop. 
To instead chain an error up, see [`cut_err`]. +/// On [`ErrMode::Backtrack`], iteration will stop. To instead chain an error up, see [`cut_err`]. /// /// # Example /// @@ -432,11 +434,16 @@ enum State<E> { Incomplete(Needed), } -/// Always succeeds with given value without consuming any input. +/// Succeed, consuming no input /// /// For example, it can be used as the last alternative in `alt` to /// specify the default case. /// +/// Useful with: +/// - [`Parser::value`] +/// - [`Parser::default_value`] +/// - [`Parser::map`] +/// /// **Note:** This never advances the [`Stream`] /// /// # Example @@ -445,16 +452,13 @@ enum State<E> { /// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError}; /// # use winnow::prelude::*; /// use winnow::combinator::alt; -/// use winnow::combinator::success; -/// -/// let mut parser = success::<_,_,InputError<_>>(10); -/// assert_eq!(parser.parse_peek("xyz"), Ok(("xyz", 10))); +/// use winnow::combinator::empty; /// /// fn sign(input: &str) -> IResult<&str, isize> { /// alt(( /// '-'.value(-1), /// '+'.value(1), -/// success::<_,_,InputError<_>>(1) +/// empty.value(1) /// )).parse_peek(input) /// } /// assert_eq!(sign("+10"), Ok(("10", 1))); @@ -462,7 +466,13 @@ enum State<E> { /// assert_eq!(sign("10"), Ok(("10", 1))); /// ``` #[doc(alias = "value")] -#[doc(alias = "empty")] +#[doc(alias = "success")] +pub fn empty<I: Stream, E: ParserError<I>>(_input: &mut I) -> PResult<(), E> { + Ok(()) +} + +/// Deprecated, replaced with [`empty`] + [`Parser::value`] +#[deprecated(since = "0.5.35", note = "Replaced with empty.value(...)`")] pub fn success<I: Stream, O: Clone, E: ParserError<I>>(val: O) -> impl Parser<I, O, E> { trace("success", move |_input: &mut I| Ok(val.clone())) } diff --git a/src/trace/internals.rs b/src/combinator/debug/internals.rs index 136d21b..c38b11e 100644 --- a/src/trace/internals.rs +++ b/src/combinator/debug/internals.rs @@ -4,6 +4,63 @@ use std::io::Write; use crate::error::ErrMode; use 
crate::stream::Stream; +use crate::*; + +pub struct Trace<P, D, I, O, E> +where + P: Parser<I, O, E>, + I: Stream, + D: std::fmt::Display, +{ + parser: P, + name: D, + call_count: usize, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<P, D, I, O, E> Trace<P, D, I, O, E> +where + P: Parser<I, O, E>, + I: Stream, + D: std::fmt::Display, +{ + #[inline(always)] + pub fn new(parser: P, name: D) -> Self { + Self { + parser, + name, + call_count: 0, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<P, D, I, O, E> Parser<I, O, E> for Trace<P, D, I, O, E> +where + P: Parser<I, O, E>, + I: Stream, + D: std::fmt::Display, +{ + #[inline] + fn parse_next(&mut self, i: &mut I) -> PResult<O, E> { + let depth = Depth::new(); + let original = i.checkpoint(); + start(*depth, &self.name, self.call_count, i); + + let res = self.parser.parse_next(i); + + let consumed = i.offset_from(&original); + let severity = Severity::with_result(&res); + end(*depth, &self.name, self.call_count, consumed, severity); + self.call_count += 1; + + res + } +} pub struct Depth { depth: usize, diff --git a/src/trace/mod.rs b/src/combinator/debug/mod.rs index 316733e..ee4c293 100644 --- a/src/trace/mod.rs +++ b/src/combinator/debug/mod.rs @@ -1,11 +1,4 @@ -//! Parser execution tracing -//! -//! By default, nothing happens and tracing gets compiled away as a no-op. To enable tracing, use -//! `--features debug`. -//! -//! # Example -//! -//!![Trace output from string example](https://raw.githubusercontent.com/winnow-rs/winnow/main/assets/trace.svg "Example output") +#![cfg_attr(feature = "debug", allow(clippy::std_instead_of_core))] #[cfg(feature = "debug")] mod internals; @@ -19,9 +12,9 @@ compile_error!("`debug` requires `std`"); /// Trace the execution of the parser /// -/// Note that [`Parser::context` also provides high level trace information. 
+/// Note that [`Parser::context`] also provides high level trace information. /// -/// See [`trace` module][self] for more details. +/// See [tutorial][crate::_tutorial::chapter_8] for more details. /// /// # Example /// @@ -30,7 +23,7 @@ compile_error!("`debug` requires `std`"); /// # use winnow::token::take_while; /// # use winnow::stream::AsChar; /// # use winnow::prelude::*; -/// use winnow::trace::trace; +/// use winnow::combinator::trace; /// /// fn short_alpha<'s>(s: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { /// trace("short_alpha", @@ -49,25 +42,11 @@ compile_error!("`debug` requires `std`"); #[cfg_attr(not(feature = "debug"), inline(always))] pub fn trace<I: Stream, O, E>( name: impl crate::lib::std::fmt::Display, - mut parser: impl Parser<I, O, E>, + parser: impl Parser<I, O, E>, ) -> impl Parser<I, O, E> { #[cfg(feature = "debug")] { - let mut call_count = 0; - move |i: &mut I| { - let depth = internals::Depth::new(); - let original = i.checkpoint(); - internals::start(*depth, &name, call_count, i); - - let res = parser.parse_next(i); - - let consumed = i.offset_from(&original); - let severity = internals::Severity::with_result(&res); - internals::end(*depth, &name, call_count, consumed, severity); - call_count += 1; - - res - } + internals::Trace::new(parser, name) } #[cfg(not(feature = "debug"))] { diff --git a/src/combinator/mod.rs b/src/combinator/mod.rs index ec68e48..da5fa79 100644 --- a/src/combinator/mod.rs +++ b/src/combinator/mod.rs @@ -6,81 +6,83 @@ //! //! Those are used to recognize the lowest level elements of your grammar, like, "here is a dot", or "here is an big endian integer". //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [`one_of`][crate::token::one_of] | `one_of(['a', 'b', 'c'])` | `"abc"` | `Ok(("bc", 'a'))` |Matches one of the provided characters (works with non ASCII characters too)| -//! 
| [`none_of`][crate::token::none_of] | `none_of(['a', 'b', 'c'])` | `"xyab"` | `Ok(("yab", 'x'))` |Matches anything but the provided characters| -//! | [`tag`][crate::token::tag] | `"hello"` | `"hello world"` | `Ok((" world", "hello"))` |Recognizes a specific suite of characters or bytes| -//! | [`tag_no_case`][crate::token::tag_no_case] | `tag_no_case("hello")` | `"HeLLo World"` | `Ok((" World", "HeLLo"))` |Case insensitive comparison. Note that case insensitive comparison is not well defined for unicode, and that you might have bad surprises| -//! | [`take`][crate::token::take] | `take(4)` | `"hello"` | `Ok(("o", "hell"))` |Takes a specific number of bytes or characters| -//! | [`take_while`][crate::token::take_while] | `take_while(0.., is_alphabetic)` | `"abc123"` | `Ok(("123", "abc"))` |Returns the longest list of bytes for which the provided pattern matches.| -//! | [`take_till0`][crate::token::take_till0] | `take_till0(is_alphabetic)` | `"123abc"` | `Ok(("abc", "123"))` |Returns the longest list of bytes or characters until the provided pattern matches. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while`: `take_till(f)` is equivalent to `take_while(0.., \|c\| !f(c))`| -//! | [`take_until0`][crate::token::take_until0] | `take_until0("world")` | `"Hello world"` | `Ok(("world", "Hello "))` |Returns the longest list of bytes or characters until the provided tag is found. `take_until1` does the same, but must return at least one character| +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`one_of`][crate::token::one_of] | `one_of(['a', 'b', 'c'])` | `"abc"` | `"bc"` | `Ok('a')` |Matches one of the provided characters (works with non ASCII characters too)| +//! | [`none_of`][crate::token::none_of] | `none_of(['a', 'b', 'c'])` | `"xyab"` | `"yab"` | `Ok('x')` |Matches anything but the provided characters| +//! 
| [`tag`][crate::token::tag] | `"hello"` | `"hello world"` | `" world"` | `Ok("hello")` |Recognizes a specific suite of characters or bytes (see also [`Caseless`][crate::ascii::Caseless])| +//! | [`take`][crate::token::take] | `take(4)` | `"hello"` | `"o"` | `Ok("hell")` |Takes a specific number of bytes or characters| +//! | [`take_while`][crate::token::take_while] | `take_while(0.., is_alphabetic)` | `"abc123"` | `"123"` | `Ok("abc")` |Returns the longest list of bytes for which the provided pattern matches.| +//! | [`take_till0`][crate::token::take_till0] | `take_till0(is_alphabetic)` | `"123abc"` | `"abc"` | `Ok("123")` |Returns the longest list of bytes or characters until the provided pattern matches. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while`: `take_till(f)` is equivalent to `take_while(0.., \|c\| !f(c))`| +//! | [`take_until`][crate::token::take_until] | `take_until(0.., "world")` | `"Hello world"` | `"world"` | `Ok("Hello ")` |Returns the longest list of bytes or characters until the provided tag is found.| //! //! ## Choice combinators //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [`alt`][crate::combinator::alt] | `alt(("ab", "cd"))` | `"cdef"` | `Ok(("ef", "cd"))` |Try a list of parsers and return the result of the first successful one| -//! | [`dispatch`][crate::combinator::dispatch] | \- | \- | \- | `match` for parsers | -//! | [`permutation`][crate::combinator::permutation] | `permutation(("ab", "cd", "12"))` | `"cd12abc"` | `Ok(("c", ("ab", "cd", "12"))` |Succeeds when all its child parser have succeeded, whatever the order| +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`alt`] | `alt(("ab", "cd"))` | `"cdef"` | `"ef"` | `Ok("cd")` |Try a list of parsers and return the result of the first successful one| +//! | [`dispatch`] | \- | \- | \- | \- | `match` for parsers | +//! 
| [`permutation`] | `permutation(("ab", "cd", "12"))` | `"cd12abc"` | `"c"` | `Ok(("ab", "cd", "12"))` |Succeeds when all its child parser have succeeded, whatever the order| //! //! ## Sequence combinators //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [`(...)` (tuples)][crate::Parser] | `("ab", "XY", take(1))` | `"abXYZ!"` | `Ok(("!", ("ab", "XY", "Z")))` |Chains parsers and assemble the sub results in a tuple. You can use as many child parsers as you can put elements in a tuple| -//! | [`delimited`] | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `Ok(("cd", "ab"))` || -//! | [`preceded`] | `preceded("ab", "XY")` | `"abXYZ"` | `Ok(("Z", "XY"))` || -//! | [`terminated`] | `terminated("ab", "XY")` | `"abXYZ"` | `Ok(("Z", "ab"))` || -//! | [`separated_pair`] | `separated_pair("hello", char(','), "world")` | `"hello,world!"` | `Ok(("!", ("hello", "world")))` || +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`(...)` (tuples)][crate::Parser] | `("ab", "XY", take(1))` | `"abXYZ!"` | `"!"` | `Ok(("ab", "XY", "Z"))` |Chains parsers and assemble the sub results in a tuple. You can use as many child parsers as you can put elements in a tuple| +//! | [`seq!`] | `seq!(_: char('('), take(2), _: char(')'))` | `"(ab)cd"` | `"cd"` | `Ok("ab")` || +//! | [`delimited`] | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `"cd"` | `Ok("ab")` || +//! | [`preceded`] | `preceded("ab", "XY")` | `"abXYZ"` | `"Z"` | `Ok("XY")` || +//! | [`terminated`] | `terminated("ab", "XY")` | `"abXYZ"` | `"Z"` | `Ok("ab")` || +//! | [`separated_pair`] | `separated_pair("hello", char(','), "world")` | `"hello,world!"` | `"!"` | `Ok(("hello", "world"))` || //! //! ## Applying a parser multiple times //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! 
| [`repeat`][crate::combinator::repeat] | `repeat(1..=3, "ab")` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| -//! | [`repeat_till0`][crate::combinator::repeat_till0] | `repeat_till0(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| -//! | [`separated0`][crate::combinator::separated0] | `separated0("ab", ",")` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated1` works like `separated0` but must returns at least one element| -//! | [`fold_repeat`][crate::combinator::fold_repeat] | `fold_repeat(1..=2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value| +//! | combinator | usage | input | new input | output | comment | +//! |---|---|---|---|---|---| +//! | [`repeat`] | `repeat(1..=3, "ab")` | `"ababc"` | `"c"` | `Ok(vec!["ab", "ab"])` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| +//! | [`repeat_till`] | `repeat_till(0.., tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `"g"` | `Ok((vec!["ab", "ab"], "ef"))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| +//! | [`separated`] | `separated(1..=3, "ab", ",")` | `"ab,ab,ab."` | `"."` | `Ok(vec!["ab", "ab", "ab"])` |Applies the parser and separator between m and n times (n included) and returns the list of results in a Vec| +//! | [`fold_repeat`] | `fold_repeat(1..=2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `[3]` | `Ok(3)` |Applies the parser between m and n times (n included) and folds the list of return value| //! //! ## Partial related //! -//! 
- [`eof`][eof]: Returns its input if it is at the end of input data +//! - [`eof`]: Returns its input if it is at the end of input data //! - [`Parser::complete_err`]: Replaces an `Incomplete` returned by the child parser with an `Backtrack` //! //! ## Modifiers //! -//! - [`cond`][cond]: Conditional combinator. Wraps another parser and calls it if the condition is met -//! - [`Parser::flat_map`][crate::Parser::flat_map]: method to map a new parser from the output of the first parser, then apply that parser over the rest of the input -//! - [`Parser::value`][crate::Parser::value]: method to replace the result of a parser -//! - [`Parser::map`][crate::Parser::map]: method to map a function on the result of a parser -//! - [`Parser::and_then`][crate::Parser::and_then]: Applies a second parser over the output of the first one -//! - [`Parser::verify_map`][Parser::verify_map]: Maps a function returning an `Option` on the output of a parser -//! - [`Parser::try_map`][Parser::try_map]: Maps a function returning a `Result` on the output of a parser -//! - [`Parser::parse_to`][crate::Parser::parse_to]: Apply [`std::str::FromStr`] to the output of the parser -//! - [`not`][not]: Returns a result only if the embedded parser returns `Backtrack` or `Incomplete`. Does not consume the input -//! - [`opt`][opt]: Make the underlying parser optional -//! - [`peek`][peek]: Returns a result without consuming the input -//! - [`Parser::recognize`][Parser::recognize]: If the child parser was successful, return the consumed input as the produced value -//! - [`Parser::with_recognized`][Parser::with_recognized]: If the child parser was successful, return a tuple of the consumed input and the produced output. -//! - [`Parser::span`][Parser::span]: If the child parser was successful, return the location of the consumed input as the produced value -//! 
- [`Parser::with_span`][Parser::with_span]: If the child parser was successful, return a tuple of the location of the consumed input and the produced output. +//! - [`cond`]: Conditional combinator. Wraps another parser and calls it if the condition is met +//! - [`Parser::flat_map`]: method to map a new parser from the output of the first parser, then apply that parser over the rest of the input +//! - [`Parser::value`]: method to replace the result of a parser +//! - [`Parser::default_value`]: method to replace the result of a parser +//! - [`Parser::void`]: method to discard the result of a parser +//! - [`Parser::map`]: method to map a function on the result of a parser +//! - [`Parser::and_then`]: Applies a second parser over the output of the first one +//! - [`Parser::verify_map`]: Maps a function returning an `Option` on the output of a parser +//! - [`Parser::try_map`]: Maps a function returning a `Result` on the output of a parser +//! - [`Parser::parse_to`]: Apply [`std::str::FromStr`] to the output of the parser +//! - [`not`]: Returns a result only if the embedded parser returns `Backtrack` or `Incomplete`. Does not consume the input +//! - [`opt`]: Make the underlying parser optional +//! - [`peek`]: Returns a result without consuming the input +//! - [`Parser::recognize`]: If the child parser was successful, return the consumed input as the produced value +//! - [`Parser::with_recognized`]: If the child parser was successful, return a tuple of the consumed input and the produced output. +//! - [`Parser::span`]: If the child parser was successful, return the location of the consumed input as the produced value +//! - [`Parser::with_span`]: If the child parser was successful, return a tuple of the location of the consumed input and the produced output. //! - [`Parser::verify`]: Returns the result of the child parser if it satisfies a verification function //! //! ## Error management and debugging //! //! 
- [`cut_err`]: Commit the parse result, disallowing alternative parsers from being attempted -//! - [`backtrack_err`]: Attemmpts a parse, allowing alternative parsers to be attempted despite +//! - [`backtrack_err`]: Attempts a parse, allowing alternative parsers to be attempted despite //! use of `cut_err` //! - [`Parser::context`]: Add context to the error if the parser fails -//! - [`trace`][crate::trace::trace]: Print the parse state with the `debug` feature flag +//! - [`trace`]: Print the parse state with the `debug` feature flag //! - [`todo()`]: Placeholder parser //! //! ## Remaining combinators //! -//! - [`success`][success]: Returns a value without consuming any input, always succeeds -//! - [`fail`][fail]: Inversion of `success`. Always fails. +//! - [`success`]: Returns a value without consuming any input, always succeeds +//! - [`fail`]: Inversion of `success`. Always fails. //! - [`Parser::by_ref`]: Allow moving `&mut impl Parser` into other parsers //! //! ## Text parsing @@ -90,8 +92,8 @@ //! - [`crlf`][crate::ascii::crlf]: Recognizes the string `\r\n` //! - [`line_ending`][crate::ascii::line_ending]: Recognizes an end of line (both `\n` and `\r\n`) //! - [`newline`][crate::ascii::newline]: Matches a newline character `\n` -//! - [`not_line_ending`][crate::ascii::not_line_ending]: Recognizes a string of any char except `\r` or `\n` -//! - [`rest`][rest]: Return the remaining input +//! - [`till_line_ending`][crate::ascii::till_line_ending]: Recognizes a string of any char except `\r` or `\n` +//! - [`rest`]: Return the remaining input //! //! - [`alpha0`][crate::ascii::alpha0]: Recognizes zero or more lowercase and uppercase alphabetic characters: `[a-zA-Z]`. [`alpha1`][crate::ascii::alpha1] does the same but returns at least one character //! - [`alphanumeric0`][crate::ascii::alphanumeric0]: Recognizes zero or more numerical and alphabetic characters: `[0-9a-zA-Z]`. 
[`alphanumeric1`][crate::ascii::alphanumeric1] does the same but returns at least one character @@ -102,7 +104,7 @@ //! - [`oct_digit0`][crate::ascii::oct_digit0]: Recognizes zero or more octal characters: `[0-7]`. [`oct_digit1`][crate::ascii::oct_digit1] does the same but returns at least one character //! //! - [`float`][crate::ascii::float]: Parse a floating point number in a byte string -//! - [`dec_int`][crate::ascii::dec_uint]: Decode a variable-width, decimal signed integer +//! - [`dec_int`][crate::ascii::dec_int]: Decode a variable-width, decimal signed integer //! - [`dec_uint`][crate::ascii::dec_uint]: Decode a variable-width, decimal unsigned integer //! - [`hex_uint`][crate::ascii::hex_uint]: Decode a variable-width, hexadecimal integer //! @@ -149,12 +151,13 @@ //! //! - [`bits`][crate::binary::bits::bits]: Transforms the current input type (byte slice `&[u8]`) to a bit stream on which bit specific parsers and more general combinators can be applied //! - [`bytes`][crate::binary::bits::bytes]: Transforms its bits stream input back into a byte slice for the underlying parser -//! - [`take`][crate::binary::bits::take]: Take a set number of its -//! - [`tag`][crate::binary::bits::tag]: Check if a set number of bis matches a pattern +//! - [`take`][crate::binary::bits::take]: Take a set number of bits +//! - [`tag`][crate::binary::bits::tag]: Check if a set number of bits matches a pattern //! - [`bool`][crate::binary::bits::bool]: Match any one bit mod branch; mod core; +mod debug; mod multi; mod parser; mod sequence; @@ -164,6 +167,7 @@ mod tests; pub use self::branch::*; pub use self::core::*; +pub use self::debug::*; pub use self::multi::*; pub use self::parser::*; pub use self::sequence::*; diff --git a/src/combinator/multi.rs b/src/combinator/multi.rs index 1fdb753..f76d635 100644 --- a/src/combinator/multi.rs +++ b/src/combinator/multi.rs @@ -1,18 +1,18 @@ //! 
Combinators applying their child parser multiple times +use crate::combinator::trace; use crate::error::ErrMode; use crate::error::ErrorKind; use crate::error::ParserError; use crate::stream::Accumulate; use crate::stream::Range; use crate::stream::Stream; -use crate::trace::trace; use crate::PResult; use crate::Parser; /// [`Accumulate`] the output of a parser into a container, like `Vec` /// -/// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see /// [`cut_err`][crate::combinator::cut_err]. /// /// # Arguments @@ -28,7 +28,7 @@ use crate::Parser; /// /// # Example /// -/// Zero or more reptitions: +/// Zero or more repetitions: /// ```rust /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; @@ -47,7 +47,7 @@ use crate::Parser; /// # } /// ``` /// -/// One or more reptitions: +/// One or more repetitions: /// ```rust /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; @@ -66,7 +66,7 @@ use crate::Parser; /// # } /// ``` /// -/// Fixed number of repeitions: +/// Fixed number of repetitions: /// ```rust /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; @@ -86,7 +86,7 @@ use crate::Parser; /// # } /// ``` /// -/// Arbitrary reptitions: +/// Arbitrary repetitions: /// ```rust /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; @@ -115,25 +115,194 @@ use crate::Parser; #[doc(alias = "skip_many")] #[doc(alias = "skip_many1")] #[inline(always)] -pub fn repeat<I, O, C, E, F>(range: impl Into<Range>, mut f: F) -> impl Parser<I, C, E> +pub fn repeat<I, O, C, E, P>(range: impl Into<Range>, parser: P) -> Repeat<P, I, O, C, E> where I: Stream, C: Accumulate<O>, - F: Parser<I, O, E>, + P: 
Parser<I, O, E>, E: ParserError<I>, { - let Range { - start_inclusive, - end_inclusive, - } = range.into(); - trace("repeat", move |i: &mut I| { - match (start_inclusive, end_inclusive) { - (0, None) => repeat0_(&mut f, i), - (1, None) => repeat1_(&mut f, i), - (start, end) if Some(start) == end => repeat_n_(start, &mut f, i), - (start, end) => repeat_m_n_(start, end.unwrap_or(usize::MAX), &mut f, i), - } - }) + Repeat { + range: range.into(), + parser, + i: Default::default(), + o: Default::default(), + c: Default::default(), + e: Default::default(), + } +} + +/// Implementation of [`repeat`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Repeat<P, I, O, C, E> +where + P: Parser<I, O, E>, + I: Stream, + C: Accumulate<O>, + E: ParserError<I>, +{ + range: Range, + parser: P, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + c: core::marker::PhantomData<C>, + e: core::marker::PhantomData<E>, +} + +impl<P, I, O, E> Repeat<P, I, O, (), E> +where + P: Parser<I, O, E>, + I: Stream, + E: ParserError<I>, +{ + /// Repeats the embedded parser, calling `g` to gather the results + /// + /// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see + /// [`cut_err`][crate::combinator::cut_err]. + /// + /// # Arguments + /// * `init` A function returning the initial value. + /// * `g` The function that combines a result of `f` with + /// the current accumulator. + /// + /// **Warning:** If the parser passed to `fold` accepts empty inputs + /// (like `alpha0` or `digit0`), `fold_repeat` will return an error, + /// to prevent going into an infinite loop. 
+ /// + /// # Example + /// + /// Zero or more repetitions: + /// ```rust + /// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; + /// # use winnow::prelude::*; + /// use winnow::combinator::repeat; + /// use winnow::token::tag; + /// + /// fn parser(s: &str) -> IResult<&str, Vec<&str>> { + /// repeat( + /// 0.., + /// "abc" + /// ).fold( + /// Vec::new, + /// |mut acc: Vec<_>, item| { + /// acc.push(item); + /// acc + /// } + /// ).parse_peek(s) + /// } + /// + /// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); + /// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); + /// assert_eq!(parser("123123"), Ok(("123123", vec![]))); + /// assert_eq!(parser(""), Ok(("", vec![]))); + /// ``` + /// + /// One or more repetitions: + /// ```rust + /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; + /// # use winnow::prelude::*; + /// use winnow::combinator::repeat; + /// use winnow::token::tag; + /// + /// fn parser(s: &str) -> IResult<&str, Vec<&str>> { + /// repeat( + /// 1.., + /// "abc", + /// ).fold( + /// Vec::new, + /// |mut acc: Vec<_>, item| { + /// acc.push(item); + /// acc + /// } + /// ).parse_peek(s) + /// } + /// + /// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); + /// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); + /// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(InputError::new("123123", ErrorKind::Many)))); + /// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Many)))); + /// ``` + /// + /// Arbitrary number of repetitions: + /// ```rust + /// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; + /// # use winnow::prelude::*; + /// use winnow::combinator::repeat; + /// use winnow::token::tag; + /// + /// fn parser(s: &str) -> IResult<&str, Vec<&str>> { + /// repeat( + /// 0..=2, + /// "abc", + /// ).fold( + /// Vec::new, + /// |mut acc: Vec<_>, item| { + /// acc.push(item); + /// acc + /// } + /// ).parse_peek(s) + /// } 
+ /// + /// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); + /// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); + /// assert_eq!(parser("123123"), Ok(("123123", vec![]))); + /// assert_eq!(parser(""), Ok(("", vec![]))); + /// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); + /// ``` + #[doc(alias = "fold_many0")] + #[doc(alias = "fold_many1")] + #[doc(alias = "fold_many_m_n")] + #[doc(alias = "fold_repeat")] + #[inline(always)] + pub fn fold<H, G, R>(mut self, mut init: H, mut g: G) -> impl Parser<I, R, E> + where + G: FnMut(R, O) -> R, + H: FnMut() -> R, + { + let Range { + start_inclusive, + end_inclusive, + } = self.range; + trace("repeat_fold", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => fold_repeat0_(&mut self.parser, &mut init, &mut g, i), + (1, None) => fold_repeat1_(&mut self.parser, &mut init, &mut g, i), + (start, end) => fold_repeat_m_n_( + start, + end.unwrap_or(usize::MAX), + &mut self.parser, + &mut init, + &mut g, + i, + ), + } + }) + } +} + +impl<P, I, O, C, E> Parser<I, C, E> for Repeat<P, I, O, C, E> +where + P: Parser<I, O, E>, + I: Stream, + C: Accumulate<O>, + E: ParserError<I>, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<C, E> { + let Range { + start_inclusive, + end_inclusive, + } = self.range; + trace("repeat", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => repeat0_(&mut self.parser, i), + (1, None) => repeat1_(&mut self.parser, i), + (start, end) if Some(start) == end => repeat_n_(start, &mut self.parser, i), + (start, end) => repeat_m_n_(start, end.unwrap_or(usize::MAX), &mut self.parser, i), + } + }) + .parse_next(i) + } } fn repeat0_<I, O, C, E, F>(f: &mut F, i: &mut I) -> PResult<C, E> @@ -201,6 +370,73 @@ where } } +fn repeat_n_<I, O, C, E, F>(count: usize, f: &mut F, i: &mut I) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + E: ParserError<I>, +{ + let mut res = 
C::initial(Some(count)); + + for _ in 0..count { + match f.parse_next(i) { + Ok(o) => { + res.accumulate(o); + } + Err(e) => { + return Err(e.append(i, ErrorKind::Many)); + } + } + } + + Ok(res) +} + +fn repeat_m_n_<I, O, C, E, F>(min: usize, max: usize, parse: &mut F, input: &mut I) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + E: ParserError<I>, +{ + if min > max { + return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many))); + } + + let mut res = C::initial(Some(min)); + for count in 0..max { + let start = input.checkpoint(); + let len = input.eof_offset(); + match parse.parse_next(input) { + Ok(value) => { + // infinite loop check: the parser must always consume + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`repeat` parsers must always consume", + )); + } + + res.accumulate(value); + } + Err(ErrMode::Backtrack(e)) => { + if count < min { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } else { + input.reset(start); + return Ok(res); + } + } + Err(e) => { + return Err(e); + } + } + } + + Ok(res) +} + /// [`Accumulate`] the output of parser `f` into a container, like `Vec`, until the parser `g` /// produces a result. 
/// @@ -216,11 +452,11 @@ where /// # #[cfg(feature = "std")] { /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::prelude::*; -/// use winnow::combinator::repeat_till0; +/// use winnow::combinator::repeat_till; /// use winnow::token::tag; /// /// fn parser(s: &str) -> IResult<&str, (Vec<&str>, &str)> { -/// repeat_till0("abc", "end").parse_peek(s) +/// repeat_till(0.., "abc", "end").parse_peek(s) /// }; /// /// assert_eq!(parser("abcabcend"), Ok(("", (vec!["abc", "abc"], "end")))); @@ -231,7 +467,11 @@ where /// # } /// ``` #[doc(alias = "many_till0")] -pub fn repeat_till0<I, O, C, P, E, F, G>(mut f: F, mut g: G) -> impl Parser<I, (C, P), E> +pub fn repeat_till<I, O, C, P, E, F, G>( + range: impl Into<Range>, + mut f: F, + mut g: G, +) -> impl Parser<I, (C, P), E> where I: Stream, C: Accumulate<O>, @@ -239,39 +479,262 @@ where G: Parser<I, P, E>, E: ParserError<I>, { - trace("repeat_till0", move |i: &mut I| { - let mut res = C::initial(None); - loop { - let start = i.checkpoint(); - let len = i.eof_offset(); - match g.parse_next(i) { - Ok(o) => return Ok((res, o)), - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - match f.parse_next(i) { - Err(e) => return Err(e.append(i, ErrorKind::Many)), - Ok(o) => { - // infinite loop check: the parser must always consume - if i.eof_offset() == len { - return Err(ErrMode::assert( - i, - "`repeat` parsers must always consume", - )); - } - - res.accumulate(o); + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("repeat_till", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => repeat_till0_(&mut f, &mut g, i), + (start, end) => repeat_till_m_n_(start, end.unwrap_or(usize::MAX), &mut f, &mut g, i), + } + }) +} + +/// Deprecated, replaced with [`repeat_till`] +#[deprecated(since = "0.5.35", note = "Replaced with `repeat_till`")] +#[inline(always)] +pub fn repeat_till0<I, O, C, P, E, F, G>(f: F, g: G) -> impl Parser<I, (C, P), 
E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + G: Parser<I, P, E>, + E: ParserError<I>, +{ + repeat_till(0.., f, g) +} + +fn repeat_till0_<I, O, C, P, E, F, G>(f: &mut F, g: &mut G, i: &mut I) -> PResult<(C, P), E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + G: Parser<I, P, E>, + E: ParserError<I>, +{ + let mut res = C::initial(None); + loop { + let start = i.checkpoint(); + let len = i.eof_offset(); + match g.parse_next(i) { + Ok(o) => return Ok((res, o)), + Err(ErrMode::Backtrack(_)) => { + i.reset(start); + match f.parse_next(i) { + Err(e) => return Err(e.append(i, ErrorKind::Many)), + Ok(o) => { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`repeat` parsers must always consume")); } + + res.accumulate(o); } } - Err(e) => return Err(e), + } + Err(e) => return Err(e), + } + } +} + +fn repeat_till_m_n_<I, O, C, P, E, F, G>( + min: usize, + max: usize, + f: &mut F, + g: &mut G, + i: &mut I, +) -> PResult<(C, P), E> +where + I: Stream, + C: Accumulate<O>, + F: Parser<I, O, E>, + G: Parser<I, P, E>, + E: ParserError<I>, +{ + if min > max { + return Err(ErrMode::Cut(E::from_error_kind(i, ErrorKind::Many))); + } + + let mut res = C::initial(Some(min)); + for _ in 0..min { + match f.parse_next(i) { + Ok(o) => { + res.accumulate(o); + } + Err(e) => { + return Err(e.append(i, ErrorKind::Many)); } } + } + for count in min..=max { + let start = i.checkpoint(); + let len = i.eof_offset(); + match g.parse_next(i) { + Ok(o) => return Ok((res, o)), + Err(ErrMode::Backtrack(err)) => { + if count == max { + return Err(ErrMode::Backtrack(err)); + } + i.reset(start); + match f.parse_next(i) { + Err(e) => { + return Err(e.append(i, ErrorKind::Many)); + } + Ok(o) => { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`repeat` parsers must always consume")); + } + + res.accumulate(o); + } + } + } + Err(e) 
=> return Err(e), + } + } + unreachable!() +} + +/// [`Accumulate`] the output of a parser, interleaved with `sep` +/// +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// [`cut_err`][crate::combinator::cut_err]. +/// +/// # Arguments +/// * `range` The minimum and maximum number of iterations. +/// * `parser` The parser that parses the elements of the list. +/// * `sep` The parser that parses the separator between list elements. +/// +/// **Warning:** If the separator parser accepts empty inputs +/// (like `alpha0` or `digit0`), `separated` will return an error, +/// to prevent going into an infinite loop. +/// +/// # Example +/// +/// Zero or more repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(0.., "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("def|abc"), Ok(("def|abc", vec![]))); +/// # } +/// ``` +/// +/// One or more repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(1.., "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// 
assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(InputError::new("def|abc", ErrorKind::Tag)))); +/// # } +/// ``` +/// +/// Fixed number of repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(2, "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("|abc", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Err(ErrMode::Backtrack(InputError::new("123abc", ErrorKind::Tag)))); +/// assert_eq!(parser("abc|def"), Err(ErrMode::Backtrack(InputError::new("def", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("def|abc"), Err(ErrMode::Backtrack(InputError::new("def|abc", ErrorKind::Tag)))); +/// # } +/// ``` +/// +/// Arbitrary repetitions: +/// ```rust +/// # #[cfg(feature = "std")] { +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::combinator::separated; +/// use winnow::token::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated(0..=2, "abc", "|").parse_peek(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("|abc", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("def|abc"), Ok(("def|abc", vec![]))); +/// # } +/// ``` +#[doc(alias = "sep_by")] +#[doc(alias = "sep_by1")] +#[doc(alias = "separated_list0")] +#[doc(alias = "separated_list1")] +#[doc(alias = "separated_m_n")] +#[inline(always)] +pub fn separated<I, O, 
C, O2, E, P, S>( + range: impl Into<Range>, + mut parser: P, + mut separator: S, +) -> impl Parser<I, C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("separated", move |input: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => separated0_(&mut parser, &mut separator, input), + (1, None) => separated1_(&mut parser, &mut separator, input), + (start, end) if Some(start) == end => { + separated_n_(start, &mut parser, &mut separator, input) + } + (start, end) => separated_m_n_( + start, + end.unwrap_or(usize::MAX), + &mut parser, + &mut separator, + input, + ), + } }) } -/// [`Accumulate`] the output of a parser, interleaed with `sep` +/// [`Accumulate`] the output of a parser, interleaved with `sep` /// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see /// [`cut_err`][crate::combinator::cut_err]. 
/// /// # Arguments @@ -300,6 +763,7 @@ where /// ``` #[doc(alias = "sep_by")] #[doc(alias = "separated_list0")] +#[deprecated(since = "0.5.19", note = "Replaced with `combinator::separated`")] pub fn separated0<I, O, C, O2, E, P, S>(mut parser: P, mut sep: S) -> impl Parser<I, C, E> where I: Stream, @@ -309,56 +773,74 @@ where E: ParserError<I>, { trace("separated0", move |i: &mut I| { - let mut res = C::initial(None); + separated0_(&mut parser, &mut sep, i) + }) +} - let start = i.checkpoint(); - match parser.parse_next(i) { +fn separated0_<I, O, C, O2, E, P, S>( + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let mut acc = C::initial(None); + + let start = input.checkpoint(); + match parser.parse_next(input) { + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + return Ok(acc); + } + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } + + loop { + let start = input.checkpoint(); + let len = input.eof_offset(); + match separator.parse_next(input) { Err(ErrMode::Backtrack(_)) => { - i.reset(start); - return Ok(res); + input.reset(start); + return Ok(acc); } Err(e) => return Err(e), - Ok(o) => { - res.accumulate(o); - } - } - - loop { - let start = i.checkpoint(); - let len = i.eof_offset(); - match sep.parse_next(i) { - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - return Ok(res); + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser must always consume", + )); } - Err(e) => return Err(e), - Ok(_) => { - // infinite loop check: the parser must always consume - if i.eof_offset() == len { - return Err(ErrMode::assert(i, "sep parsers must always consume")); - } - match parser.parse_next(i) { - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - return Ok(res); - } - Err(e) => return Err(e), - Ok(o) => { - 
res.accumulate(o); - } + match parser.parse_next(input) { + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + return Ok(acc); + } + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); } } } } - }) + } } -/// [`Accumulate`] the output of a parser, interleaed with `sep` +/// [`Accumulate`] the output of a parser, interleaved with `sep` /// /// Fails if the element parser does not produce at least one element.$ /// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see /// [`cut_err`][crate::combinator::cut_err]. /// /// # Arguments @@ -387,6 +869,7 @@ where /// ``` #[doc(alias = "sep_by1")] #[doc(alias = "separated_list1")] +#[deprecated(since = "0.5.19", note = "Replaced with `combinator::separated`")] pub fn separated1<I, O, C, O2, E, P, S>(mut parser: P, mut sep: S) -> impl Parser<I, C, E> where I: Stream, @@ -396,50 +879,209 @@ where E: ParserError<I>, { trace("separated1", move |i: &mut I| { - let mut res = C::initial(None); + separated1_(&mut parser, &mut sep, i) + }) +} + +fn separated1_<I, O, C, O2, E, P, S>( + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let mut acc = C::initial(None); + + // Parse the first element + match parser.parse_next(input) { + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } - // Parse the first element - match parser.parse_next(i) { + loop { + let start = input.checkpoint(); + let len = input.eof_offset(); + match separator.parse_next(input) { + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + return Ok(acc); + } Err(e) => return Err(e), - Ok(o) => { - res.accumulate(o); + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser 
must always consume", + )); + } + + match parser.parse_next(input) { + Err(ErrMode::Backtrack(_)) => { + input.reset(start); + return Ok(acc); + } + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } } } + } +} - loop { - let start = i.checkpoint(); - let len = i.eof_offset(); - match sep.parse_next(i) { - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - return Ok(res); +fn separated_n_<I, O, C, O2, E, P, S>( + count: usize, + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + let mut acc = C::initial(Some(count)); + + if count == 0 { + return Ok(acc); + } + + match parser.parse_next(input) { + Err(e) => { + return Err(e.append(input, ErrorKind::Many)); + } + Ok(o) => { + acc.accumulate(o); + } + } + + for _ in 1..count { + let len = input.eof_offset(); + match separator.parse_next(input) { + Err(e) => { + return Err(e.append(input, ErrorKind::Many)); + } + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser must always consume", + )); } - Err(e) => return Err(e), - Ok(_) => { - // infinite loop check: the parser must always consume - if i.eof_offset() == len { - return Err(ErrMode::assert(i, "sep parsers must always consume")); + + match parser.parse_next(input) { + Err(e) => { + return Err(e.append(input, ErrorKind::Many)); } + Ok(o) => { + acc.accumulate(o); + } + } + } + } + } - match parser.parse_next(i) { - Err(ErrMode::Backtrack(_)) => { - i.reset(start); - return Ok(res); - } - Err(e) => return Err(e), - Ok(o) => { - res.accumulate(o); + Ok(acc) +} + +fn separated_m_n_<I, O, C, O2, E, P, S>( + min: usize, + max: usize, + parser: &mut P, + separator: &mut S, + input: &mut I, +) -> PResult<C, E> +where + I: Stream, + C: Accumulate<O>, + P: Parser<I, O, E>, + S: Parser<I, O2, E>, + E: ParserError<I>, +{ + if min > max { + 
return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many))); + } + + let mut acc = C::initial(Some(min)); + + let start = input.checkpoint(); + match parser.parse_next(input) { + Err(ErrMode::Backtrack(e)) => { + if min == 0 { + input.reset(start); + return Ok(acc); + } else { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } + } + Err(e) => return Err(e), + Ok(o) => { + acc.accumulate(o); + } + } + + for index in 1..max { + let start = input.checkpoint(); + let len = input.eof_offset(); + match separator.parse_next(input) { + Err(ErrMode::Backtrack(e)) => { + if index < min { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } else { + input.reset(start); + return Ok(acc); + } + } + Err(e) => { + return Err(e); + } + Ok(_) => { + // infinite loop check + if input.eof_offset() == len { + return Err(ErrMode::assert( + input, + "`separated` separator parser must always consume", + )); + } + + match parser.parse_next(input) { + Err(ErrMode::Backtrack(e)) => { + if index < min { + return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); + } else { + input.reset(start); + return Ok(acc); } } + Err(e) => { + return Err(e); + } + Ok(o) => { + acc.accumulate(o); + } } } } - }) + } + + Ok(acc) } /// Alternates between two parsers, merging the results (left associative) /// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see /// [`cut_err`][crate::combinator::cut_err]. 
/// /// # Example @@ -461,14 +1103,14 @@ where pub fn separated_foldl1<I, O, O2, E, P, S, Op>( mut parser: P, mut sep: S, - op: Op, + mut op: Op, ) -> impl Parser<I, O, E> where I: Stream, P: Parser<I, O, E>, S: Parser<I, O2, E>, E: ParserError<I>, - Op: Fn(O, O2, O) -> O, + Op: FnMut(O, O2, O) -> O, { trace("separated_foldl1", move |i: &mut I| { let mut ol = parser.parse_next(i)?; @@ -506,7 +1148,7 @@ where /// Alternates between two parsers, merging the results (right associative) /// -/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see +/// This stops when either parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see /// [`cut_err`][crate::combinator::cut_err]. /// /// # Example @@ -530,14 +1172,14 @@ where pub fn separated_foldr1<I, O, O2, E, P, S, Op>( mut parser: P, mut sep: S, - op: Op, + mut op: Op, ) -> impl Parser<I, O, E> where I: Stream, P: Parser<I, O, E>, S: Parser<I, O2, E>, E: ParserError<I>, - Op: Fn(O, O2, O) -> O, + Op: FnMut(O, O2, O) -> O, { trace("separated_foldr1", move |i: &mut I| { let ol = parser.parse_next(i)?; @@ -556,73 +1198,6 @@ where }) } -fn repeat_m_n_<I, O, C, E, F>(min: usize, max: usize, parse: &mut F, input: &mut I) -> PResult<C, E> -where - I: Stream, - C: Accumulate<O>, - F: Parser<I, O, E>, - E: ParserError<I>, -{ - if min > max { - return Err(ErrMode::Cut(E::from_error_kind(input, ErrorKind::Many))); - } - - let mut res = C::initial(Some(min)); - for count in 0..max { - let start = input.checkpoint(); - let len = input.eof_offset(); - match parse.parse_next(input) { - Ok(value) => { - // infinite loop check: the parser must always consume - if input.eof_offset() == len { - return Err(ErrMode::assert( - input, - "`repeat` parsers must always consume", - )); - } - - res.accumulate(value); - } - Err(ErrMode::Backtrack(e)) => { - if count < min { - return Err(ErrMode::Backtrack(e.append(input, ErrorKind::Many))); - } else { - input.reset(start); - return Ok(res); 
- } - } - Err(e) => { - return Err(e); - } - } - } - - Ok(res) -} - -fn repeat_n_<I, O, C, E, F>(count: usize, f: &mut F, i: &mut I) -> PResult<C, E> -where - I: Stream, - C: Accumulate<O>, - F: Parser<I, O, E>, - E: ParserError<I>, -{ - let mut res = C::initial(Some(count)); - - for _ in 0..count { - match f.parse_next(i) { - Ok(o) => { - res.accumulate(o); - } - Err(e) => { - return Err(e.append(i, ErrorKind::Many)); - } - } - } - - Ok(res) -} - /// Repeats the embedded parser, filling the given slice with results. /// /// This parser fails if the input runs out before the given slice is full. @@ -673,109 +1248,14 @@ where }) } -/// Repeats the embedded parser `m..=n` times, calling `g` to gather the results -/// -/// This stops before `n` when the parser returns [`ErrMode::Backtrack`]. To instead chain an error up, see -/// [`cut_err`][crate::combinator::cut_err]. -/// -/// # Arguments -/// * `m` The minimum number of iterations. -/// * `n` The maximum number of iterations. -/// * `f` The parser to apply. -/// * `init` A function returning the initial value. -/// * `g` The function that combines a result of `f` with -/// the current accumulator. -/// -/// **Warning:** If the parser passed to `fold_repeat` accepts empty inputs -/// (like `alpha0` or `digit0`), `fold_repeat` will return an error, -/// to prevent going into an infinite loop. 
-/// -/// # Example -/// -/// Zero or more repetitions: -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::combinator::fold_repeat; -/// use winnow::token::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// fold_repeat( -/// 0.., -/// "abc", -/// Vec::new, -/// |mut acc: Vec<_>, item| { -/// acc.push(item); -/// acc -/// } -/// ).parse_peek(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); -/// assert_eq!(parser(""), Ok(("", vec![]))); -/// ``` -/// -/// One or more repetitions: -/// ```rust -/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::combinator::fold_repeat; -/// use winnow::token::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// fold_repeat( -/// 1.., -/// "abc", -/// Vec::new, -/// |mut acc: Vec<_>, item| { -/// acc.push(item); -/// acc -/// } -/// ).parse_peek(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Err(ErrMode::Backtrack(InputError::new("123123", ErrorKind::Many)))); -/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Many)))); -/// ``` -/// -/// Arbitrary number of repetitions: -/// ```rust -/// # use winnow::{error::ErrMode, error::ErrorKind, error::Needed}; -/// # use winnow::prelude::*; -/// use winnow::combinator::fold_repeat; -/// use winnow::token::tag; -/// -/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { -/// fold_repeat( -/// 0..=2, -/// "abc", -/// Vec::new, -/// |mut acc: Vec<_>, item| { -/// acc.push(item); -/// acc -/// } -/// ).parse_peek(s) -/// } -/// -/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -/// 
assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); -/// assert_eq!(parser(""), Ok(("", vec![]))); -/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); -/// ``` -#[doc(alias = "fold_many0")] -#[doc(alias = "fold_many1")] -#[doc(alias = "fold_many_m_n")] +/// Deprecated, replaced with [`Repeat::fold`] +#[deprecated(since = "0.5.36", note = "Replaced with `repeat(...).fold(...)`")] #[inline(always)] pub fn fold_repeat<I, O, E, F, G, H, R>( range: impl Into<Range>, - mut f: F, - mut init: H, - mut g: G, + f: F, + init: H, + g: G, ) -> impl Parser<I, R, E> where I: Stream, @@ -784,24 +1264,7 @@ where H: FnMut() -> R, E: ParserError<I>, { - let Range { - start_inclusive, - end_inclusive, - } = range.into(); - trace("fold_repeat", move |i: &mut I| { - match (start_inclusive, end_inclusive) { - (0, None) => fold_repeat0_(&mut f, &mut init, &mut g, i), - (1, None) => fold_repeat1_(&mut f, &mut init, &mut g, i), - (start, end) => fold_repeat_m_n_( - start, - end.unwrap_or(usize::MAX), - &mut f, - &mut init, - &mut g, - i, - ), - } - }) + repeat(range, f).fold(init, g) } fn fold_repeat0_<I, O, E, F, G, H, R>( diff --git a/src/combinator/parser.rs b/src/combinator/parser.rs index fb11adc..9ffdb3c 100644 --- a/src/combinator/parser.rs +++ b/src/combinator/parser.rs @@ -1,13 +1,17 @@ +use crate::combinator::trace; +use crate::combinator::trace_result; +#[cfg(feature = "unstable-recover")] +use crate::error::FromRecoverableError; use crate::error::{AddContext, ErrMode, ErrorKind, FromExternalError, ParserError}; use crate::lib::std::borrow::Borrow; use crate::lib::std::ops::Range; +#[cfg(feature = "unstable-recover")] +use crate::stream::Recover; use crate::stream::StreamIsPartial; use crate::stream::{Location, Stream}; -use crate::trace::trace; -use crate::trace::trace_result; use crate::*; -/// Implementation of [`Parser::by_ref`][Parser::by_ref] +/// Implementation of [`Parser::by_ref`] 
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct ByRef<'p, P> { p: &'p mut P, @@ -35,7 +39,7 @@ where pub struct Map<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(O) -> O2, + G: FnMut(O) -> O2, { parser: F, map: G, @@ -48,7 +52,7 @@ where impl<F, G, I, O, O2, E> Map<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(O) -> O2, + G: FnMut(O) -> O2, { #[inline(always)] pub(crate) fn new(parser: F, map: G) -> Self { @@ -66,7 +70,7 @@ where impl<F, G, I, O, O2, E> Parser<I, O2, E> for Map<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(O) -> O2, + G: FnMut(O) -> O2, { #[inline] fn parse_next(&mut self, i: &mut I) -> PResult<O2, E> { @@ -393,7 +397,7 @@ where pub struct Verify<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(&O2) -> bool, + G: FnMut(&O2) -> bool, I: Stream, O: Borrow<O2>, O2: ?Sized, @@ -410,7 +414,7 @@ where impl<F, G, I, O, O2, E> Verify<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(&O2) -> bool, + G: FnMut(&O2) -> bool, I: Stream, O: Borrow<O2>, O2: ?Sized, @@ -432,7 +436,7 @@ where impl<F, G, I, O, O2, E> Parser<I, O, E> for Verify<F, G, I, O, O2, E> where F: Parser<I, O, E>, - G: Fn(&O2) -> bool, + G: FnMut(&O2) -> bool, I: Stream, O: Borrow<O2>, O2: ?Sized, @@ -493,6 +497,48 @@ where } } +/// Implementation of [`Parser::default_value`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct DefaultValue<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: core::default::Default, +{ + parser: F, + o2: core::marker::PhantomData<O2>, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +impl<F, I, O, O2, E> DefaultValue<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: core::default::Default, +{ + #[inline(always)] + pub(crate) fn new(parser: F) -> Self { + Self { + parser, + o2: Default::default(), + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +impl<F, I, O, O2, E> Parser<I, O2, E> for 
DefaultValue<F, I, O, O2, E> +where + F: Parser<I, O, E>, + O2: core::default::Default, +{ + #[inline] + fn parse_next(&mut self, input: &mut I) -> PResult<O2, E> { + (self.parser).parse_next(input).map(|_| O2::default()) + } +} + /// Implementation of [`Parser::void`] #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] pub struct Void<F, I, O, E> @@ -861,3 +907,191 @@ where .parse_next(i) } } + +/// Implementation of [`Parser::retry_after`] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +#[cfg(feature = "unstable-recover")] +pub struct RetryAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + parser: P, + recover: R, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +#[cfg(feature = "unstable-recover")] +impl<P, R, I, O, E> RetryAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + #[inline(always)] + pub(crate) fn new(parser: P, recover: R) -> Self { + Self { + parser, + recover, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +#[cfg(feature = "unstable-recover")] +impl<P, R, I, O, E> Parser<I, O, E> for RetryAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<O, E> { + if I::is_recovery_supported() { + retry_after_inner(&mut self.parser, &mut self.recover, i) + } else { + self.parser.parse_next(i) + } + } +} + +#[cfg(feature = "unstable-recover")] +fn retry_after_inner<P, R, I, O, E>(parser: &mut P, recover: &mut R, i: &mut I) -> PResult<O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + loop { + let token_start = i.checkpoint(); + let 
mut err = match parser.parse_next(i) { + Ok(o) => { + return Ok(o); + } + Err(ErrMode::Incomplete(e)) => return Err(ErrMode::Incomplete(e)), + Err(err) => err, + }; + let err_start = i.checkpoint(); + let err_start_eof_offset = i.eof_offset(); + if recover.parse_next(i).is_ok() { + let i_eof_offset = i.eof_offset(); + if err_start_eof_offset == i_eof_offset { + // Didn't advance so bubble the error up + } else if let Err(err_) = i.record_err(&token_start, &err_start, err) { + err = err_; + } else { + continue; + } + } + + i.reset(err_start.clone()); + err = err.map(|err| E::from_recoverable_error(&token_start, &err_start, i, err)); + return Err(err); + } +} + +/// Implementation of [`Parser::resume_after`] +#[cfg(feature = "unstable-recover")] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct ResumeAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + parser: P, + recover: R, + i: core::marker::PhantomData<I>, + o: core::marker::PhantomData<O>, + e: core::marker::PhantomData<E>, +} + +#[cfg(feature = "unstable-recover")] +impl<P, R, I, O, E> ResumeAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + #[inline(always)] + pub(crate) fn new(parser: P, recover: R) -> Self { + Self { + parser, + recover, + i: Default::default(), + o: Default::default(), + e: Default::default(), + } + } +} + +#[cfg(feature = "unstable-recover")] +impl<P, R, I, O, E> Parser<I, Option<O>, E> for ResumeAfter<P, R, I, O, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<Option<O>, E> { + if I::is_recovery_supported() { + resume_after_inner(&mut self.parser, &mut self.recover, i) + } else { + self.parser.parse_next(i).map(Some) + } + } +} + +#[cfg(feature = 
"unstable-recover")] +fn resume_after_inner<P, R, I, O, E>( + parser: &mut P, + recover: &mut R, + i: &mut I, +) -> PResult<Option<O>, E> +where + P: Parser<I, O, E>, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, +{ + let token_start = i.checkpoint(); + let mut err = match parser.parse_next(i) { + Ok(o) => { + return Ok(Some(o)); + } + Err(ErrMode::Incomplete(e)) => return Err(ErrMode::Incomplete(e)), + Err(err) => err, + }; + let err_start = i.checkpoint(); + if recover.parse_next(i).is_ok() { + if let Err(err_) = i.record_err(&token_start, &err_start, err) { + err = err_; + } else { + return Ok(None); + } + } + + i.reset(err_start.clone()); + err = err.map(|err| E::from_recoverable_error(&token_start, &err_start, i, err)); + Err(err) +} diff --git a/src/combinator/sequence.rs b/src/combinator/sequence.rs index 5cfeb9c..0f2e633 100644 --- a/src/combinator/sequence.rs +++ b/src/combinator/sequence.rs @@ -1,14 +1,19 @@ +use crate::combinator::trace; use crate::error::ParserError; use crate::stream::Stream; -use crate::trace::trace; use crate::*; +#[doc(inline)] +pub use crate::seq; + /// Sequence two parsers, only returning the output from the second. /// /// # Arguments /// * `first` The opening parser. /// * `second` The second parser to get object. /// +/// See also [`seq`] to generalize this across any number of fields. +/// /// # Example /// /// ```rust @@ -47,6 +52,8 @@ where /// * `first` The first parser to apply. /// * `second` The second parser to match an object. /// +/// See also [`seq`] to generalize this across any number of fields. +/// /// # Example /// /// ```rust @@ -86,6 +93,8 @@ where /// * `sep` The separator parser to apply. /// * `second` The second parser to apply. /// +/// See also [`seq`] to generalize this across any number of fields. +/// /// # Example /// /// ```rust @@ -127,6 +136,8 @@ where /// * `second` The second parser to apply. /// * `third` The third parser to apply and discard. 
/// +/// See also [`seq`] to generalize this across any number of fields. +/// /// # Example /// /// ```rust diff --git a/src/combinator/tests.rs b/src/combinator/tests.rs index 9d2b49d..726b410 100644 --- a/src/combinator/tests.rs +++ b/src/combinator/tests.rs @@ -717,13 +717,13 @@ fn permutation_test() { #[cfg(feature = "alloc")] fn separated0_test() { fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("abcd", ",").parse_peek(i) + separated(0.., "abcd", ",").parse_peek(i) } fn multi_empty(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("", ",").parse_peek(i) + separated(0.., "", ",").parse_peek(i) } fn multi_longsep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("abcd", "..").parse_peek(i) + separated(0.., "abcd", "..").parse_peek(i) } let a = &b"abcdef"[..]; @@ -773,7 +773,7 @@ fn separated0_test() { #[cfg_attr(debug_assertions, should_panic)] fn separated0_empty_sep_test() { fn empty_sep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated0("abc", "").parse_peek(i) + separated(0.., "abc", "").parse_peek(i) } let i = &b"abcabc"[..]; @@ -792,10 +792,10 @@ fn separated0_empty_sep_test() { #[cfg(feature = "alloc")] fn separated1_test() { fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated1("abcd", ",").parse_peek(i) + separated(1.., "abcd", ",").parse_peek(i) } fn multi_longsep(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - separated1("abcd", "..").parse_peek(i) + separated(1.., "abcd", "..").parse_peek(i) } let a = &b"abcdef"[..]; @@ -840,6 +840,47 @@ fn separated1_test() { #[test] #[cfg(feature = "alloc")] +fn separated_test() { + fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { + separated(2..=4, "abcd", ",").parse_peek(i) + } + + let a = &b"abcd,ef"[..]; + let b = &b"abcd,abcd,efgh"[..]; + let c = &b"abcd,abcd,abcd,abcd,efgh"[..]; + let d = &b"abcd,abcd,abcd,abcd,abcd,efgh"[..]; + let e = 
&b"abcd,ab"[..]; + + assert_eq!( + multi(Partial::new(a)), + Err(ErrMode::Backtrack(error_position!( + &Partial::new(&b"ef"[..]), + ErrorKind::Tag + ))) + ); + let res1 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(b)), + Ok((Partial::new(&b",efgh"[..]), res1)) + ); + let res2 = vec![&b"abcd"[..], &b"abcd"[..], &b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(c)), + Ok((Partial::new(&b",efgh"[..]), res2)) + ); + let res3 = vec![&b"abcd"[..], &b"abcd"[..], &b"abcd"[..], &b"abcd"[..]]; + assert_eq!( + multi(Partial::new(d)), + Ok((Partial::new(&b",abcd,efgh"[..]), res3)) + ); + assert_eq!( + multi(Partial::new(e)), + Err(ErrMode::Incomplete(Needed::new(2))) + ); +} + +#[test] +#[cfg(feature = "alloc")] fn repeat0_test() { fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { repeat(0.., "abcd").parse_peek(i) @@ -925,7 +966,7 @@ fn repeat1_test() { fn repeat_till_test() { #[allow(clippy::type_complexity)] fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> { - repeat_till0("abcd", "efgh").parse_peek(i) + repeat_till(0.., "abcd", "efgh").parse_peek(i) } let a = b"abcdabcdefghabcd"; @@ -947,6 +988,46 @@ fn repeat_till_test() { } #[test] +#[cfg(feature = "alloc")] +fn repeat_till_range_test() { + #[allow(clippy::type_complexity)] + fn multi(i: &str) -> IResult<&str, (Vec<&str>, &str)> { + repeat_till(2..=4, "ab", "cd").parse_peek(i) + } + + assert_eq!( + multi("cd"), + Err(ErrMode::Backtrack(error_node_position!( + &"cd", + ErrorKind::Many, + error_position!(&"cd", ErrorKind::Tag) + ))) + ); + assert_eq!( + multi("abcd"), + Err(ErrMode::Backtrack(error_node_position!( + &"cd", + ErrorKind::Many, + error_position!(&"cd", ErrorKind::Tag) + ))) + ); + assert_eq!(multi("ababcd"), Ok(("", (vec!["ab", "ab"], "cd")))); + assert_eq!(multi("abababcd"), Ok(("", (vec!["ab", "ab", "ab"], "cd")))); + assert_eq!( + multi("ababababcd"), + Ok(("", (vec!["ab", "ab", "ab", "ab"], "cd"))) + ); + assert_eq!( + 
multi("abababababcd"), + Err(ErrMode::Backtrack(error_node_position!( + &"cd", + ErrorKind::Many, + error_position!(&"abcd", ErrorKind::Tag) + ))) + ); +} + +#[test] #[cfg(feature = "std")] fn infinite_many() { fn tst(input: &[u8]) -> IResult<&[u8], &[u8]> { @@ -1120,7 +1201,9 @@ fn fold_repeat0_test() { acc } fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_repeat(0.., "abcd", Vec::new, fold_into_vec).parse_peek(i) + repeat(0.., "abcd") + .fold(Vec::new, fold_into_vec) + .parse_peek(i) } assert_eq!( @@ -1158,7 +1241,7 @@ fn fold_repeat0_empty_test() { acc } fn multi_empty(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_repeat(0.., "", Vec::new, fold_into_vec).parse_peek(i) + repeat(0.., "").fold(Vec::new, fold_into_vec).parse_peek(i) } assert_eq!( @@ -1178,7 +1261,9 @@ fn fold_repeat1_test() { acc } fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_repeat(1.., "abcd", Vec::new, fold_into_vec).parse_peek(i) + repeat(1.., "abcd") + .fold(Vec::new, fold_into_vec) + .parse_peek(i) } let a = &b"abcdef"[..]; @@ -1214,7 +1299,9 @@ fn fold_repeat_test() { acc } fn multi(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, Vec<&[u8]>> { - fold_repeat(2..=4, "Abcd", Vec::new, fold_into_vec).parse_peek(i) + repeat(2..=4, "Abcd") + .fold(Vec::new, fold_into_vec) + .parse_peek(i) } let a = &b"Abcdef"[..]; diff --git a/src/error.rs b/src/error.rs index 449bebc..3f28013 100644 --- a/src/error.rs +++ b/src/error.rs @@ -4,12 +4,12 @@ //! - Accumulate more [context][Parser::context] as the error goes up the parser chain //! - Distinguish between [recoverable errors, //! unrecoverable errors, and more data is needed][ErrMode] -//! - Have a very low overhead, as errors are often discarded by the calling parser (examples: `many0`, `alt`) +//! - Have a very low overhead, as errors are often discarded by the calling parser (examples: `repeat`, `alt`) //! 
- Can be modified according to the user's needs, because some languages need a lot more information //! - Help thread-through the [stream][crate::stream] //! //! To abstract these needs away from the user, generally `winnow` parsers use the [`PResult`] -//! alias, rather than [`Result`][std::result::Result]. [`Parser::parse`] is a top-level operation +//! alias, rather than [`Result`]. [`Parser::parse`] is a top-level operation //! that can help convert to a `Result` for integrating with your application's error reporting. //! //! Error types include: @@ -30,26 +30,29 @@ use crate::stream::Stream; #[allow(unused_imports)] // Here for intra-doc links use crate::Parser; -/// Holds the result of [`Parser`] +/// For use with [`Parser::parse_peek`] which allows the input stream to be threaded through a +/// parser. /// /// - `Ok((I, O))` is the remaining [input][crate::stream] and the parsed value /// - [`Err(ErrMode<E>)`][ErrMode] is the error along with how to respond to it /// /// By default, the error type (`E`) is [`InputError`] /// -/// [`Parser::parse`] is a top-level operation that can help convert to a `Result` for integrating -/// with your application's error reporting. +/// When integrating into the result of the application, see +/// - [`Parser::parse`] +/// - [`ErrMode::into_inner`] pub type IResult<I, O, E = InputError<I>> = PResult<(I, O), E>; -/// Holds the result of [`Parser`] +/// For use with [`Parser::parse_next`] /// /// - `Ok(O)` is the parsed value /// - [`Err(ErrMode<E>)`][ErrMode] is the error along with how to respond to it /// -/// By default, the error type (`E`) is [`ErrorKind`]. +/// By default, the error type (`E`) is [`ContextError`]. /// -/// [`Parser::parse`] is a top-level operation that can help convert to a `Result` for integrating -/// with your application's error reporting. 
+/// When integrating into the result of the application, see +/// - [`Parser::parse`] +/// - [`ErrMode::into_inner`] pub type PResult<O, E = ContextError> = Result<O, ErrMode<E>>; /// Contains information on needed data if a parser returned `Incomplete` @@ -97,7 +100,7 @@ pub enum ErrMode<E> { /// /// More data needs to be buffered before retrying the parse. /// - /// This must only be set when the [`Stream`][crate::stream::Stream] is [partial][`crate::stream::StreamIsPartial`], like with + /// This must only be set when the [`Stream`] is [partial][`crate::stream::StreamIsPartial`], like with /// [`Partial`][crate::Partial] /// /// Convert this into an `Backtrack` with [`Parser::complete_err`] @@ -106,7 +109,7 @@ pub enum ErrMode<E> { /// /// For example, a parser for json values might include a /// [`dec_uint`][crate::ascii::dec_uint] as one case in an [`alt`][crate::combinator::alt] - /// combiantor. If it fails, the next case should be tried. + /// combinator. If it fails, the next case should be tried. Backtrack(E), /// The parser had an unrecoverable error. /// @@ -310,6 +313,17 @@ pub trait AddContext<I, C = &'static str>: Sized { } } +/// Capture context from when an error was recovered +pub trait FromRecoverableError<I: Stream, E> { + /// Capture context from when an error was recovered + fn from_recoverable_error( + token_start: &<I as Stream>::Checkpoint, + err_start: &<I as Stream>::Checkpoint, + input: &I, + e: E, + ) -> Self; +} + /// Create a new error with an external error, from [`std::str::FromStr`] /// /// This trait is required by the [`Parser::try_map`] combinator. 
@@ -384,6 +398,18 @@ impl<I: Clone> ParserError<I> for InputError<I> { impl<I: Clone, C> AddContext<I, C> for InputError<I> {} +impl<I: Clone + Stream> FromRecoverableError<I, Self> for InputError<I> { + #[inline] + fn from_recoverable_error( + _token_start: &<I as Stream>::Checkpoint, + _err_start: &<I as Stream>::Checkpoint, + _input: &I, + e: Self, + ) -> Self { + e + } +} + impl<I: Clone, E> FromExternalError<I, E> for InputError<I> { /// Create a new error from an input position and an external error #[inline] @@ -443,6 +469,17 @@ impl<I> ParserError<I> for () { impl<I, C> AddContext<I, C> for () {} +impl<I: Stream> FromRecoverableError<I, Self> for () { + #[inline] + fn from_recoverable_error( + _token_start: &<I as Stream>::Checkpoint, + _err_start: &<I as Stream>::Checkpoint, + _input: &I, + (): Self, + ) -> Self { + } +} + impl<I, E> FromExternalError<I, E> for () { #[inline] fn from_external_error(_input: &I, _kind: ErrorKind, _e: E) -> Self {} @@ -533,6 +570,18 @@ impl<C, I> AddContext<I, C> for ContextError<C> { } } +impl<I: Stream, C> FromRecoverableError<I, Self> for ContextError<C> { + #[inline] + fn from_recoverable_error( + _token_start: &<I as Stream>::Checkpoint, + _err_start: &<I as Stream>::Checkpoint, + _input: &I, + e: Self, + ) -> Self { + e + } +} + #[cfg(feature = "std")] impl<C, I, E: std::error::Error + Send + Sync + 'static> FromExternalError<I, E> for ContextError<C> @@ -861,6 +910,19 @@ where } #[cfg(feature = "std")] +impl<I: Clone + Stream, C> FromRecoverableError<I, Self> for TreeError<I, C> { + #[inline] + fn from_recoverable_error( + _token_start: &<I as Stream>::Checkpoint, + _err_start: &<I as Stream>::Checkpoint, + _input: &I, + e: Self, + ) -> Self { + e + } +} + +#[cfg(feature = "std")] impl<I, C, E: std::error::Error + Send + Sync + 'static> FromExternalError<I, E> for TreeError<I, C> where I: Clone, @@ -877,7 +939,7 @@ where #[cfg(feature = "std")] impl<I, C> TreeError<I, C> where - I: Clone + std::fmt::Display, + I: Clone 
+ crate::lib::std::fmt::Display, C: fmt::Display, { fn write(&self, f: &mut fmt::Formatter<'_>, indent: usize) -> fmt::Result { @@ -1179,6 +1241,9 @@ impl<I, E> ParseError<I, E> { } /// The location in [`ParseError::input`] where parsing failed + /// + /// **Note:** This is an offset, not an index, and may point to the end of input + /// (`input.len()`) on eof errors. #[inline] pub fn offset(&self) -> usize { self.offset @@ -1219,21 +1284,21 @@ where writeln!(f, "parse error at line {}, column {}", line_num, col_num)?; // | - for _ in 0..=gutter { + for _ in 0..gutter { write!(f, " ")?; } - writeln!(f, "|")?; + writeln!(f, " |")?; // 1 | 00:32:00.a999999 write!(f, "{} | ", line_num)?; writeln!(f, "{}", String::from_utf8_lossy(content))?; // | ^ - for _ in 0..=gutter { + for _ in 0..gutter { write!(f, " ")?; } - write!(f, "|")?; - for _ in 0..=col_idx { + write!(f, " | ")?; + for _ in 0..col_idx { write!(f, " ")?; } // The span will be empty at eof, so we need to make sure we always print at least @@ -1246,7 +1311,7 @@ where } else { let content = input; writeln!(f, "{}", String::from_utf8_lossy(content))?; - for _ in 0..=span_start { + for _ in 0..span_start { write!(f, " ")?; } // The span will be empty at eof, so we need to make sure we always print at least @@ -1284,10 +1349,9 @@ fn translate_position(input: &[u8], index: usize) -> (usize, usize) { None => 0, }; let line = input[0..line_start].iter().filter(|b| **b == b'\n').count(); - let line = line; // HACK: This treats byte offset and column offsets the same - let column = std::str::from_utf8(&input[line_start..=index]) + let column = crate::lib::std::str::from_utf8(&input[line_start..=index]) .map(|s| s.chars().count() - 1) .unwrap_or_else(|_| index - line_start); let column = column + column_offset; @@ -1297,6 +1361,27 @@ fn translate_position(input: &[u8], index: usize) -> (usize, usize) { #[cfg(test)] #[cfg(feature = "std")] +mod test_parse_error { + use super::*; + + #[test] + fn single_line() { + let 
mut input = "0xZ123"; + let start = input.checkpoint(); + let _ = input.next_token().unwrap(); + let _ = input.next_token().unwrap(); + let inner = InputError::new(input, ErrorKind::Slice); + let error = ParseError::new(input, start, inner); + let expected = "\ +0xZ123 + ^ +slice error starting at: Z123"; + assert_eq!(error.to_string(), expected); + } +} + +#[cfg(test)] +#[cfg(feature = "std")] mod test_translate_position { use super::*; @@ -7,6 +7,8 @@ //! - [Tutorial][_tutorial::chapter_0] //! - [Special Topics][_topic] //! - [Discussions](https://github.com/winnow-rs/winnow/discussions) +//! - [CHANGELOG](https://github.com/winnow-rs/winnow/blob/v0.5.37/CHANGELOG.md) (includes major version migration +//! guides) //! //! ## Aspirations //! @@ -49,6 +51,7 @@ #![cfg_attr(docsrs, feature(extended_key_value_attributes))] #![cfg_attr(not(feature = "std"), no_std)] #![warn(missing_docs)] +#![warn(clippy::std_instead_of_core)] // BEGIN - Embark standard lints v6 for Rust 1.55+ // do not change or add/remove here, but one can add exceptions after this section // for more info see: <https://github.com/EmbarkStudios/rust-ecosystem/issues/59> @@ -175,6 +178,7 @@ pub(crate) mod lib { #[cfg(feature = "std")] /// internal std exports for `no_std` compatibility pub mod std { + #![allow(clippy::std_instead_of_core)] #[doc(hidden)] pub use std::{ alloc, borrow, boxed, cmp, collections, convert, fmt, hash, iter, mem, ops, option, @@ -236,6 +240,8 @@ pub mod prelude { pub use crate::IResult; pub use crate::PResult; pub use crate::Parser; + #[cfg(feature = "unstable-recover")] + pub use crate::RecoverableParser as _; } pub use error::IResult; diff --git a/src/macros.rs b/src/macros.rs deleted file mode 100644 index b3078c6..0000000 --- a/src/macros.rs +++ /dev/null @@ -1,78 +0,0 @@ -/// `match` for parsers -/// -/// When parsers have unique prefixes to test for, this offers better performance over -/// [`alt`][crate::combinator::alt] though it might be at the cost of duplicating 
parts of your grammar -/// if you needed to [`peek`][crate::combinator::peek]. -/// -/// For tight control over the error in a catch-all case, use [`fail`][crate::combinator::fail]. -/// -/// # Example -/// -/// ```rust -/// use winnow::prelude::*; -/// use winnow::combinator::dispatch; -/// # use winnow::token::any; -/// # use winnow::combinator::peek; -/// # use winnow::combinator::preceded; -/// # use winnow::combinator::success; -/// # use winnow::combinator::fail; -/// -/// fn escaped(input: &mut &str) -> PResult<char> { -/// preceded('\\', escape_seq_char).parse_next(input) -/// } -/// -/// fn escape_seq_char(input: &mut &str) -> PResult<char> { -/// dispatch! {any; -/// 'b' => success('\u{8}'), -/// 'f' => success('\u{c}'), -/// 'n' => success('\n'), -/// 'r' => success('\r'), -/// 't' => success('\t'), -/// '\\' => success('\\'), -/// '"' => success('"'), -/// _ => fail::<_, char, _>, -/// } -/// .parse_next(input) -/// } -/// -/// assert_eq!(escaped.parse_peek("\\nHello"), Ok(("Hello", '\n'))); -/// ``` -#[macro_export] -macro_rules! dispatch { - ($match_parser: expr; $( $pat:pat $(if $pred:expr)? => $expr: expr ),+ $(,)? ) => { - $crate::trace::trace("dispatch", move |i: &mut _| - { - use $crate::Parser; - let initial = $match_parser.parse_next(i)?; - match initial { - $( - $pat $(if $pred)? => $expr.parse_next(i), - )* - } - }) - } -} - -macro_rules! succ ( - (0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*)); - (1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*)); - (2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*)); - (3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*)); - (4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*)); - (5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*)); - (6, $submac:ident ! ($($rest:tt)*)) => ($submac!(7, $($rest)*)); - (7, $submac:ident ! ($($rest:tt)*)) => ($submac!(8, $($rest)*)); - (8, $submac:ident ! 
($($rest:tt)*)) => ($submac!(9, $($rest)*)); - (9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*)); - (10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*)); - (11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*)); - (12, $submac:ident ! ($($rest:tt)*)) => ($submac!(13, $($rest)*)); - (13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*)); - (14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*)); - (15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*)); - (16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*)); - (17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*)); - (18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*)); - (19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*)); - (20, $submac:ident ! ($($rest:tt)*)) => ($submac!(21, $($rest)*)); -); diff --git a/src/macros/dispatch.rs b/src/macros/dispatch.rs new file mode 100644 index 0000000..b763d4b --- /dev/null +++ b/src/macros/dispatch.rs @@ -0,0 +1,55 @@ +/// `match` for parsers +/// +/// When parsers have unique prefixes to test for, this offers better performance over +/// [`alt`][crate::combinator::alt] though it might be at the cost of duplicating parts of your grammar +/// if you needed to [`peek`][crate::combinator::peek]. +/// +/// For tight control over the error in a catch-all case, use [`fail`][crate::combinator::fail]. +/// +/// # Example +/// +/// ```rust +/// use winnow::prelude::*; +/// use winnow::combinator::dispatch; +/// # use winnow::token::any; +/// # use winnow::combinator::peek; +/// # use winnow::combinator::preceded; +/// # use winnow::combinator::empty; +/// # use winnow::combinator::fail; +/// +/// fn escaped(input: &mut &str) -> PResult<char> { +/// preceded('\\', escape_seq_char).parse_next(input) +/// } +/// +/// fn escape_seq_char(input: &mut &str) -> PResult<char> { +/// dispatch! 
{any; +/// 'b' => empty.value('\u{8}'), +/// 'f' => empty.value('\u{c}'), +/// 'n' => empty.value('\n'), +/// 'r' => empty.value('\r'), +/// 't' => empty.value('\t'), +/// '\\' => empty.value('\\'), +/// '"' => empty.value('"'), +/// _ => fail::<_, char, _>, +/// } +/// .parse_next(input) +/// } +/// +/// assert_eq!(escaped.parse_peek("\\nHello"), Ok(("Hello", '\n'))); +/// ``` +#[macro_export] +#[doc(hidden)] // forced to be visible in intended location +macro_rules! dispatch { + ($match_parser: expr; $( $pat:pat $(if $pred:expr)? => $expr: expr ),+ $(,)? ) => { + $crate::combinator::trace("dispatch", move |i: &mut _| + { + use $crate::Parser; + let initial = $match_parser.parse_next(i)?; + match initial { + $( + $pat $(if $pred)? => $expr.parse_next(i), + )* + } + }) + } +} diff --git a/src/macros/mod.rs b/src/macros/mod.rs new file mode 100644 index 0000000..09dc68b --- /dev/null +++ b/src/macros/mod.rs @@ -0,0 +1,5 @@ +mod dispatch; +mod seq; + +#[cfg(test)] +mod test; diff --git a/src/macros/seq.rs b/src/macros/seq.rs new file mode 100644 index 0000000..e104391 --- /dev/null +++ b/src/macros/seq.rs @@ -0,0 +1,268 @@ +/// Initialize a struct or tuple out of a sequences of parsers +/// +///# Example +/// +/// ``` +/// # use winnow::prelude::*; +/// # use winnow::ascii::{alphanumeric1, dec_uint, space0}; +/// # use winnow::combinator::delimited; +/// # use winnow::combinator::empty; +/// # use winnow::error::ContextError; +/// use winnow::combinator::seq; +/// +/// #[derive(Default, Debug, PartialEq)] +/// struct Field { +/// namespace: u32, +/// name: Vec<u8>, +/// value: Vec<u8>, +/// point: (u32, u32), +/// metadata: Vec<u8>, +/// } +/// +/// // Parse into structs / tuple-structs +/// fn field(input: &mut &[u8]) -> PResult<Field> { +/// seq!{Field { +/// namespace: empty.value(5), +/// name: alphanumeric1.map(|s: &[u8]| s.to_owned()), +/// // `_` fields are ignored when building the struct +/// _: (space0, b':', space0), +/// value: alphanumeric1.map(|s: 
&[u8]| s.to_owned()), +/// _: (space0, b':', space0), +/// point: point, +/// // default initialization also works +/// ..Default::default() +/// }}.parse_next(input) +/// } +/// +/// // Or parse into tuples +/// fn point(input: &mut &[u8]) -> PResult<(u32, u32)> { +/// let num = dec_uint::<_, u32, ContextError>; +/// seq!(num, _: (space0, b',', space0), num).parse_next(input) +/// } +/// +/// assert_eq!( +/// field.parse_peek(&b"test: data: 123 , 4"[..]), +/// Ok(( +/// &b""[..], +/// Field { +/// namespace: 5, +/// name: b"test"[..].to_owned(), +/// value: b"data"[..].to_owned(), +/// point: (123, 4), +/// metadata: Default::default(), +/// }, +/// )), +/// ); +/// ``` +#[macro_export] +#[doc(alias = "tuple")] +#[doc(alias = "preceded")] +#[doc(alias = "terminated")] +#[doc(alias = "delimited")] +#[doc(alias = "pair")] +#[doc(alias = "separated_pair")] +#[doc(alias = "struct_parser")] +macro_rules! seq { + ($name: ident { $($fields: tt)* }) => { + $crate::combinator::trace(stringify!($name), move |input: &mut _| { + use $crate::Parser; + $crate::seq_parse_struct_fields!(input; $($fields)*); + #[allow(clippy::redundant_field_names)] + Ok($crate::seq_init_struct_fields!( ($($fields)*); $name;)) + }) + }; + ($name: ident ( $($elements: tt)* )) => { + $crate::combinator::trace(stringify!($name), move |input: &mut _| { + use $crate::Parser; + $crate::seq_parse_tuple_fields!( ($($elements)*) ; ).map(|t| { + $crate::seq_init_tuple_fields!( + ($($elements)*); + (t.0, t.1, t.2, t.3, t.4, t.5, t.6, t.7, t.8, t.9, t.10, t.11, t.12, t.13, t.14, t.15, t.16, t.17, t.18, t.19, t.20); + $name; + ) + }).parse_next(input) + }) + }; + (( $($elements: tt)* )) => { + $crate::combinator::trace("tuple", move |input: &mut _| { + use $crate::Parser; + $crate::seq_parse_tuple_fields!( ($($elements)*) ; ).map(|t| { + $crate::seq_init_tuple_fields!( + ($($elements)*); + (t.0, t.1, t.2, t.3, t.4, t.5, t.6, t.7, t.8, t.9, t.10, t.11, t.12, t.13, t.14, t.15, t.16, t.17, t.18, t.19, t.20); + ; 
+ ) + }).parse_next(input) + }) + }; + ($($elements: tt)*) => { + $crate::seq!(($($elements)*)) + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! seq_parse_struct_fields { + ( + $input: ident; + _ : $head_parser: expr, $($fields: tt)* + ) => { + let _ = $head_parser.parse_next($input)?; + $crate::seq_parse_struct_fields!($input; $($fields)*) + }; + ( + $input: ident; + _ : $head_parser: expr + ) => { + let _ = $head_parser.parse_next($input)?; + }; + ( + $input: ident; + $head_field: ident : $head_parser: expr, $($fields: tt)* + ) => { + let $head_field = $head_parser.parse_next($input)?; + $crate::seq_parse_struct_fields!($input; $($fields)*) + }; + ( + $input: ident; + $head_field: ident : $head_parser: expr + ) => { + let $head_field = $head_parser.parse_next($input)?; + }; + ( + $input: expr; + .. $update: expr + ) => {}; + ( + $input: expr; + $(,)? + ) => {}; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! seq_parse_tuple_fields { + ( + (_ : $head_parser: expr, $($fields: tt)* ); + $($sequenced: tt)* + ) => { + $crate::seq_parse_tuple_fields!( ( $($fields)* ) ; $($sequenced)* $head_parser.void(), ) + }; + ( + (_ : $head_parser: expr); + $($sequenced: tt)* + ) => { + $crate::seq_parse_tuple_fields!((); $($sequenced)* $head_parser.void(), ) + }; + ( + ($head_parser: expr, $($fields: tt)*); + $($sequenced: tt)* + ) => { + $crate::seq_parse_tuple_fields!( ( $($fields)* ) ; $($sequenced)* $head_parser, ) + }; + ( + ($head_parser: expr); + $($sequenced: tt)* + )=> { + $crate::seq_parse_tuple_fields!((); $($sequenced)* $head_parser, ) + }; + ( + (); + $($sequenced: tt)* + ) => { + ($($sequenced)*) + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
seq_init_struct_fields { + ( + (_ : $head_parser: expr, $($fields: tt)*); + $name: ident; + $($inits: tt)* + ) => { + $crate::seq_init_struct_fields!( ( $($fields)* ) ; $name ; $($inits)* ) + }; + ( + (_ : $head_parser: expr); + $name: ident; + $($inits: tt)* + ) => { + $crate::seq_init_struct_fields!( (); $name ; $($inits)* ) + }; + ( + ($head_field: ident : $head_parser: expr, $($fields: tt)*); + $name: ident; + $($inits: tt)* + ) => + { + $crate::seq_init_struct_fields!( ( $($fields)* ) ; $name ; $($inits)* $head_field: $head_field, ) + }; + ( + ($head_field: ident : $head_parser: expr); + $name: ident; + $($inits: tt)* + ) => { + $crate::seq_init_struct_fields!( (); $name ; $($inits)* $head_field: $head_field,) + }; + ( + (.. $update: expr); + $name: ident; + $($inits: tt)* + ) => { + $name { $($inits)* ..$update } + }; + ( + ($(,)?); + $name: ident; + $($inits: tt)* + ) => { + $name { $($inits)* } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! seq_init_tuple_fields { + ( + (_ : $head_parser: expr, $($fields: tt)*); + ($head_arg: expr, $($args: expr),*); + $($name: ident)?; + $($inits: tt)* + ) => { + $crate::seq_init_tuple_fields!( ( $($fields)* ); ( $($args),* ) ; $($name)? ; $($inits)* ) + }; + ( + (_ : $head_parser: expr); + ($head_arg: expr, $($args: expr),*); + $($name: ident)?; + $($inits: tt)* + ) => { + $crate::seq_init_tuple_fields!((); ( $($args),* ); $($name)? ; $($inits)*) + }; + ( + ($head_parser: expr, $($fields: tt)*); + ($head_arg: expr, $($args: expr),*); + $($name: ident)?; + $($inits: tt)* + ) => { + $crate::seq_init_tuple_fields!( ( $($fields)* ) ; ( $($args),* ) ; $($name)? ; $($inits)* $head_arg, ) + }; + ( + ($head_parser: expr); + ($head_arg: expr, $($args: expr),*); + $($name: ident)?; + $($inits: tt)* + ) => { + $crate::seq_init_tuple_fields!((); ( $($args),* ); $($name)? ; $($inits)* $head_arg) + }; + ( + (); + ($($args: expr),*); + $($name: ident)?; + $($inits: expr),* $(,)? 
+ ) => { + $($name)?( $($inits,)* ) + }; +} diff --git a/src/macros/test.rs b/src/macros/test.rs new file mode 100644 index 0000000..17601bc --- /dev/null +++ b/src/macros/test.rs @@ -0,0 +1,378 @@ +use crate::ascii::dec_uint; +use crate::combinator::dispatch; +use crate::combinator::empty; +use crate::combinator::fail; +use crate::combinator::seq; +use crate::error::ErrMode; +use crate::error::ErrorKind; +use crate::error::ParserError; +use crate::prelude::*; +use crate::token::any; + +#[test] +fn dispatch_basics() { + fn escape_seq_char(input: &mut &str) -> PResult<char> { + dispatch! {any; + 'b' => empty.value('\u{8}'), + 'f' => empty.value('\u{c}'), + 'n' => empty.value('\n'), + 'r' => empty.value('\r'), + 't' => empty.value('\t'), + '\\' => empty.value('\\'), + '"' => empty.value('"'), + _ => fail::<_, char, _>, + } + .parse_next(input) + } + assert_eq!(escape_seq_char.parse_peek("b123"), Ok(("123", '\u{8}'))); + assert_eq!( + escape_seq_char.parse_peek("error"), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"rror", + ErrorKind::Fail + ))) + ); + assert_eq!( + escape_seq_char.parse_peek(""), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"", + ErrorKind::Fail + ))) + ); +} + +#[test] +fn seq_struct_basics() { + #[derive(Debug, PartialEq)] + struct Point { + x: u32, + y: u32, + } + + fn parser(input: &mut &str) -> PResult<Point> { + seq! 
{ + Point { + x: dec_uint, + _: ',', + y: dec_uint, + } + } + .parse_next(input) + } + assert_eq!( + parser.parse_peek("123,4 remaining"), + Ok((" remaining", Point { x: 123, y: 4 },)), + ); + assert_eq!( + parser.parse_peek("123, remaining"), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &" remaining", + ErrorKind::Fail + ))) + ); + assert_eq!( + parser.parse_peek(""), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"", + ErrorKind::Fail + ))) + ); +} + +#[test] +fn seq_struct_default_init() { + #[derive(Debug, PartialEq, Default)] + struct Point { + x: u32, + y: u32, + z: u32, + } + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point { + x: dec_uint, + _: ',', + y: dec_uint, + ..Default::default() + } + } + .parse_next(input) + } + assert_eq!( + parser.parse_peek("123,4 remaining"), + Ok((" remaining", Point { x: 123, y: 4, z: 0 },)), + ); + assert_eq!( + parser.parse_peek("123, remaining"), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &" remaining", + ErrorKind::Fail + ))) + ); + assert_eq!( + parser.parse_peek(""), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"", + ErrorKind::Fail + ))) + ); +} + +#[test] +fn seq_struct_trailing_comma_elided() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point { + x: u32, + y: u32, + } + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point { + x: dec_uint, + _: ',', + y: dec_uint, + _: empty, + } + } + .parse_next(input) + } +} + +#[test] +fn seq_struct_no_trailing_comma() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point { + x: u32, + y: u32, + } + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point { + x: dec_uint, + _: ',', + y: dec_uint + } + } + .parse_next(input) + } +} + +#[test] +fn seq_struct_no_trailing_comma_elided() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point { + x: u32, + y: u32, + } + + fn parser(input: &mut &str) -> PResult<Point> { + seq! 
{ + Point { + x: dec_uint, + _: ',', + y: dec_uint, + _: empty + } + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_struct_basics() { + #[derive(Debug, PartialEq)] + struct Point(u32, u32); + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point( + dec_uint, + _: ',', + dec_uint, + ) + } + .parse_next(input) + } + assert_eq!( + parser.parse_peek("123,4 remaining"), + Ok((" remaining", Point(123, 4),)), + ); + assert_eq!( + parser.parse_peek("123, remaining"), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &" remaining", + ErrorKind::Fail + ))) + ); + assert_eq!( + parser.parse_peek(""), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"", + ErrorKind::Fail + ))) + ); +} + +#[test] +fn seq_tuple_struct_trailing_comma_elided() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point(u32, u32); + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point( + dec_uint, + _: ',', + dec_uint, + _: empty, + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_struct_no_trailing_comma() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point(u32, u32); + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point( + dec_uint, + _: ',', + dec_uint + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_struct_no_trailing_comma_elided() { + #![allow(dead_code)] + + #[derive(Debug, PartialEq)] + struct Point(u32, u32); + + fn parser(input: &mut &str) -> PResult<Point> { + seq! { + Point( + dec_uint, + _: ',', + dec_uint, + _: empty + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_basics() { + fn parser(input: &mut &str) -> PResult<(u32, u32)> { + seq! 
{ + ( + dec_uint, + _: ',', + dec_uint, + ) + } + .parse_next(input) + } + assert_eq!( + parser.parse_peek("123,4 remaining"), + Ok((" remaining", (123, 4),)), + ); + assert_eq!( + parser.parse_peek("123, remaining"), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &" remaining", + ErrorKind::Fail + ))) + ); + assert_eq!( + parser.parse_peek(""), + Err(ErrMode::Backtrack(ParserError::from_error_kind( + &"", + ErrorKind::Fail + ))) + ); +} + +#[test] +fn seq_tuple_trailing_comma_elided() { + #![allow(dead_code)] + + fn parser(input: &mut &str) -> PResult<(u32, u32)> { + seq! { + ( + dec_uint, + _: ',', + dec_uint, + _: empty, + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_no_trailing_comma() { + #![allow(dead_code)] + + fn parser(input: &mut &str) -> PResult<(u32, u32)> { + seq! { + ( + dec_uint, + _: ',', + dec_uint + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_no_trailing_comma_elided() { + #![allow(dead_code)] + + fn parser(input: &mut &str) -> PResult<(u32, u32)> { + seq! { + ( + dec_uint, + _: ',', + dec_uint, + _: empty + ) + } + .parse_next(input) + } +} + +#[test] +fn seq_tuple_no_parens() { + #![allow(dead_code)] + + fn parser(input: &mut &str) -> PResult<(u32, u32)> { + seq! ( + dec_uint, + _: ',', + dec_uint, + ) + .parse_next(input) + } +} diff --git a/src/parser.rs b/src/parser.rs index b59e4cd..1afa7e5 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,8 +1,13 @@ //! 
Basic types to build the parsers +use crate::ascii::Caseless as AsciiCaseless; use crate::combinator::*; +#[cfg(feature = "unstable-recover")] +use crate::error::FromRecoverableError; use crate::error::{AddContext, FromExternalError, IResult, PResult, ParseError, ParserError}; use crate::stream::{AsChar, Compare, Location, ParseSlice, Stream, StreamIsPartial}; +#[cfg(feature = "unstable-recover")] +use crate::stream::{Recover, Recoverable}; /// Core trait for parsing /// @@ -10,12 +15,12 @@ use crate::stream::{AsChar, Compare, Location, ParseSlice, Stream, StreamIsParti /// ```rust /// use winnow::prelude::*; /// -/// fn success(input: &mut &str) -> PResult<()> { +/// fn empty(input: &mut &str) -> PResult<()> { /// let output = (); /// Ok(output) /// } /// -/// let (input, output) = success.parse_peek("Hello").unwrap(); +/// let (input, output) = empty.parse_peek("Hello").unwrap(); /// assert_eq!(input, "Hello"); // We didn't consume any input /// ``` /// @@ -23,14 +28,14 @@ use crate::stream::{AsChar, Compare, Location, ParseSlice, Stream, StreamIsParti /// ```rust /// use winnow::prelude::*; /// -/// fn success<O: Clone>(output: O) -> impl FnMut(&mut &str) -> PResult<O> { +/// fn empty<O: Clone>(output: O) -> impl FnMut(&mut &str) -> PResult<O> { /// move |input: &mut &str| { /// let output = output.clone(); /// Ok(output) /// } /// } /// -/// let (input, output) = success("World").parse_peek("Hello").unwrap(); +/// let (input, output) = empty("World").parse_peek("Hello").unwrap(); /// assert_eq!(input, "Hello"); // We didn't consume any input /// assert_eq!(output, "World"); /// ``` @@ -91,7 +96,7 @@ pub trait Parser<I, O, E> { /// /// # Example /// - /// Because parsers are `FnMut`, they can be called multiple times. This prevents moving `f` + /// Because parsers are `FnMut`, they can be called multiple times. 
This prevents moving `f` /// into [`length_data`][crate::binary::length_data] and `g` into /// [`Parser::complete_err`]: /// ```rust,compile_fail @@ -161,6 +166,30 @@ pub trait Parser<I, O, E> { Value::new(self, val) } + /// Produce a type's default value + /// + /// # Example + /// + /// ```rust + /// # use winnow::{error::ErrMode,error::ErrorKind, error::InputError, Parser}; + /// use winnow::ascii::alpha1; + /// # fn main() { + /// + /// let mut parser = alpha1.default_value::<u32>(); + /// + /// assert_eq!(parser.parse_peek("abcd"), Ok(("", 0))); + /// assert_eq!(parser.parse_peek("123abcd;"), Err(ErrMode::Backtrack(InputError::new("123abcd;", ErrorKind::Slice)))); + /// # } + /// ``` + #[inline(always)] + fn default_value<O2>(self) -> DefaultValue<Self, I, O, O2, E> + where + Self: core::marker::Sized, + O2: core::default::Default, + { + DefaultValue::new(self) + } + /// Discards the output of the `Parser` /// /// # Example @@ -194,11 +223,11 @@ pub trait Parser<I, O, E> { /// use winnow::ascii::alpha1; /// # fn main() { /// - /// fn parser1<'s>(i: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { - /// alpha1(i) - /// } + /// fn parser1<'s>(i: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { + /// alpha1(i) + /// } /// - /// let mut parser2 = parser1.output_into(); + /// let mut parser2 = parser1.output_into(); /// /// // the parser converts the &str output of the child parser into a Vec<u8> /// let bytes: IResult<&str, Vec<u8>> = parser2.parse_peek("abcd"); @@ -381,7 +410,7 @@ pub trait Parser<I, O, E> { #[inline(always)] fn map<G, O2>(self, map: G) -> Map<Self, G, I, O, O2, E> where - G: Fn(O) -> O2, + G: FnMut(O) -> O2, Self: core::marker::Sized, { Map::new(self, map) @@ -580,7 +609,7 @@ pub trait Parser<I, O, E> { fn verify<G, O2>(self, filter: G) -> Verify<Self, G, I, O, O2, E> where Self: core::marker::Sized, - G: Fn(&O2) -> bool, + G: FnMut(&O2) -> bool, I: Stream, O: crate::lib::std::borrow::Borrow<O2>, O2: ?Sized, @@ -637,6 +666,43 @@ 
pub trait Parser<I, O, E> { { ErrInto::new(self) } + + /// Recover from an error by skipping everything `recover` consumes and trying again + /// + /// If `recover` consumes nothing, the error is returned, allowing an alternative recovery + /// method. + /// + /// This commits the parse result, preventing alternative branch paths like with + /// [`winnow::combinator::alt`][crate::combinator::alt]. + #[inline(always)] + #[cfg(feature = "unstable-recover")] + fn retry_after<R>(self, recover: R) -> RetryAfter<Self, R, I, O, E> + where + Self: core::marker::Sized, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, + { + RetryAfter::new(self, recover) + } + + /// Recover from an error by skipping this parse and everything `recover` consumes + /// + /// This commits the parse result, preventing alternative branch paths like with + /// [`winnow::combinator::alt`][crate::combinator::alt]. + #[inline(always)] + #[cfg(feature = "unstable-recover")] + fn resume_after<R>(self, recover: R) -> ResumeAfter<Self, R, I, O, E> + where + Self: core::marker::Sized, + R: Parser<I, (), E>, + I: Stream, + I: Recover<E>, + E: FromRecoverableError<I, E>, + { + ResumeAfter::new(self, recover) + } } impl<'a, I, O, E, F> Parser<I, O, E> for F @@ -742,6 +808,38 @@ where /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::combinator::alt; /// # use winnow::token::take; +/// use winnow::ascii::Caseless; +/// +/// fn parser<'s>(s: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { +/// alt((Caseless(&"hello"[..]), take(5usize))).parse_next(s) +/// } +/// +/// assert_eq!(parser.parse_peek(&b"Hello, World!"[..]), Ok((&b", World!"[..], &b"Hello"[..]))); +/// assert_eq!(parser.parse_peek(&b"hello, World!"[..]), Ok((&b", World!"[..], &b"hello"[..]))); +/// assert_eq!(parser.parse_peek(&b"HeLlo, World!"[..]), Ok((&b", World!"[..], &b"HeLlo"[..]))); +/// assert_eq!(parser.parse_peek(&b"Something"[..]), 
Ok((&b"hing"[..], &b"Somet"[..]))); +/// assert_eq!(parser.parse_peek(&b"Some"[..]), Err(ErrMode::Backtrack(InputError::new(&b"Some"[..], ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(&b""[..]), Err(ErrMode::Backtrack(InputError::new(&b""[..], ErrorKind::Slice)))); +/// ``` +impl<'s, I, E: ParserError<I>> Parser<I, <I as Stream>::Slice, E> for AsciiCaseless<&'s [u8]> +where + I: Compare<AsciiCaseless<&'s [u8]>> + StreamIsPartial, + I: Stream, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<<I as Stream>::Slice, E> { + crate::token::tag(*self).parse_next(i) + } +} + +/// This is a shortcut for [`tag`][crate::token::tag]. +/// +/// # Example +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::combinator::alt; +/// # use winnow::token::take; /// /// fn parser<'s>(s: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { /// alt((b"Hello", take(5usize))).parse_next(s) @@ -768,6 +866,39 @@ where /// # Example /// ```rust /// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::combinator::alt; +/// # use winnow::token::take; +/// use winnow::ascii::Caseless; +/// +/// fn parser<'s>(s: &mut &'s [u8]) -> PResult<&'s [u8], InputError<&'s [u8]>> { +/// alt((Caseless(b"hello"), take(5usize))).parse_next(s) +/// } +/// +/// assert_eq!(parser.parse_peek(&b"Hello, World!"[..]), Ok((&b", World!"[..], &b"Hello"[..]))); +/// assert_eq!(parser.parse_peek(&b"hello, World!"[..]), Ok((&b", World!"[..], &b"hello"[..]))); +/// assert_eq!(parser.parse_peek(&b"HeLlo, World!"[..]), Ok((&b", World!"[..], &b"HeLlo"[..]))); +/// assert_eq!(parser.parse_peek(&b"Something"[..]), Ok((&b"hing"[..], &b"Somet"[..]))); +/// assert_eq!(parser.parse_peek(&b"Some"[..]), Err(ErrMode::Backtrack(InputError::new(&b"Some"[..], ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(&b""[..]), 
Err(ErrMode::Backtrack(InputError::new(&b""[..], ErrorKind::Slice)))); +/// ``` +impl<'s, I, E: ParserError<I>, const N: usize> Parser<I, <I as Stream>::Slice, E> + for AsciiCaseless<&'s [u8; N]> +where + I: Compare<AsciiCaseless<&'s [u8; N]>> + StreamIsPartial, + I: Stream, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<<I as Stream>::Slice, E> { + crate::token::tag(*self).parse_next(i) + } +} + +/// This is a shortcut for [`tag`][crate::token::tag]. +/// +/// # Example +/// ```rust +/// # use winnow::prelude::*; /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}}; /// # use winnow::combinator::alt; /// # use winnow::token::take; @@ -792,6 +923,38 @@ where } } +/// This is a shortcut for [`tag`][crate::token::tag]. +/// +/// # Example +/// ```rust +/// # use winnow::prelude::*; +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}}; +/// # use winnow::combinator::alt; +/// # use winnow::token::take; +/// # use winnow::ascii::Caseless; +/// +/// fn parser<'s>(s: &mut &'s str) -> PResult<&'s str, InputError<&'s str>> { +/// alt((Caseless("hello"), take(5usize))).parse_next(s) +/// } +/// +/// assert_eq!(parser.parse_peek("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser.parse_peek("hello, World!"), Ok((", World!", "hello"))); +/// assert_eq!(parser.parse_peek("HeLlo, World!"), Ok((", World!", "HeLlo"))); +/// assert_eq!(parser.parse_peek("Something"), Ok(("hing", "Somet"))); +/// assert_eq!(parser.parse_peek("Some"), Err(ErrMode::Backtrack(InputError::new("Some", ErrorKind::Slice)))); +/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Slice)))); +/// ``` +impl<'s, I, E: ParserError<I>> Parser<I, <I as Stream>::Slice, E> for AsciiCaseless<&'s str> +where + I: Compare<AsciiCaseless<&'s str>> + StreamIsPartial, + I: Stream, +{ + #[inline(always)] + fn parse_next(&mut self, i: &mut I) -> PResult<<I as Stream>::Slice, E> { + 
crate::token::tag(*self).parse_next(i) + } +} + impl<I, E: ParserError<I>> Parser<I, (), E> for () { #[inline(always)] fn parse_next(&mut self, _i: &mut I) -> PResult<(), E> { @@ -831,6 +994,71 @@ macro_rules! impl_parser_for_tuples { } } +/// Collect all errors when parsing the input +/// +/// [`Parser`]s will need to use [`Recoverable<I, _>`] for their input. +#[cfg(feature = "unstable-recover")] +pub trait RecoverableParser<I, O, R, E> { + /// Collect all errors when parsing the input + /// + /// If `self` fails, this acts like [`Parser::resume_after`] and returns `Ok(None)`. + /// Generally, this should be avoided by using + /// [`Parser::retry_after`] and [`Parser::resume_after`] throughout your parser. + /// + /// The empty `input` is returned to allow turning the errors into [`ParserError`]s. + fn recoverable_parse(&mut self, input: I) -> (I, Option<O>, Vec<R>); +} + +#[cfg(feature = "unstable-recover")] +impl<P, I, O, R, E> RecoverableParser<I, O, R, E> for P +where + P: Parser<Recoverable<I, R>, O, E>, + I: Stream, + I: StreamIsPartial, + R: FromRecoverableError<Recoverable<I, R>, E>, + R: crate::lib::std::fmt::Debug, + E: FromRecoverableError<Recoverable<I, R>, E>, + E: ParserError<Recoverable<I, R>>, + E: crate::lib::std::fmt::Debug, +{ + #[inline] + fn recoverable_parse(&mut self, input: I) -> (I, Option<O>, Vec<R>) { + debug_assert!( + !I::is_partial_supported(), + "partial streams need to handle `ErrMode::Incomplete`" + ); + + let start = input.checkpoint(); + let mut input = Recoverable::new(input); + let start_token = input.checkpoint(); + let result = ( + self.by_ref(), + crate::combinator::eof.resume_after(rest.void()), + ) + .parse_next(&mut input); + + let (o, err) = match result { + Ok((o, _)) => (Some(o), None), + Err(err) => { + let err = err + .into_inner() + .expect("complete parsers should not report `ErrMode::Incomplete(_)`"); + let err_start = input.checkpoint(); + let err = R::from_recoverable_error(&start_token, &err_start, &input, 
err); + (None, Some(err)) + } + }; + + let (mut input, mut errs) = input.into_parts(); + input.reset(start); + if let Some(err) = err { + errs.push(err); + } + + (input, o, errs) + } +} + impl_parser_for_tuples!( P1 O1, P2 O2, diff --git a/src/stream/impls.rs b/src/stream/impls.rs index b277dd9..d76e1bf 100644 --- a/src/stream/impls.rs +++ b/src/stream/impls.rs @@ -238,14 +238,14 @@ mod bytes { impl PartialOrd for Bytes { #[inline] fn partial_cmp(&self, other: &Bytes) -> Option<Ordering> { - PartialOrd::partial_cmp(self.as_bytes(), other.as_bytes()) + Some(self.cmp(other)) } } impl Ord for Bytes { #[inline] fn cmp(&self, other: &Bytes) -> Ordering { - self.partial_cmp(other).unwrap() + Ord::cmp(self.as_bytes(), other.as_bytes()) } } @@ -485,14 +485,14 @@ mod bstr { impl PartialOrd for BStr { #[inline] fn partial_cmp(&self, other: &BStr) -> Option<Ordering> { - PartialOrd::partial_cmp(self.as_bytes(), other.as_bytes()) + Some(self.cmp(other)) } } impl Ord for BStr { #[inline] fn cmp(&self, other: &BStr) -> Ordering { - self.partial_cmp(other).unwrap() + Ord::cmp(self.as_bytes(), other.as_bytes()) } } diff --git a/src/stream/mod.rs b/src/stream/mod.rs index 5f2152e..6873ac3 100644 --- a/src/stream/mod.rs +++ b/src/stream/mod.rs @@ -9,8 +9,12 @@ //! - [`Partial`] can mark an input as partial buffer that is being streamed into //! 
- [Custom stream types][crate::_topic::stream] +use core::hash::BuildHasher; use core::num::NonZeroUsize; +use crate::ascii::Caseless as AsciiCaseless; +#[cfg(feature = "unstable-recover")] +use crate::error::FromRecoverableError; use crate::error::Needed; use crate::lib::std::iter::{Cloned, Enumerate}; use crate::lib::std::slice::Iter; @@ -19,13 +23,17 @@ use crate::lib::std::str::CharIndices; use crate::lib::std::str::FromStr; #[allow(unused_imports)] -#[cfg(feature = "unstable-doc")] +#[cfg(any(feature = "unstable-doc", feature = "unstable-recover"))] use crate::error::ErrMode; #[cfg(feature = "alloc")] use crate::lib::std::collections::BTreeMap; +#[cfg(feature = "alloc")] +use crate::lib::std::collections::BTreeSet; #[cfg(feature = "std")] use crate::lib::std::collections::HashMap; +#[cfg(feature = "std")] +use crate::lib::std::collections::HashSet; #[cfg(feature = "alloc")] use crate::lib::std::string::String; #[cfg(feature = "alloc")] @@ -88,8 +96,18 @@ impl BStr { /// Allow collecting the span of a parsed token /// +/// Spans are tracked as a [`Range<usize>`] of byte offsets. +/// +/// Converting byte offsets to line or column numbers is left up to the user, as computing column +/// numbers requires domain knowledge (are columns byte-based, codepoint-based, or grapheme-based?) +/// and O(n) iteration over the input to determine codepoint and line boundaries. +/// +/// [The `line-span` crate](https://docs.rs/line-span/latest/line_span/) can help with converting +/// byte offsets to line numbers. 
+/// /// See [`Parser::span`][crate::Parser::span] and [`Parser::with_span`][crate::Parser::with_span] for more details #[derive(Copy, Clone, Default, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[doc(alias = "LocatedSpan")] pub struct Located<I> { initial: I, input: I, @@ -132,6 +150,84 @@ impl<I: crate::lib::std::fmt::Display> crate::lib::std::fmt::Display for Located } } +/// Allow recovering from parse errors, capturing them as the parser continues +/// +/// Generally, this will be used indirectly via +/// [`RecoverableParser::recoverable_parse`][crate::RecoverableParser::recoverable_parse]. +#[cfg(feature = "unstable-recover")] +#[derive(Clone, Debug)] +pub struct Recoverable<I, E> +where + I: Stream, +{ + input: I, + errors: Vec<E>, + is_recoverable: bool, +} + +#[cfg(feature = "unstable-recover")] +impl<I, E> Recoverable<I, E> +where + I: Stream, +{ + /// Track recoverable errors with the stream + pub fn new(input: I) -> Self { + Self { + input, + errors: Default::default(), + is_recoverable: true, + } + } + + /// Act as a normal stream + pub fn unrecoverable(input: I) -> Self { + Self { + input, + errors: Default::default(), + is_recoverable: false, + } + } + + /// Access the current input and errors + pub fn into_parts(self) -> (I, Vec<E>) { + (self.input, self.errors) + } +} + +#[cfg(feature = "unstable-recover")] +impl<I, E> AsRef<I> for Recoverable<I, E> +where + I: Stream, +{ + #[inline(always)] + fn as_ref(&self) -> &I { + &self.input + } +} + +#[cfg(feature = "unstable-recover")] +impl<I, E> crate::lib::std::ops::Deref for Recoverable<I, E> +where + I: Stream, +{ + type Target = I; + + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.input + } +} + +#[cfg(feature = "unstable-recover")] +impl<I: crate::lib::std::fmt::Display, E> crate::lib::std::fmt::Display for Recoverable<I, E> +where + I: Stream, +{ + fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { + 
crate::lib::std::fmt::Display::fmt(&self.input, f) + } +} + /// Thread global state through your parsers /// /// Use cases @@ -171,6 +267,7 @@ impl<I: crate::lib::std::fmt::Display> crate::lib::std::fmt::Display for Located /// assert_eq!(state.get(), 1); /// ``` #[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[doc(alias = "LocatedSpan")] pub struct Stateful<I, S> { /// Inner input being wrapped in state pub input: I, @@ -202,7 +299,7 @@ impl<I: crate::lib::std::fmt::Display, S> crate::lib::std::fmt::Display for Stat /// Mark the input as a partial buffer for streaming input. /// -/// Complete input means that we already have all of the data. This will be the common case with +/// Complete input means that we already have all of the data. This will be the common case with /// small files that can be read entirely to memory. /// /// In contrast, streaming input assumes that we might not have all of the data. @@ -322,6 +419,13 @@ pub trait SliceLen { fn slice_len(&self) -> usize; } +impl<S: SliceLen> SliceLen for AsciiCaseless<S> { + #[inline(always)] + fn slice_len(&self) -> usize { + self.0.slice_len() + } +} + impl<'a, T> SliceLen for &'a [T] { #[inline] fn slice_len(&self) -> usize { @@ -350,6 +454,13 @@ impl<'a> SliceLen for &'a str { } } +impl SliceLen for char { + #[inline] + fn slice_len(&self) -> usize { + self.len_utf8() + } +} + impl<'a> SliceLen for &'a Bytes { #[inline] fn slice_len(&self) -> usize { @@ -384,6 +495,18 @@ where } } +#[cfg(feature = "unstable-recover")] +impl<I, E> SliceLen for Recoverable<I, E> +where + I: SliceLen, + I: Stream, +{ + #[inline(always)] + fn slice_len(&self) -> usize { + self.input.slice_len() + } +} + impl<I, S> SliceLen for Stateful<I, S> where I: SliceLen, @@ -423,7 +546,8 @@ pub trait Stream: Offset<<Self as Stream>::Checkpoint> + crate::lib::std::fmt::D /// Iterate with the offset from the current location fn iter_offsets(&self) -> Self::IterOffsets; - /// Returns the offaet to the end of the input + + /// Returns the 
offset to the end of the input fn eof_offset(&self) -> usize; /// Split off the next token from the input @@ -450,7 +574,7 @@ pub trait Stream: Offset<<Self as Stream>::Checkpoint> + crate::lib::std::fmt::D /// Split off a slice of tokens from the input /// /// **NOTE:** For inputs with variable width tokens, like `&str`'s `char`, `offset` might not correspond - /// with the number of tokens. To get a valid offset, use: + /// with the number of tokens. To get a valid offset, use: /// - [`Stream::eof_offset`] /// - [`Stream::iter_offsets`] /// - [`Stream::offset_for`] @@ -950,7 +1074,64 @@ impl<I: Stream> Stream for Located<I> { } } -impl<I: Stream, S: Clone + crate::lib::std::fmt::Debug> Stream for Stateful<I, S> { +#[cfg(feature = "unstable-recover")] +impl<I, E: crate::lib::std::fmt::Debug> Stream for Recoverable<I, E> +where + I: Stream, +{ + type Token = <I as Stream>::Token; + type Slice = <I as Stream>::Slice; + + type IterOffsets = <I as Stream>::IterOffsets; + + type Checkpoint = Checkpoint<I::Checkpoint>; + + #[inline(always)] + fn iter_offsets(&self) -> Self::IterOffsets { + self.input.iter_offsets() + } + #[inline(always)] + fn eof_offset(&self) -> usize { + self.input.eof_offset() + } + + #[inline(always)] + fn next_token(&mut self) -> Option<Self::Token> { + self.input.next_token() + } + + #[inline(always)] + fn offset_for<P>(&self, predicate: P) -> Option<usize> + where + P: Fn(Self::Token) -> bool, + { + self.input.offset_for(predicate) + } + #[inline(always)] + fn offset_at(&self, tokens: usize) -> Result<usize, Needed> { + self.input.offset_at(tokens) + } + #[inline(always)] + fn next_slice(&mut self, offset: usize) -> Self::Slice { + self.input.next_slice(offset) + } + + #[inline(always)] + fn checkpoint(&self) -> Self::Checkpoint { + Checkpoint(self.input.checkpoint()) + } + #[inline(always)] + fn reset(&mut self, checkpoint: Self::Checkpoint) { + self.input.reset(checkpoint.0); + } + + #[inline(always)] + fn raw(&self) -> &dyn 
crate::lib::std::fmt::Debug { + &self.input + } +} + +impl<I: Stream, S: crate::lib::std::fmt::Debug> Stream for Stateful<I, S> { type Token = <I as Stream>::Token; type Slice = <I as Stream>::Slice; @@ -1057,6 +1238,8 @@ impl<I: Stream> Stream for Partial<I> { } /// Number of indices input has advanced since start of parsing +/// +/// See [`Located`] for adding location tracking to your [`Stream`] pub trait Location { /// Number of indices input has advanced since start of parsing fn location(&self) -> usize; @@ -1072,6 +1255,18 @@ where } } +#[cfg(feature = "unstable-recover")] +impl<I, E> Location for Recoverable<I, E> +where + I: Location, + I: Stream, +{ + #[inline(always)] + fn location(&self) -> usize { + self.input.location() + } +} + impl<I, S> Location for Stateful<I, S> where I: Location, @@ -1092,6 +1287,232 @@ where } } +/// Capture top-level errors in the middle of parsing so parsing can resume +/// +/// See [`Recoverable`] for adding error recovery tracking to your [`Stream`] +#[cfg(feature = "unstable-recover")] +pub trait Recover<E>: Stream { + /// Capture a top-level error + /// + /// May return `Err(err)` if recovery is not possible (e.g. if [`Recover::is_recovery_supported`] + /// returns `false`). 
+ fn record_err( + &mut self, + token_start: &Self::Checkpoint, + err_start: &Self::Checkpoint, + err: ErrMode<E>, + ) -> Result<(), ErrMode<E>>; + + /// Report whether the [`Stream`] can save off errors for recovery + fn is_recovery_supported() -> bool; +} + +#[cfg(feature = "unstable-recover")] +impl<'a, T, E> Recover<E> for &'a [T] +where + &'a [T]: Stream, +{ + #[inline(always)] + fn record_err( + &mut self, + _token_start: &Self::Checkpoint, + _err_start: &Self::Checkpoint, + err: ErrMode<E>, + ) -> Result<(), ErrMode<E>> { + Err(err) + } + + /// Report whether the [`Stream`] can save off errors for recovery + #[inline(always)] + fn is_recovery_supported() -> bool { + false + } +} + +#[cfg(feature = "unstable-recover")] +impl<'a, E> Recover<E> for &'a str { + #[inline(always)] + fn record_err( + &mut self, + _token_start: &Self::Checkpoint, + _err_start: &Self::Checkpoint, + err: ErrMode<E>, + ) -> Result<(), ErrMode<E>> { + Err(err) + } + + /// Report whether the [`Stream`] can save off errors for recovery + #[inline(always)] + fn is_recovery_supported() -> bool { + false + } +} + +#[cfg(feature = "unstable-recover")] +impl<'a, E> Recover<E> for &'a Bytes { + #[inline(always)] + fn record_err( + &mut self, + _token_start: &Self::Checkpoint, + _err_start: &Self::Checkpoint, + err: ErrMode<E>, + ) -> Result<(), ErrMode<E>> { + Err(err) + } + + /// Report whether the [`Stream`] can save off errors for recovery + #[inline(always)] + fn is_recovery_supported() -> bool { + false + } +} + +#[cfg(feature = "unstable-recover")] +impl<'a, E> Recover<E> for &'a BStr { + #[inline(always)] + fn record_err( + &mut self, + _token_start: &Self::Checkpoint, + _err_start: &Self::Checkpoint, + err: ErrMode<E>, + ) -> Result<(), ErrMode<E>> { + Err(err) + } + + /// Report whether the [`Stream`] can save off errors for recovery + #[inline(always)] + fn is_recovery_supported() -> bool { + false + } +} + +#[cfg(feature = "unstable-recover")] +impl<I, E> Recover<E> for (I, usize) 
+where + I: Recover<E>, + I: Stream<Token = u8> + Clone, +{ + #[inline(always)] + fn record_err( + &mut self, + _token_start: &Self::Checkpoint, + _err_start: &Self::Checkpoint, + err: ErrMode<E>, + ) -> Result<(), ErrMode<E>> { + Err(err) + } + + /// Report whether the [`Stream`] can save off errors for recovery + #[inline(always)] + fn is_recovery_supported() -> bool { + false + } +} + +#[cfg(feature = "unstable-recover")] +impl<I, E> Recover<E> for Located<I> +where + I: Recover<E>, + I: Stream, +{ + #[inline(always)] + fn record_err( + &mut self, + _token_start: &Self::Checkpoint, + _err_start: &Self::Checkpoint, + err: ErrMode<E>, + ) -> Result<(), ErrMode<E>> { + Err(err) + } + + /// Report whether the [`Stream`] can save off errors for recovery + #[inline(always)] + fn is_recovery_supported() -> bool { + false + } +} + +#[cfg(feature = "unstable-recover")] +impl<I, E, R> Recover<E> for Recoverable<I, R> +where + I: Stream, + R: FromRecoverableError<Self, E>, + R: crate::lib::std::fmt::Debug, +{ + fn record_err( + &mut self, + token_start: &Self::Checkpoint, + err_start: &Self::Checkpoint, + err: ErrMode<E>, + ) -> Result<(), ErrMode<E>> { + if self.is_recoverable { + match err { + ErrMode::Incomplete(need) => Err(ErrMode::Incomplete(need)), + ErrMode::Backtrack(err) | ErrMode::Cut(err) => { + self.errors + .push(R::from_recoverable_error(token_start, err_start, self, err)); + Ok(()) + } + } + } else { + Err(err) + } + } + + /// Report whether the [`Stream`] can save off errors for recovery + #[inline(always)] + fn is_recovery_supported() -> bool { + true + } +} + +#[cfg(feature = "unstable-recover")] +impl<I, E, S> Recover<E> for Stateful<I, S> +where + I: Recover<E>, + I: Stream, + S: Clone + crate::lib::std::fmt::Debug, +{ + #[inline(always)] + fn record_err( + &mut self, + _token_start: &Self::Checkpoint, + _err_start: &Self::Checkpoint, + err: ErrMode<E>, + ) -> Result<(), ErrMode<E>> { + Err(err) + } + + /// Report whether the [`Stream`] can save off 
errors for recovery + #[inline(always)] + fn is_recovery_supported() -> bool { + false + } +} + +#[cfg(feature = "unstable-recover")] +impl<I, E> Recover<E> for Partial<I> +where + I: Recover<E>, + I: Stream, +{ + #[inline(always)] + fn record_err( + &mut self, + _token_start: &Self::Checkpoint, + _err_start: &Self::Checkpoint, + err: ErrMode<E>, + ) -> Result<(), ErrMode<E>> { + Err(err) + } + + /// Report whether the [`Stream`] can save off errors for recovery + #[inline(always)] + fn is_recovery_supported() -> bool { + false + } +} + /// Marks the input as being the complete buffer or a partial buffer for streaming input /// /// See [`Partial`] for marking a presumed complete buffer type as a streaming buffer. @@ -1224,6 +1645,33 @@ where } } +#[cfg(feature = "unstable-recover")] +impl<I, E> StreamIsPartial for Recoverable<I, E> +where + I: StreamIsPartial, + I: Stream, +{ + type PartialState = I::PartialState; + + fn complete(&mut self) -> Self::PartialState { + self.input.complete() + } + + fn restore_partial(&mut self, state: Self::PartialState) { + self.input.restore_partial(state); + } + + #[inline(always)] + fn is_partial_supported() -> bool { + I::is_partial_supported() + } + + #[inline(always)] + fn is_partial(&self) -> bool { + self.input.is_partial() + } +} + impl<I, S> StreamIsPartial for Stateful<I, S> where I: StreamIsPartial, @@ -1276,7 +1724,10 @@ where /// Useful functions to calculate the offset between slices and show a hexdump of a slice pub trait Offset<Start = Self> { - /// Offset between the first byte of `start` and the first byte of `self` + /// Offset between the first byte of `start` and the first byte of `self`a + /// + /// **Note:** This is an offset, not an index, and may point to the end of input + /// (`start.len()`) when `self` is exhausted. 
fn offset_from(&self, start: &Start) -> usize; } @@ -1288,9 +1739,9 @@ impl<'a, T> Offset for &'a [T] { debug_assert!( fst <= snd, - "`Offset::offset_to` only accepts slices of `self`" + "`Offset::offset_from({snd:?}, {fst:?})` only accepts slices of `self`" ); - snd as usize - fst as usize + (snd as usize - fst as usize) / crate::lib::std::mem::size_of::<T>() } } @@ -1386,6 +1837,30 @@ where } } +#[cfg(feature = "unstable-recover")] +impl<I, E> Offset for Recoverable<I, E> +where + I: Stream, + E: crate::lib::std::fmt::Debug, +{ + #[inline(always)] + fn offset_from(&self, other: &Self) -> usize { + self.offset_from(&other.checkpoint()) + } +} + +#[cfg(feature = "unstable-recover")] +impl<I, E> Offset<<Recoverable<I, E> as Stream>::Checkpoint> for Recoverable<I, E> +where + I: Stream, + E: crate::lib::std::fmt::Debug, +{ + #[inline(always)] + fn offset_from(&self, other: &<Recoverable<I, E> as Stream>::Checkpoint) -> usize { + self.checkpoint().offset_from(other) + } +} + impl<I, S> Offset for Stateful<I, S> where I: Stream, @@ -1400,7 +1875,7 @@ where impl<I, S> Offset<<Stateful<I, S> as Stream>::Checkpoint> for Stateful<I, S> where I: Stream, - S: Clone + crate::lib::std::fmt::Debug, + S: crate::lib::std::fmt::Debug, { #[inline(always)] fn offset_from(&self, other: &<Stateful<I, S> as Stream>::Checkpoint) -> usize { @@ -1468,6 +1943,18 @@ where } } +#[cfg(feature = "unstable-recover")] +impl<I, E> AsBytes for Recoverable<I, E> +where + I: Stream, + I: AsBytes, +{ + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + self.input.as_bytes() + } +} + impl<I, S> AsBytes for Stateful<I, S> where I: AsBytes, @@ -1525,6 +2012,18 @@ where } } +#[cfg(feature = "unstable-recover")] +impl<I, E> AsBStr for Recoverable<I, E> +where + I: Stream, + I: AsBStr, +{ + #[inline(always)] + fn as_bstr(&self) -> &[u8] { + self.input.as_bstr() + } +} + impl<I, S> AsBStr for Stateful<I, S> where I: AsBStr, @@ -1567,47 +2066,50 @@ pub trait Compare<T> { /// by lowercasing both strings and 
comparing /// the result. This is a temporary solution until /// a better one appears + #[deprecated(since = "0.5.20", note = "Replaced with `compare(ascii::Caseless(_))`")] fn compare_no_case(&self, t: T) -> CompareResult; } -fn lowercase_byte(c: u8) -> u8 { - match c { - b'A'..=b'Z' => c - b'A' + b'a', - _ => c, - } -} - impl<'a, 'b> Compare<&'b [u8]> for &'a [u8] { #[inline] fn compare(&self, t: &'b [u8]) -> CompareResult { - let pos = self.iter().zip(t.iter()).position(|(a, b)| a != b); - - match pos { - Some(_) => CompareResult::Error, - None => { - if self.len() >= t.len() { - CompareResult::Ok - } else { - CompareResult::Incomplete - } - } + if t.iter().zip(*self).any(|(a, b)| a != b) { + CompareResult::Error + } else if self.len() < t.slice_len() { + CompareResult::Incomplete + } else { + CompareResult::Ok } } - #[inline] + #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: &'b [u8]) -> CompareResult { - if self + self.compare(AsciiCaseless(t)) + } +} + +impl<'a, 'b> Compare<AsciiCaseless<&'b [u8]>> for &'a [u8] { + #[inline] + fn compare(&self, t: AsciiCaseless<&'b [u8]>) -> CompareResult { + if t.0 .iter() - .zip(t) - .any(|(a, b)| lowercase_byte(*a) != lowercase_byte(*b)) + .zip(*self) + .any(|(a, b)| !a.eq_ignore_ascii_case(b)) { CompareResult::Error - } else if self.len() < t.len() { + } else if self.len() < t.slice_len() { CompareResult::Incomplete } else { CompareResult::Ok } } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<&'b [u8]>) -> CompareResult { + self.compare(t) + } } impl<'a, const LEN: usize> Compare<[u8; LEN]> for &'a [u8] { @@ -1617,11 +2119,25 @@ impl<'a, const LEN: usize> Compare<[u8; LEN]> for &'a [u8] { } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: [u8; LEN]) -> CompareResult { self.compare_no_case(&t[..]) } } +impl<'a, const LEN: usize> Compare<AsciiCaseless<[u8; LEN]>> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: AsciiCaseless<[u8; 
LEN]>) -> CompareResult { + self.compare(AsciiCaseless(&t.0[..])) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<[u8; LEN]>) -> CompareResult { + self.compare_no_case(AsciiCaseless(&t.0[..])) + } +} + impl<'a, 'b, const LEN: usize> Compare<&'b [u8; LEN]> for &'a [u8] { #[inline(always)] fn compare(&self, t: &'b [u8; LEN]) -> CompareResult { @@ -1629,46 +2145,124 @@ impl<'a, 'b, const LEN: usize> Compare<&'b [u8; LEN]> for &'a [u8] { } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: &'b [u8; LEN]) -> CompareResult { self.compare_no_case(&t[..]) } } +impl<'a, 'b, const LEN: usize> Compare<AsciiCaseless<&'b [u8; LEN]>> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: AsciiCaseless<&'b [u8; LEN]>) -> CompareResult { + self.compare(AsciiCaseless(&t.0[..])) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<&'b [u8; LEN]>) -> CompareResult { + self.compare_no_case(AsciiCaseless(&t.0[..])) + } +} + impl<'a, 'b> Compare<&'b str> for &'a [u8] { #[inline(always)] fn compare(&self, t: &'b str) -> CompareResult { self.compare(t.as_bytes()) } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: &'b str) -> CompareResult { self.compare_no_case(t.as_bytes()) } } +impl<'a, 'b> Compare<AsciiCaseless<&'b str>> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: AsciiCaseless<&'b str>) -> CompareResult { + self.compare(AsciiCaseless(t.0.as_bytes())) + } + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<&'b str>) -> CompareResult { + self.compare_no_case(AsciiCaseless(t.0.as_bytes())) + } +} + +impl<'a> Compare<char> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: char) -> CompareResult { + self.compare(t.encode_utf8(&mut [0; 4]).as_bytes()) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: char) -> CompareResult { + self.compare_no_case(t.encode_utf8(&mut [0; 
4]).as_bytes()) + } +} + +impl<'a> Compare<AsciiCaseless<char>> for &'a [u8] { + #[inline] + fn compare(&self, t: AsciiCaseless<char>) -> CompareResult { + self.compare(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes())) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<char>) -> CompareResult { + self.compare_no_case(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes())) + } +} + impl<'a, 'b> Compare<&'b str> for &'a str { #[inline(always)] fn compare(&self, t: &'b str) -> CompareResult { self.as_bytes().compare(t.as_bytes()) } - //FIXME: this version is too simple and does not use the current locale #[inline] + #[allow(deprecated)] fn compare_no_case(&self, t: &'b str) -> CompareResult { - let pos = self - .chars() - .zip(t.chars()) - .position(|(a, b)| a.to_lowercase().ne(b.to_lowercase())); + self.compare(AsciiCaseless(t)) + } +} - match pos { - Some(_) => CompareResult::Error, - None => { - if self.len() >= t.len() { - CompareResult::Ok - } else { - CompareResult::Incomplete - } - } - } +impl<'a, 'b> Compare<AsciiCaseless<&'b str>> for &'a str { + #[inline(always)] + fn compare(&self, t: AsciiCaseless<&'b str>) -> CompareResult { + self.as_bytes().compare(t.as_bytes()) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<&'b str>) -> CompareResult { + self.compare(t) + } +} + +impl<'a> Compare<char> for &'a str { + #[inline(always)] + fn compare(&self, t: char) -> CompareResult { + self.compare(t.encode_utf8(&mut [0; 4]).as_bytes()) + } + + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: char) -> CompareResult { + self.compare_no_case(t.encode_utf8(&mut [0; 4]).as_bytes()) + } +} + +impl<'a> Compare<AsciiCaseless<char>> for &'a str { + #[inline] + fn compare(&self, t: AsciiCaseless<char>) -> CompareResult { + self.compare(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes())) + } + + #[inline(always)] + #[allow(deprecated)] + fn 
compare_no_case(&self, t: AsciiCaseless<char>) -> CompareResult { + self.compare_no_case(AsciiCaseless(t.0.encode_utf8(&mut [0; 4]).as_bytes())) } } @@ -1678,11 +2272,24 @@ impl<'a, 'b> Compare<&'b [u8]> for &'a str { AsBStr::as_bstr(self).compare(t) } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: &'b [u8]) -> CompareResult { AsBStr::as_bstr(self).compare_no_case(t) } } +impl<'a, 'b> Compare<AsciiCaseless<&'b [u8]>> for &'a str { + #[inline(always)] + fn compare(&self, t: AsciiCaseless<&'b [u8]>) -> CompareResult { + AsBStr::as_bstr(self).compare(t) + } + #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, t: AsciiCaseless<&'b [u8]>) -> CompareResult { + AsBStr::as_bstr(self).compare_no_case(t) + } +} + impl<'a, T> Compare<T> for &'a Bytes where &'a [u8]: Compare<T>, @@ -1694,6 +2301,7 @@ where } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: T) -> CompareResult { let bytes = (*self).as_bytes(); bytes.compare_no_case(t) @@ -1711,6 +2319,7 @@ where } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: T) -> CompareResult { let bytes = (*self).as_bytes(); bytes.compare_no_case(t) @@ -1727,6 +2336,25 @@ where } #[inline(always)] + #[allow(deprecated)] + fn compare_no_case(&self, other: U) -> CompareResult { + self.input.compare_no_case(other) + } +} + +#[cfg(feature = "unstable-recover")] +impl<I, E, U> Compare<U> for Recoverable<I, E> +where + I: Stream, + I: Compare<U>, +{ + #[inline(always)] + fn compare(&self, other: U) -> CompareResult { + self.input.compare(other) + } + + #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, other: U) -> CompareResult { self.input.compare_no_case(other) } @@ -1742,6 +2370,7 @@ where } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, other: U) -> CompareResult { self.input.compare_no_case(other) } @@ -1757,6 +2386,7 @@ where } #[inline(always)] + #[allow(deprecated)] fn compare_no_case(&self, t: T) -> CompareResult { 
self.input.compare_no_case(t) } @@ -1775,6 +2405,27 @@ impl<'i, 's> FindSlice<&'s [u8]> for &'i [u8] { } } +impl<'i, 's> FindSlice<(&'s [u8],)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s [u8],)) -> Option<usize> { + memmem(self, substr.0) + } +} + +impl<'i, 's> FindSlice<(&'s [u8], &'s [u8])> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s [u8], &'s [u8])) -> Option<usize> { + memmem2(self, substr) + } +} + +impl<'i, 's> FindSlice<(&'s [u8], &'s [u8], &'s [u8])> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s [u8], &'s [u8], &'s [u8])) -> Option<usize> { + memmem3(self, substr) + } +} + impl<'i> FindSlice<u8> for &'i [u8] { #[inline(always)] fn find_slice(&self, substr: u8) -> Option<usize> { @@ -1782,6 +2433,27 @@ impl<'i> FindSlice<u8> for &'i [u8] { } } +impl<'i> FindSlice<(u8,)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (u8,)) -> Option<usize> { + memchr(substr.0, self) + } +} + +impl<'i> FindSlice<(u8, u8)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (u8, u8)) -> Option<usize> { + memchr2(substr, self) + } +} + +impl<'i> FindSlice<(u8, u8, u8)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (u8, u8, u8)) -> Option<usize> { + memchr3(substr, self) + } +} + impl<'i, 's> FindSlice<&'s str> for &'i [u8] { #[inline(always)] fn find_slice(&self, substr: &'s str) -> Option<usize> { @@ -1789,17 +2461,129 @@ impl<'i, 's> FindSlice<&'s str> for &'i [u8] { } } +impl<'i, 's> FindSlice<(&'s str,)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s str,)) -> Option<usize> { + memmem(self, substr.0.as_bytes()) + } +} + +impl<'i, 's> FindSlice<(&'s str, &'s str)> for &'i [u8] { + #[inline(always)] + fn find_slice(&self, substr: (&'s str, &'s str)) -> Option<usize> { + memmem2(self, (substr.0.as_bytes(), substr.1.as_bytes())) + } +} + +impl<'i, 's> FindSlice<(&'s str, &'s str, &'s str)> for &'i [u8] { + #[inline(always)] 
+ fn find_slice(&self, substr: (&'s str, &'s str, &'s str)) -> Option<usize> { + memmem3( + self, + ( + substr.0.as_bytes(), + substr.1.as_bytes(), + substr.2.as_bytes(), + ), + ) + } +} + impl<'i, 's> FindSlice<&'s str> for &'i str { #[inline(always)] fn find_slice(&self, substr: &'s str) -> Option<usize> { - self.find(substr) + self.as_bytes().find_slice(substr.as_bytes()) + } +} + +impl<'i, 's> FindSlice<(&'s str,)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (&'s str,)) -> Option<usize> { + self.as_bytes().find_slice(substr) + } +} + +impl<'i, 's> FindSlice<(&'s str, &'s str)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (&'s str, &'s str)) -> Option<usize> { + self.as_bytes().find_slice(substr) + } +} + +impl<'i, 's> FindSlice<(&'s str, &'s str, &'s str)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (&'s str, &'s str, &'s str)) -> Option<usize> { + self.as_bytes().find_slice(substr) } } impl<'i> FindSlice<char> for &'i str { #[inline(always)] fn find_slice(&self, substr: char) -> Option<usize> { - self.find(substr) + let mut b = [0; 4]; + let substr = substr.encode_utf8(&mut b); + self.find_slice(&*substr) + } +} + +impl<'i> FindSlice<(char,)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (char,)) -> Option<usize> { + let mut b = [0; 4]; + let substr0 = substr.0.encode_utf8(&mut b); + self.find_slice((&*substr0,)) + } +} + +impl<'i> FindSlice<(char, char)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (char, char)) -> Option<usize> { + let mut b = [0; 4]; + let substr0 = substr.0.encode_utf8(&mut b); + let mut b = [0; 4]; + let substr1 = substr.1.encode_utf8(&mut b); + self.find_slice((&*substr0, &*substr1)) + } +} + +impl<'i> FindSlice<(char, char, char)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (char, char, char)) -> Option<usize> { + let mut b = [0; 4]; + let substr0 = substr.0.encode_utf8(&mut b); + let mut b = [0; 4]; + let substr1 = 
substr.1.encode_utf8(&mut b); + let mut b = [0; 4]; + let substr2 = substr.2.encode_utf8(&mut b); + self.find_slice((&*substr0, &*substr1, &*substr2)) + } +} + +impl<'i> FindSlice<u8> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: u8) -> Option<usize> { + self.find_slice(substr.as_char()) + } +} + +impl<'i> FindSlice<(u8,)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (u8,)) -> Option<usize> { + self.find_slice((substr.0.as_char(),)) + } +} + +impl<'i> FindSlice<(u8, u8)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (u8, u8)) -> Option<usize> { + self.find_slice((substr.0.as_char(), substr.1.as_char())) + } +} + +impl<'i> FindSlice<(u8, u8, u8)> for &'i str { + #[inline(always)] + fn find_slice(&self, substr: (u8, u8, u8)) -> Option<usize> { + self.find_slice((substr.0.as_char(), substr.1.as_char(), substr.2.as_char())) } } @@ -1837,6 +2621,18 @@ where } } +#[cfg(feature = "unstable-recover")] +impl<I, E, T> FindSlice<T> for Recoverable<I, E> +where + I: Stream, + I: FindSlice<T>, +{ + #[inline(always)] + fn find_slice(&self, substr: T) -> Option<usize> { + self.input.find_slice(substr) + } +} + impl<I, S, T> FindSlice<T> for Stateful<I, S> where I: FindSlice<T>, @@ -1928,6 +2724,20 @@ where } } +#[cfg(feature = "unstable-recover")] +impl<I, E> UpdateSlice for Recoverable<I, E> +where + I: Stream, + I: UpdateSlice, + E: crate::lib::std::fmt::Debug, +{ + #[inline(always)] + fn update_slice(mut self, inner: Self::Slice) -> Self { + self.input = I::update_slice(self.input, inner); + self + } +} + impl<I, S> UpdateSlice for Stateful<I, S> where I: UpdateSlice, @@ -1953,7 +2763,7 @@ where } } -/// Ensure checkpoint details are kept privazte +/// Ensure checkpoint details are kept private #[derive(Copy, Clone, Debug)] pub struct Checkpoint<T>(T); @@ -2177,15 +2987,19 @@ where } #[cfg(feature = "std")] -impl<K, V> Accumulate<(K, V)> for HashMap<K, V> +impl<K, V, S> Accumulate<(K, V)> for HashMap<K, V, S> where K: 
crate::lib::std::cmp::Eq + crate::lib::std::hash::Hash, + S: BuildHasher + Default, { #[inline(always)] fn initial(capacity: Option<usize>) -> Self { + let h = S::default(); match capacity { - Some(capacity) => HashMap::with_capacity(clamp_capacity::<(K, V)>(capacity)), - None => HashMap::new(), + Some(capacity) => { + HashMap::with_capacity_and_hasher(clamp_capacity::<(K, V)>(capacity), h) + } + None => HashMap::with_hasher(h), } } #[inline(always)] @@ -2195,6 +3009,41 @@ where } #[cfg(feature = "alloc")] +impl<K> Accumulate<K> for BTreeSet<K> +where + K: crate::lib::std::cmp::Ord, +{ + #[inline(always)] + fn initial(_capacity: Option<usize>) -> Self { + BTreeSet::new() + } + #[inline(always)] + fn accumulate(&mut self, key: K) { + self.insert(key); + } +} + +#[cfg(feature = "std")] +impl<K, S> Accumulate<K> for HashSet<K, S> +where + K: crate::lib::std::cmp::Eq + crate::lib::std::hash::Hash, + S: BuildHasher + Default, +{ + #[inline(always)] + fn initial(capacity: Option<usize>) -> Self { + let h = S::default(); + match capacity { + Some(capacity) => HashSet::with_capacity_and_hasher(clamp_capacity::<K>(capacity), h), + None => HashSet::with_hasher(h), + } + } + #[inline(always)] + fn accumulate(&mut self, key: K) { + self.insert(key); + } +} + +#[cfg(feature = "alloc")] #[inline] pub(crate) fn clamp_capacity<T>(capacity: usize) -> usize { /// Don't pre-allocate more than 64KiB when calling `Vec::with_capacity`. 
@@ -2338,6 +3187,7 @@ impl AsChar for u8 { self == b'\n' } } + impl<'a> AsChar for &'a u8 { #[inline(always)] fn as_char(self) -> char { @@ -2520,7 +3370,7 @@ impl<C: AsChar> ContainsToken<C> for char { } } -impl<C: AsChar, F: Fn(C) -> bool> ContainsToken<C> for F { +impl<C, F: Fn(C) -> bool> ContainsToken<C> for F { #[inline(always)] fn contains_token(&self, token: C) -> bool { self(token) @@ -2675,51 +3525,158 @@ fn memchr(token: u8, slice: &[u8]) -> Option<usize> { memchr::memchr(token, slice) } +#[cfg(feature = "simd")] +#[inline(always)] +fn memchr2(token: (u8, u8), slice: &[u8]) -> Option<usize> { + memchr::memchr2(token.0, token.1, slice) +} + +#[cfg(feature = "simd")] +#[inline(always)] +fn memchr3(token: (u8, u8, u8), slice: &[u8]) -> Option<usize> { + memchr::memchr3(token.0, token.1, token.2, slice) +} + #[cfg(not(feature = "simd"))] #[inline(always)] fn memchr(token: u8, slice: &[u8]) -> Option<usize> { slice.iter().position(|t| *t == token) } -#[cfg(feature = "simd")] +#[cfg(not(feature = "simd"))] +#[inline(always)] +fn memchr2(token: (u8, u8), slice: &[u8]) -> Option<usize> { + slice.iter().position(|t| *t == token.0 || *t == token.1) +} + +#[cfg(not(feature = "simd"))] +#[inline(always)] +fn memchr3(token: (u8, u8, u8), slice: &[u8]) -> Option<usize> { + slice + .iter() + .position(|t| *t == token.0 || *t == token.1 || *t == token.2) +} + #[inline(always)] fn memmem(slice: &[u8], tag: &[u8]) -> Option<usize> { - if tag.len() > slice.len() { - return None; + if tag.len() == 1 { + memchr(tag[0], slice) + } else { + memmem_(slice, tag) + } +} + +#[inline(always)] +fn memmem2(slice: &[u8], tag: (&[u8], &[u8])) -> Option<usize> { + if tag.0.len() == 1 && tag.1.len() == 1 { + memchr2((tag.0[0], tag.1[0]), slice) + } else { + memmem2_(slice, tag) + } +} + +#[inline(always)] +fn memmem3(slice: &[u8], tag: (&[u8], &[u8], &[u8])) -> Option<usize> { + if tag.0.len() == 1 && tag.1.len() == 1 && tag.2.len() == 1 { + memchr3((tag.0[0], tag.1[0], tag.2[0]), slice) 
+ } else { + memmem3_(slice, tag) } +} - let (&substr_first, substr_rest) = match tag.split_first() { - Some(split) => split, - // an empty substring is found at position 0 - // This matches the behavior of str.find(""). +#[cfg(feature = "simd")] +#[inline(always)] +fn memmem_(slice: &[u8], tag: &[u8]) -> Option<usize> { + let &prefix = match tag.first() { + Some(x) => x, None => return Some(0), }; - - if substr_rest.is_empty() { - return memchr::memchr(substr_first, slice); + #[allow(clippy::manual_find)] // faster this way + for i in memchr::memchr_iter(prefix, slice) { + if slice[i..].starts_with(tag) { + return Some(i); + } } + None +} - let mut offset = 0; - let haystack = &slice[..slice.len() - substr_rest.len()]; +#[cfg(feature = "simd")] +fn memmem2_(slice: &[u8], tag: (&[u8], &[u8])) -> Option<usize> { + let prefix = match (tag.0.first(), tag.1.first()) { + (Some(&a), Some(&b)) => (a, b), + _ => return Some(0), + }; + #[allow(clippy::manual_find)] // faster this way + for i in memchr::memchr2_iter(prefix.0, prefix.1, slice) { + let subslice = &slice[i..]; + if subslice.starts_with(tag.0) { + return Some(i); + } + if subslice.starts_with(tag.1) { + return Some(i); + } + } + None +} - while let Some(position) = memchr::memchr(substr_first, &haystack[offset..]) { - offset += position; - let next_offset = offset + 1; - if &slice[next_offset..][..substr_rest.len()] == substr_rest { - return Some(offset); +#[cfg(feature = "simd")] +fn memmem3_(slice: &[u8], tag: (&[u8], &[u8], &[u8])) -> Option<usize> { + let prefix = match (tag.0.first(), tag.1.first(), tag.2.first()) { + (Some(&a), Some(&b), Some(&c)) => (a, b, c), + _ => return Some(0), + }; + #[allow(clippy::manual_find)] // faster this way + for i in memchr::memchr3_iter(prefix.0, prefix.1, prefix.2, slice) { + let subslice = &slice[i..]; + if subslice.starts_with(tag.0) { + return Some(i); } + if subslice.starts_with(tag.1) { + return Some(i); + } + if subslice.starts_with(tag.2) { + return Some(i); + } + 
} + None +} - offset = next_offset; +#[cfg(not(feature = "simd"))] +fn memmem_(slice: &[u8], tag: &[u8]) -> Option<usize> { + for i in 0..slice.len() { + let subslice = &slice[i..]; + if subslice.starts_with(tag) { + return Some(i); + } } + None +} +#[cfg(not(feature = "simd"))] +fn memmem2_(slice: &[u8], tag: (&[u8], &[u8])) -> Option<usize> { + for i in 0..slice.len() { + let subslice = &slice[i..]; + if subslice.starts_with(tag.0) { + return Some(i); + } + if subslice.starts_with(tag.1) { + return Some(i); + } + } None } #[cfg(not(feature = "simd"))] -fn memmem(slice: &[u8], tag: &[u8]) -> Option<usize> { +fn memmem3_(slice: &[u8], tag: (&[u8], &[u8], &[u8])) -> Option<usize> { for i in 0..slice.len() { let subslice = &slice[i..]; - if subslice.starts_with(tag) { + if subslice.starts_with(tag.0) { + return Some(i); + } + if subslice.starts_with(tag.1) { + return Some(i); + } + if subslice.starts_with(tag.2) { return Some(i); } } diff --git a/src/stream/tests.rs b/src/stream/tests.rs index e653ad9..06047df 100644 --- a/src/stream/tests.rs +++ b/src/stream/tests.rs @@ -1,8 +1,28 @@ #[cfg(feature = "std")] use proptest::prelude::*; +use crate::error::ErrMode::Backtrack; +use crate::error::{ErrorKind, InputError}; +use crate::token::tag; +use crate::{ + combinator::{separated, separated_pair}, + PResult, Parser, +}; + use super::*; +#[cfg(feature = "std")] +#[test] +fn test_fxhashmap_compiles() { + let input = "a=b"; + fn pair(i: &mut &str) -> PResult<(char, char)> { + let out = separated_pair('a', '=', 'b').parse_next(i)?; + Ok(out) + } + + let _: rustc_hash::FxHashMap<char, char> = separated(0.., pair, ',').parse(input).unwrap(); +} + #[test] fn test_offset_u8() { let s = b"abcd123"; @@ -114,3 +134,95 @@ fn test_partial_complete() { i.restore_partial(incomplete_state); assert!(i.is_partial(), "incomplete stream state should be restored"); } + +#[test] +fn test_custom_slice() { + type Token = usize; + type TokenSlice<'i> = &'i [Token]; + + let mut tokens: 
TokenSlice<'_> = &[1, 2, 3, 4]; + + let input = &mut tokens; + let start = input.checkpoint(); + let _ = input.next_token(); + let _ = input.next_token(); + let offset = input.offset_from(&start); + assert_eq!(offset, 2); +} + +#[test] +fn test_tag_support_char() { + assert_eq!( + tag::<_, _, InputError<_>>('π').parse_peek("π"), + Ok(("", "π")) + ); + assert_eq!( + tag::<_, _, InputError<_>>('π').parse_peek("π3.14"), + Ok(("3.14", "π")) + ); + + assert_eq!( + tag::<_, _, InputError<_>>("π").parse_peek("π3.14"), + Ok(("3.14", "π")) + ); + + assert_eq!( + tag::<_, _, InputError<_>>('-').parse_peek("π"), + Err(Backtrack(InputError::new("π", ErrorKind::Tag))) + ); + + assert_eq!( + tag::<_, Partial<&[u8]>, InputError<_>>('π').parse_peek(Partial::new(b"\xCF\x80")), + Ok((Partial::new(Default::default()), "π".as_bytes())) + ); + assert_eq!( + tag::<_, &[u8], InputError<_>>('π').parse_peek(b"\xCF\x80"), + Ok((Default::default(), "π".as_bytes())) + ); + + assert_eq!( + tag::<_, Partial<&[u8]>, InputError<_>>('π').parse_peek(Partial::new(b"\xCF\x803.14")), + Ok((Partial::new(&b"3.14"[..]), "π".as_bytes())) + ); + assert_eq!( + tag::<_, &[u8], InputError<_>>('π').parse_peek(b"\xCF\x80"), + Ok((Default::default(), "π".as_bytes())) + ); + + assert_eq!( + tag::<_, &[u8], InputError<_>>('π').parse_peek(b"\xCF\x803.14"), + Ok((&b"3.14"[..], "π".as_bytes())) + ); + + assert_eq!( + tag::<_, &[u8], InputError<_>>(AsciiCaseless('a')).parse_peek(b"ABCxyz"), + Ok((&b"BCxyz"[..], &b"A"[..])) + ); + + assert_eq!( + tag::<_, &[u8], InputError<_>>('a').parse_peek(b"ABCxyz"), + Err(Backtrack(InputError::new(&b"ABCxyz"[..], ErrorKind::Tag))) + ); + + assert_eq!( + tag::<_, &[u8], InputError<_>>(AsciiCaseless('π')).parse_peek(b"\xCF\x803.14"), + Ok((&b"3.14"[..], "π".as_bytes())) + ); + + assert_eq!( + tag::<_, _, InputError<_>>(AsciiCaseless('🧑')).parse_peek("🧑你好"), + Ok(("你好", "🧑")) + ); + + let mut buffer = [0; 4]; + let input = '\u{241b}'.encode_utf8(&mut buffer); + assert_eq!( + tag::<_, 
&[u8], InputError<_>>(AsciiCaseless('␛')).parse_peek(input.as_bytes()), + Ok((&b""[..], [226, 144, 155].as_slice())) + ); + + assert_eq!( + tag::<_, &[u8], InputError<_>>('-').parse_peek(b"\xCF\x80"), + Err(Backtrack(InputError::new(&b"\xCF\x80"[..], ErrorKind::Tag))) + ); +} diff --git a/src/token/mod.rs b/src/token/mod.rs index fba019c..8641515 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -3,6 +3,7 @@ #[cfg(test)] mod tests; +use crate::combinator::trace; use crate::error::ErrMode; use crate::error::ErrorKind; use crate::error::Needed; @@ -11,7 +12,6 @@ use crate::lib::std::result::Result::Ok; use crate::stream::Range; use crate::stream::{Compare, CompareResult, ContainsToken, FindSlice, SliceLen, Stream}; use crate::stream::{StreamIsPartial, ToUsize}; -use crate::trace::trace; use crate::PResult; use crate::Parser; @@ -52,7 +52,7 @@ where if <I as StreamIsPartial>::is_partial_supported() { any_::<_, _, true>(input) } else { - any_::<_, _, true>(input) + any_::<_, _, false>(input) } }) .parse_next(input) @@ -81,7 +81,7 @@ where /// /// It will return `Err(ErrMode::Backtrack(InputError::new(_, ErrorKind::Tag)))` if the input doesn't match the pattern /// -/// **Note:** [`Parser`][crate::Parser] is implemented for strings and byte strings as a convenience (complete +/// **Note:** [`Parser`] is implemented for strings and byte strings as a convenience (complete /// only) /// /// # Example @@ -114,6 +114,23 @@ where /// assert_eq!(parser(Partial::new("S")), Err(ErrMode::Backtrack(InputError::new(Partial::new("S"), ErrorKind::Tag)))); /// assert_eq!(parser(Partial::new("H")), Err(ErrMode::Incomplete(Needed::new(4)))); /// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; +/// # use winnow::prelude::*; +/// use winnow::token::tag; +/// use winnow::ascii::Caseless; +/// +/// fn parser(s: &str) -> IResult<&str, &str> { +/// tag(Caseless("hello")).parse_peek(s) +/// } +/// +/// assert_eq!(parser("Hello, 
World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser("hello, World!"), Ok((", World!", "hello"))); +/// assert_eq!(parser("HeLlO, World!"), Ok((", World!", "HeLlO"))); +/// assert_eq!(parser("Something"), Err(ErrMode::Backtrack(InputError::new("Something", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Tag)))); +/// ``` #[inline(always)] #[doc(alias = "literal")] #[doc(alias = "bytes")] @@ -201,6 +218,7 @@ where #[doc(alias = "literal")] #[doc(alias = "bytes")] #[doc(alias = "just")] +#[deprecated(since = "0.5.20", note = "Replaced with `tag(ascii::Caseless(_))`")] pub fn tag_no_case<T, I, Error: ParserError<I>>( tag: T, ) -> impl Parser<I, <I as Stream>::Slice, Error> @@ -219,6 +237,7 @@ where }) } +#[allow(deprecated)] fn tag_no_case_<T, I, Error: ParserError<I>, const PARTIAL: bool>( i: &mut I, t: T, @@ -244,7 +263,7 @@ where /// Recognize a token that matches the [pattern][ContainsToken] /// -/// **Note:** [`Parser`][crate::Parser] is implemented as a convenience (complete +/// **Note:** [`Parser`] is implemented as a convenience (complete /// only) for /// - `u8` /// - `char` @@ -349,9 +368,9 @@ where /// It will return an `ErrMode::Backtrack(InputError::new(_, ErrorKind::Slice))` if the pattern wasn't met or is out /// of range (m <= len <= n). /// -/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` if the pattern reaches the end of the input or is too short. +/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` if the pattern reaches the end of the input or is too short. /// -/// To recognize a series of tokens, use [`repeat`][crate::combinator::repeat] to [`Accumulate`][crate::stream::Accumulate] into a `()` and then [`Parser::recognize`][crate::Parser::recognize]. +/// To recognize a series of tokens, use [`repeat`][crate::combinator::repeat] to [`Accumulate`][crate::stream::Accumulate] into a `()` and then [`Parser::recognize`]. 
/// /// # Example /// @@ -546,14 +565,27 @@ where I: Stream, T: ContainsToken<<I as Stream>::Token>, { - let e: ErrorKind = ErrorKind::Slice; if PARTIAL && input.is_partial() { - take_till1_partial(input, |c| !list.contains_token(c), e) + take_till1_partial(input, |c| !list.contains_token(c)) } else { - take_till1_complete(input, |c| !list.contains_token(c), e) + take_till1_complete(input, |c| !list.contains_token(c)) } } +fn take_while_m_n_<T, I, Error: ParserError<I>, const PARTIAL: bool>( + input: &mut I, + m: usize, + n: usize, + list: &T, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + T: ContainsToken<<I as Stream>::Token>, +{ + take_till_m_n::<_, _, _, PARTIAL>(input, m, n, |c| !list.contains_token(c)) +} + /// Looks for the first element of the input type for which the condition returns true, /// and returns the input up to this position. /// @@ -580,11 +612,11 @@ where fn take_till1_partial<P, I: Stream, E: ParserError<I>>( input: &mut I, predicate: P, - e: ErrorKind, ) -> PResult<<I as Stream>::Slice, E> where P: Fn(I::Token) -> bool, { + let e: ErrorKind = ErrorKind::Slice; let offset = input .offset_for(predicate) .ok_or_else(|| ErrMode::Incomplete(Needed::new(1)))?; @@ -621,11 +653,11 @@ where fn take_till1_complete<P, I: Stream, E: ParserError<I>>( input: &mut I, predicate: P, - e: ErrorKind, ) -> PResult<<I as Stream>::Slice, E> where P: Fn(I::Token) -> bool, { + let e: ErrorKind = ErrorKind::Slice; let offset = input .offset_for(predicate) .unwrap_or_else(|| input.eof_offset()); @@ -636,16 +668,16 @@ where } } -fn take_while_m_n_<T, I, Error: ParserError<I>, const PARTIAL: bool>( +fn take_till_m_n<P, I, Error: ParserError<I>, const PARTIAL: bool>( input: &mut I, m: usize, n: usize, - list: &T, + predicate: P, ) -> PResult<<I as Stream>::Slice, Error> where I: StreamIsPartial, I: Stream, - T: ContainsToken<<I as Stream>::Token>, + P: Fn(I::Token) -> bool, { if n < m { return Err(ErrMode::assert(input, "`m` 
should be <= `n`")); @@ -653,7 +685,7 @@ where let mut final_count = 0; for (processed, (offset, token)) in input.iter_offsets().enumerate() { - if !list.contains_token(token) { + if predicate(token) { if processed < m { return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); } else { @@ -696,6 +728,86 @@ where /// ```rust /// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; +/// use winnow::token::take_till; +/// +/// fn till_colon(s: &str) -> IResult<&str, &str> { +/// take_till(0.., |c| c == ':').parse_peek(s) +/// } +/// +/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin"))); +/// assert_eq!(till_colon(":empty matched"), Ok((":empty matched", ""))); //allowed +/// assert_eq!(till_colon("12345"), Ok(("", "12345"))); +/// assert_eq!(till_colon(""), Ok(("", ""))); +/// ``` +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; +/// # use winnow::Partial; +/// use winnow::token::take_till; +/// +/// fn till_colon(s: Partial<&str>) -> IResult<Partial<&str>, &str> { +/// take_till(0.., |c| c == ':').parse_peek(s) +/// } +/// +/// assert_eq!(till_colon(Partial::new("latin:123")), Ok((Partial::new(":123"), "latin"))); +/// assert_eq!(till_colon(Partial::new(":empty matched")), Ok((Partial::new(":empty matched"), ""))); //allowed +/// assert_eq!(till_colon(Partial::new("12345")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// assert_eq!(till_colon(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); +/// ``` +#[inline(always)] +#[doc(alias = "is_not")] +pub fn take_till<T, I, Error: ParserError<I>>( + range: impl Into<Range>, + list: T, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream, + T: ContainsToken<<I as Stream>::Token>, +{ + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("take_till", move |i: &mut I| { + match 
(start_inclusive, end_inclusive) { + (0, None) => { + if <I as StreamIsPartial>::is_partial_supported() { + take_till0_partial(i, |c| list.contains_token(c)) + } else { + take_till0_complete(i, |c| list.contains_token(c)) + } + } + (1, None) => { + if <I as StreamIsPartial>::is_partial_supported() { + take_till1_partial(i, |c| list.contains_token(c)) + } else { + take_till1_complete(i, |c| list.contains_token(c)) + } + } + (start, end) => { + let end = end.unwrap_or(usize::MAX); + if <I as StreamIsPartial>::is_partial_supported() { + take_till_m_n::<_, _, _, true>(i, start, end, |c| list.contains_token(c)) + } else { + take_till_m_n::<_, _, _, false>(i, start, end, |c| list.contains_token(c)) + } + } + } + }) +} + +/// Recognize the longest input slice (if any) till a [pattern][ContainsToken] is met. +/// +/// *Partial version* will return a `ErrMode::Incomplete(Needed::new(1))` if the match reaches the +/// end of input or if there was not match. +/// +/// # Example +/// +/// ```rust +/// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; +/// # use winnow::prelude::*; /// use winnow::token::take_till0; /// /// fn till_colon(s: &str) -> IResult<&str, &str> { @@ -723,6 +835,7 @@ where /// assert_eq!(till_colon(Partial::new("12345")), Err(ErrMode::Incomplete(Needed::new(1)))); /// assert_eq!(till_colon(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` +#[deprecated(since = "0.5.21", note = "Replaced with `take_till(0.., ...)`")] #[inline(always)] pub fn take_till0<T, I, Error: ParserError<I>>( list: T, @@ -800,7 +913,7 @@ where /// assert_eq!(not_space(Partial::new("")), Err(ErrMode::Incomplete(Needed::new(1)))); /// ``` #[inline(always)] -#[doc(alias = "is_not")] +#[deprecated(since = "0.5.21", note = "Replaced with `take_till(1.., ...)`")] pub fn take_till1<T, I, Error: ParserError<I>>( list: T, ) -> impl Parser<I, <I as Stream>::Slice, Error> @@ -810,11 +923,10 @@ where T: ContainsToken<<I as Stream>::Token>, { 
trace("take_till1", move |i: &mut I| { - let e: ErrorKind = ErrorKind::Slice; if <I as StreamIsPartial>::is_partial_supported() && i.is_partial() { - take_till1_partial(i, |c| list.contains_token(c), e) + take_till1_partial(i, |c| list.contains_token(c)) } else { - take_till1_complete(i, |c| list.contains_token(c), e) + take_till1_complete(i, |c| list.contains_token(c)) } }) } @@ -922,10 +1034,10 @@ where /// ```rust /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::prelude::*; -/// use winnow::token::take_until0; +/// use winnow::token::take_until; /// /// fn until_eof(s: &str) -> IResult<&str, &str> { -/// take_until0("eof").parse_peek(s) +/// take_until(0.., "eof").parse_peek(s) /// } /// /// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world"))); @@ -938,10 +1050,10 @@ where /// # use winnow::{error::ErrMode, error::ErrorKind, error::InputError, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::token::take_until0; +/// use winnow::token::take_until; /// /// fn until_eof(s: Partial<&str>) -> IResult<Partial<&str>, &str> { -/// take_until0("eof").parse_peek(s) +/// take_until(0.., "eof").parse_peek(s) /// } /// /// assert_eq!(until_eof(Partial::new("hello, worldeof")), Ok((Partial::new("eof"), "hello, world"))); @@ -949,59 +1061,14 @@ where /// assert_eq!(until_eof(Partial::new("hello, worldeo")), Err(ErrMode::Incomplete(Needed::Unknown))); /// assert_eq!(until_eof(Partial::new("1eof2eof")), Ok((Partial::new("eof2eof"), "1"))); /// ``` -#[inline(always)] -pub fn take_until0<T, I, Error: ParserError<I>>( - tag: T, -) -> impl Parser<I, <I as Stream>::Slice, Error> -where - I: StreamIsPartial, - I: Stream + FindSlice<T>, - T: SliceLen + Clone, -{ - trace("take_until0", move |i: &mut I| { - if <I as StreamIsPartial>::is_partial_supported() { - take_until0_::<_, _, _, true>(i, tag.clone()) - } else { - take_until0_::<_, _, _, false>(i, tag.clone()) - } - }) -} - 
-fn take_until0_<T, I, Error: ParserError<I>, const PARTIAL: bool>( - i: &mut I, - t: T, -) -> PResult<<I as Stream>::Slice, Error> -where - I: StreamIsPartial, - I: Stream + FindSlice<T>, - T: SliceLen, -{ - match i.find_slice(t) { - Some(offset) => Ok(i.next_slice(offset)), - None if PARTIAL && i.is_partial() => Err(ErrMode::Incomplete(Needed::Unknown)), - None => Err(ErrMode::from_error_kind(i, ErrorKind::Slice)), - } -} - -/// Recognize the non empty input slice up to the first occurrence of the literal. -/// -/// It doesn't consume the pattern. -/// -/// *Complete version*: It will return `Err(ErrMode::Backtrack(InputError::new(_, ErrorKind::Slice)))` -/// if the pattern wasn't met. -/// -/// *Partial version*: will return a `ErrMode::Incomplete(Needed::new(N))` if the input doesn't -/// contain the pattern or if the input is smaller than the pattern. -/// -/// # Example /// /// ```rust /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::prelude::*; -/// use winnow::token::take_until1; +/// use winnow::token::take_until; /// /// fn until_eof(s: &str) -> IResult<&str, &str> { -/// take_until1("eof").parse_peek(s) +/// take_until(1.., "eof").parse_peek(s) /// } /// /// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world"))); @@ -1015,20 +1082,21 @@ where /// # use winnow::{error::ErrMode, error::{InputError, ErrorKind}, error::Needed}; /// # use winnow::prelude::*; /// # use winnow::Partial; -/// use winnow::token::take_until1; +/// use winnow::token::take_until; /// /// fn until_eof(s: Partial<&str>) -> IResult<Partial<&str>, &str> { -/// take_until1("eof").parse_peek(s) +/// take_until(1.., "eof").parse_peek(s) /// } /// /// assert_eq!(until_eof(Partial::new("hello, worldeof")), Ok((Partial::new("eof"), "hello, world"))); /// assert_eq!(until_eof(Partial::new("hello, world")), Err(ErrMode::Incomplete(Needed::Unknown))); /// assert_eq!(until_eof(Partial::new("hello, worldeo")), 
Err(ErrMode::Incomplete(Needed::Unknown))); /// assert_eq!(until_eof(Partial::new("1eof2eof")), Ok((Partial::new("eof2eof"), "1"))); -/// assert_eq!(until_eof(Partial::new("eof")), Err(ErrMode::Backtrack(InputError::new(Partial::new("eof"), ErrorKind::Slice)))); +/// assert_eq!(until_eof(Partial::new("eof")), Err(ErrMode::Backtrack(InputError::new(Partial::new("eof"), ErrorKind::Slice)))); /// ``` #[inline(always)] -pub fn take_until1<T, I, Error: ParserError<I>>( +pub fn take_until<T, I, Error: ParserError<I>>( + range: impl Into<Range>, tag: T, ) -> impl Parser<I, <I as Stream>::Slice, Error> where @@ -1036,15 +1104,81 @@ where I: Stream + FindSlice<T>, T: SliceLen + Clone, { - trace("take_until1", move |i: &mut I| { - if <I as StreamIsPartial>::is_partial_supported() { - take_until1_::<_, _, _, true>(i, tag.clone()) - } else { - take_until1_::<_, _, _, false>(i, tag.clone()) + let Range { + start_inclusive, + end_inclusive, + } = range.into(); + trace("take_until", move |i: &mut I| { + match (start_inclusive, end_inclusive) { + (0, None) => { + if <I as StreamIsPartial>::is_partial_supported() { + take_until0_::<_, _, _, true>(i, tag.clone()) + } else { + take_until0_::<_, _, _, false>(i, tag.clone()) + } + } + (1, None) => { + if <I as StreamIsPartial>::is_partial_supported() { + take_until1_::<_, _, _, true>(i, tag.clone()) + } else { + take_until1_::<_, _, _, false>(i, tag.clone()) + } + } + (start, end) => { + let end = end.unwrap_or(usize::MAX); + if <I as StreamIsPartial>::is_partial_supported() { + take_until_m_n_::<_, _, _, true>(i, start, end, tag.clone()) + } else { + take_until_m_n_::<_, _, _, false>(i, start, end, tag.clone()) + } + } } }) } +/// Deprecated, see [`take_until`] +#[deprecated(since = "0.5.35", note = "Replaced with `take_until`")] +pub fn take_until0<T, I, Error: ParserError<I>>( + tag: T, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + FindSlice<T>, + T: SliceLen + Clone, +{ + 
take_until(0.., tag) +} + +fn take_until0_<T, I, Error: ParserError<I>, const PARTIAL: bool>( + i: &mut I, + t: T, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + FindSlice<T>, + T: SliceLen, +{ + match i.find_slice(t) { + Some(offset) => Ok(i.next_slice(offset)), + None if PARTIAL && i.is_partial() => Err(ErrMode::Incomplete(Needed::Unknown)), + None => Err(ErrMode::from_error_kind(i, ErrorKind::Slice)), + } +} + +/// Deprecated, see [`take_until`] +#[deprecated(since = "0.5.35", note = "Replaced with `take_until`")] +#[inline(always)] +pub fn take_until1<T, I, Error: ParserError<I>>( + tag: T, +) -> impl Parser<I, <I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + FindSlice<T>, + T: SliceLen + Clone, +{ + take_until(1.., tag) +} + fn take_until1_<T, I, Error: ParserError<I>, const PARTIAL: bool>( i: &mut I, t: T, @@ -1060,3 +1194,39 @@ where Some(offset) => Ok(i.next_slice(offset)), } } + +fn take_until_m_n_<T, I, Error: ParserError<I>, const PARTIAL: bool>( + i: &mut I, + start: usize, + end: usize, + t: T, +) -> PResult<<I as Stream>::Slice, Error> +where + I: StreamIsPartial, + I: Stream + FindSlice<T>, + T: SliceLen, +{ + if end < start { + return Err(ErrMode::assert(i, "`start` should be <= `end`")); + } + + match i.find_slice(t) { + Some(offset) => { + let start_offset = i.offset_at(start); + let end_offset = i.offset_at(end).unwrap_or_else(|_err| i.eof_offset()); + if start_offset.map(|s| offset < s).unwrap_or(true) { + if PARTIAL && i.is_partial() { + return Err(ErrMode::Incomplete(Needed::Unknown)); + } else { + return Err(ErrMode::from_error_kind(i, ErrorKind::Slice)); + } + } + if end_offset < offset { + return Err(ErrMode::from_error_kind(i, ErrorKind::Slice)); + } + Ok(i.next_slice(offset)) + } + None if PARTIAL && i.is_partial() => Err(ErrMode::Incomplete(Needed::Unknown)), + None => Err(ErrMode::from_error_kind(i, ErrorKind::Slice)), + } +} diff --git a/src/token/tests.rs 
b/src/token/tests.rs index d9f3646..e8198f9 100644 --- a/src/token/tests.rs +++ b/src/token/tests.rs @@ -3,7 +3,7 @@ use super::*; #[cfg(feature = "std")] use proptest::prelude::*; -use crate::binary::length_data; +use crate::ascii::Caseless; use crate::combinator::delimited; use crate::error::ErrMode; use crate::error::ErrorKind; @@ -67,6 +67,37 @@ proptest! { } #[test] +fn complete_take_until() { + fn take_until_5_10(i: &str) -> IResult<&str, &str> { + take_until(5..=8, "end").parse_peek(i) + } + assert_eq!( + take_until_5_10("end"), + Err(ErrMode::Backtrack(error_position!( + &"end", + ErrorKind::Slice + ))) + ); + assert_eq!( + take_until_5_10("1234end"), + Err(ErrMode::Backtrack(error_position!( + &"1234end", + ErrorKind::Slice + ))) + ); + assert_eq!(take_until_5_10("12345end"), Ok(("end", "12345"))); + assert_eq!(take_until_5_10("123456end"), Ok(("end", "123456"))); + assert_eq!(take_until_5_10("12345678end"), Ok(("end", "12345678"))); + assert_eq!( + take_until_5_10("123456789end"), + Err(ErrMode::Backtrack(error_position!( + &"123456789end", + ErrorKind::Slice + ))) + ); +} + +#[test] fn partial_any_str() { use super::any; assert_eq!( @@ -183,7 +214,7 @@ fn partial_is_a() { #[test] fn partial_is_not() { fn a_or_b(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_till1(['a', 'b']).parse_peek(i) + take_till(1.., ['a', 'b']).parse_peek(i) } let a = Partial::new(&b"cdab"[..]); @@ -208,7 +239,7 @@ fn partial_is_not() { #[test] fn partial_take_until_incomplete() { fn y(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_until0("end").parse_peek(i) + take_until(0.., "end").parse_peek(i) } assert_eq!( y(Partial::new(&b"nd"[..])), @@ -227,7 +258,7 @@ fn partial_take_until_incomplete() { #[test] fn partial_take_until_incomplete_s() { fn ys(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_until0("end").parse_peek(i) + take_until(0.., "end").parse_peek(i) } assert_eq!( ys(Partial::new("123en")), @@ -365,7 +396,7 @@ fn 
partial_take_while_m_n() { #[test] fn partial_take_till0() { fn f(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_till0(AsChar::is_alpha).parse_peek(i) + take_till(0.., AsChar::is_alpha).parse_peek(i) } let a = &b""[..]; let b = &b"abcd"[..]; @@ -387,7 +418,7 @@ fn partial_take_till0() { #[test] fn partial_take_till1() { fn f(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - take_till1(AsChar::is_alpha).parse_peek(i) + take_till(1.., AsChar::is_alpha).parse_peek(i) } let a = &b""[..]; let b = &b"abcd"[..]; @@ -447,7 +478,7 @@ fn partial_take_while_utf8() { #[test] fn partial_take_till0_utf8() { fn f(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_till0(|c| c == '點').parse_peek(i) + take_till(0.., |c| c == '點').parse_peek(i) } assert_eq!( @@ -465,7 +496,7 @@ fn partial_take_till0_utf8() { ); fn g(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - take_till0(|c| c != '點').parse_peek(i) + take_till(0.., |c| c != '點').parse_peek(i) } assert_eq!( @@ -569,57 +600,11 @@ fn partial_recognize_take_while0() { ); } -#[test] -fn partial_length_bytes() { - use crate::binary::le_u8; - - fn x(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - length_data(le_u8).parse_peek(i) - } - assert_eq!( - x(Partial::new(b"\x02..>>")), - Ok((Partial::new(&b">>"[..]), &b".."[..])) - ); - assert_eq!( - x(Partial::new(b"\x02..")), - Ok((Partial::new(&[][..]), &b".."[..])) - ); - assert_eq!( - x(Partial::new(b"\x02.")), - Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - x(Partial::new(b"\x02")), - Err(ErrMode::Incomplete(Needed::new(2))) - ); - - fn y(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - let (i, _) = "magic".parse_peek(i)?; - length_data(le_u8).parse_peek(i) - } - assert_eq!( - y(Partial::new(b"magic\x02..>>")), - Ok((Partial::new(&b">>"[..]), &b".."[..])) - ); - assert_eq!( - y(Partial::new(b"magic\x02..")), - Ok((Partial::new(&[][..]), &b".."[..])) - ); - assert_eq!( - y(Partial::new(b"magic\x02.")), - 
Err(ErrMode::Incomplete(Needed::new(1))) - ); - assert_eq!( - y(Partial::new(b"magic\x02")), - Err(ErrMode::Incomplete(Needed::new(2))) - ); -} - #[cfg(feature = "alloc")] #[test] fn partial_case_insensitive() { fn test(i: Partial<&[u8]>) -> IResult<Partial<&[u8]>, &[u8]> { - tag_no_case("ABcd").parse_peek(i) + tag(Caseless("ABcd")).parse_peek(i) } assert_eq!( test(Partial::new(&b"aBCdefgh"[..])), @@ -653,7 +638,7 @@ fn partial_case_insensitive() { ); fn test2(i: Partial<&str>) -> IResult<Partial<&str>, &str> { - tag_no_case("ABcd").parse_peek(i) + tag(Caseless("ABcd")).parse_peek(i) } assert_eq!( test2(Partial::new("aBCdefgh")), diff --git a/src/trace.rs b/src/trace.rs new file mode 100644 index 0000000..9c05576 --- /dev/null +++ b/src/trace.rs @@ -0,0 +1,11 @@ +//! Deprecated, replaced with [`winnow::combinator`][crate::combinator] + +/// Deprecated, replaced with [`winnow::combinator::trace`][crate::combinator::trace] +#[deprecated(since = "0.5.35", note = "Replaced with `winnow::combinator::trace`")] +#[inline(always)] +pub fn trace<I: crate::stream::Stream, O, E>( + name: impl crate::lib::std::fmt::Display, + parser: impl crate::Parser<I, O, E>, +) -> impl crate::Parser<I, O, E> { + crate::combinator::trace(name, parser) +} |