chumsky/lib.rs
1#![cfg_attr(not(any(doc, feature = "std", test)), no_std)]
2#![cfg_attr(docsrs, feature(doc_auto_cfg, doc_cfg), deny(rustdoc::all))]
3#![cfg_attr(
4 feature = "nightly",
5 feature(never_type, fn_traits, tuple_trait, unboxed_closures)
6)]
7#![doc = include_str!("../README.md")]
8#![doc = "## Examples"]
9#, "/examples/brainfuck.rs", ")\n\n")]
10#, "/examples/json.rs", ")\n\n")]
11#, "/examples/nano_rust.rs", ")\n\n")]
12#![deny(missing_docs, clippy::undocumented_unsafe_blocks)]
13#![allow(
14 clippy::should_implement_trait,
15 clippy::type_complexity,
16 clippy::result_unit_err
17)]
18
19extern crate alloc;
20extern crate core;
21
// Generates the `go_emit` and `go_check` methods of a `Parser` implementation by forwarding to the
// generic `go` method, instantiated with the `Emit` and `Check` modes respectively.
//
// `$O` is the output type of the parser being implemented. The expansion also mentions `'src`, `I` and
// `E` directly, so those names have to be in scope wherever the macro is invoked.
macro_rules! go_extra {
    ( $O :ty ) => {
        #[inline(always)]
        fn go_emit(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<Emit, $O> {
            Parser::<I, $O, E>::go::<Emit>(self, inp)
        }
        #[inline(always)]
        fn go_check(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<Check, $O> {
            Parser::<I, $O, E>::go::<Check>(self, inp)
        }
    };
}
34
35mod blanket;
36#[cfg(feature = "unstable")]
37pub mod cache;
38pub mod combinator;
39pub mod container;
40#[cfg(feature = "either")]
41mod either;
42pub mod error;
43#[cfg(feature = "extension")]
44pub mod extension;
45pub mod extra;
46#[cfg(docsrs)]
47pub mod guide;
48pub mod input;
49pub mod inspector;
50pub mod label;
51#[cfg(feature = "lexical-numbers")]
52pub mod number;
53#[cfg(feature = "pratt")]
54pub mod pratt;
55pub mod primitive;
56mod private;
57pub mod recovery;
58pub mod recursive;
59#[cfg(feature = "regex")]
60pub mod regex;
61pub mod span;
62mod stream;
63pub mod text;
64#[cfg(feature = "bytes")]
65mod tokio;
66pub mod util;
67
68/// Commonly used functions, traits and types.
69///
/// *“Listen, three eyes,” he said, “don’t you try to outweird me, I get stranger things than you free with my breakfast
71/// cereal.”*
pub mod prelude {
    // Only re-exported when the `lexical-numbers` feature is enabled.
    #[cfg(feature = "lexical-numbers")]
    pub use super::number::number;
    // Only re-exported when the `regex` feature is enabled.
    #[cfg(feature = "regex")]
    pub use super::regex::regex;
    pub use super::{
        // `Error` is imported as `_`: the trait is brought into scope without introducing its name.
        error::{Cheap, EmptyErr, Error as _, Rich, Simple},
        extra,
        input::Input,
        primitive::{
            any, any_ref, choice, custom, empty, end, group, just, map_ctx, none_of, one_of, todo,
        },
        recovery::{nested_delimiters, skip_then_retry_until, skip_until, via_parser},
        recursive::{recursive, Recursive},
        // `Span` is imported anonymously for the same reason as `Error` above.
        span::{SimpleSpan, Span as _},
        text, Boxed, ConfigIterParser, ConfigParser, IterParser, ParseResult, Parser,
    };
    // `select` and `select_ref` are exported from the crate root rather than from a submodule.
    pub use crate::{select, select_ref};
}
91
92use crate::input::InputOwn;
93use alloc::{
94 boxed::Box,
95 rc::{self, Rc},
96 string::String,
97 vec,
98 vec::Vec,
99};
100#[cfg(feature = "nightly")]
101use core::marker::Tuple;
102use core::{
103 borrow::Borrow,
104 cell::{Cell, RefCell},
105 cmp::{Eq, Ord, Ordering},
106 fmt,
107 hash::Hash,
108 marker::PhantomData,
109 mem::MaybeUninit,
110 ops::{Range, RangeFrom},
111 panic::Location,
112 str::FromStr,
113};
114use hashbrown::HashMap;
115#[cfg(feature = "serde")]
116use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
117
118use self::{
119 combinator::*,
120 container::*,
121 error::Error,
122 extra::ParserExtra,
123 input::{
124 BorrowInput, Emitter, ExactSizeInput, InputRef, MapExtra, SliceInput, StrInput, ValueInput,
125 },
126 inspector::Inspector,
127 label::{LabelError, Labelled},
128 prelude::*,
129 primitive::Any,
130 private::{Check, Emit, IPResult, Located, MaybeUninitExt, Mode, PResult, Sealed},
131 recovery::{RecoverWith, Strategy},
132 span::Span,
133 text::*,
134 util::{IntoMaybe, MaybeMut, MaybeRef},
135};
136#[cfg(all(feature = "extension", doc))]
137use self::{extension::v1::*, primitive::custom, stream::Stream};
138
/// A zero-sized marker that lets a type mention a type parameter `T` *without* inheriting the auto-trait
/// restrictions that a plain `PhantomData<T>` would bring along.
///
/// Every marker and auto trait below is implemented unconditionally, whatever `T` is.
struct EmptyPhantom<T>(PhantomData<T>);

impl<T> EmptyPhantom<T> {
    /// Create the marker value.
    const fn new() -> Self {
        EmptyPhantom(PhantomData)
    }
}

impl<T> Clone for EmptyPhantom<T> {
    fn clone(&self) -> Self {
        *self
    }
}
impl<T> Copy for EmptyPhantom<T> {}

// SAFETY: This is safe because `EmptyPhantom` doesn't actually contain a `T`.
unsafe impl<T> Sync for EmptyPhantom<T> {}
// SAFETY: This is safe because `EmptyPhantom` doesn't actually contain a `T`.
unsafe impl<T> Send for EmptyPhantom<T> {}

impl<T> core::panic::RefUnwindSafe for EmptyPhantom<T> {}
impl<T> core::panic::UnwindSafe for EmptyPhantom<T> {}
impl<T> Unpin for EmptyPhantom<T> {}
162
// Shorthand for a `Parser` trait object with the additional lifetime bound `'b`.
pub(crate) type DynParser<'src, 'b, I, O, E> = dyn Parser<'src, I, O, E> + 'b;
// Shorthand for a `pratt::Operator` trait object with the additional lifetime bound `'b`. Only defined
// when the `pratt` feature is enabled.
#[cfg(feature = "pratt")]
pub(crate) type DynOperator<'src, 'b, I, O, E> = dyn pratt::Operator<'src, I, O, E> + 'b;
166
167/// Labels corresponding to a variety of patterns.
168#[derive(Clone, Debug, PartialEq)]
169#[non_exhaustive]
170pub enum DefaultExpected<'a, T> {
171 /// A specific token was expected.
172 Token(MaybeRef<'a, T>),
173 /// Anything other than the end of input was expected.
174 Any,
175 /// Something other than the provided input was expected.
176 SomethingElse,
177 /// The end of input was expected.
178 EndOfInput,
179}
180
181impl<T> DefaultExpected<'_, T> {
182 /// Convert this [`DefaultExpected`] into an owned version of itself, cloning any inner references if required.
183 #[inline]
184 pub fn into_owned(self) -> DefaultExpected<'static, T>
185 where
186 T: Clone,
187 {
188 match self {
189 Self::Token(tok) => DefaultExpected::Token(tok.into_owned()),
190 Self::Any => DefaultExpected::Any,
191 Self::SomethingElse => DefaultExpected::SomethingElse,
192 Self::EndOfInput => DefaultExpected::EndOfInput,
193 }
194 }
195}
196
/// The outcome of running a [`Parser`] over an input.
///
/// Unlike `Result`, this type expresses the fact that producing an output and producing errors are not
/// mutually-exclusive: a parse may generate non-terminal errors (see [`Parser::recover_with`]) while still
/// producing useful output.
///
/// If you don't care for recovered outputs and you wish to treat success/failure as a binary, you may use
/// [`ParseResult::into_result`].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ParseResult<T, E> {
    output: Option<T>,
    errs: Vec<E>,
}

impl<T, E> ParseResult<T, E> {
    /// Bundle an optional output together with the errors collected during the parse.
    pub(crate) fn new(output: Option<T>, errs: Vec<E>) -> ParseResult<T, E> {
        Self { output, errs }
    }

    /// Returns `true` if this result carries an output
    pub fn has_output(&self) -> bool {
        self.output.is_some()
    }

    /// Returns `true` if this result carries at least one error
    pub fn has_errors(&self) -> bool {
        !self.errs.is_empty()
    }

    /// Borrow the output of this result, if there is one
    pub fn output(&self) -> Option<&T> {
        self.output.as_ref()
    }

    /// Iterate over the parse errors of this result by reference. The iterator yields nothing if there were no
    /// errors.
    pub fn errors(&self) -> impl ExactSizeIterator<Item = &E> + DoubleEndedIterator {
        self.errs.as_slice().iter()
    }

    /// Consume this `ParseResult`, keeping only the output (if any) and dropping the errors
    pub fn into_output(self) -> Option<T> {
        let Self { output, .. } = self;
        output
    }

    /// Consume this `ParseResult`, keeping only the errors and dropping the output. The vector is empty if there
    /// were no errors.
    pub fn into_errors(self) -> Vec<E> {
        let Self { errs, .. } = self;
        errs
    }

    /// Consume this `ParseResult`, splitting it into the output (if any) and the errors that were encountered.
    pub fn into_output_errors(self) -> (Option<T>, Vec<E>) {
        let Self { output, errs } = self;
        (output, errs)
    }

    /// Convert this `ParseResult` into a standard `Result`. Any output is discarded if parsing generated errors,
    /// matching the old behavior of [`Parser::parse`].
    ///
    /// A result with neither output nor errors becomes `Err` holding an empty vector.
    pub fn into_result(self) -> Result<T, Vec<E>> {
        match self.output {
            Some(out) if self.errs.is_empty() => Ok(out),
            _ => Err(self.errs),
        }
    }

    /// Extract the output of this `ParseResult`. If any errors were generated (including non-fatal errors!), this
    /// panics instead.
    ///
    /// The use of this function is discouraged in user-facing code. However, it may be convenient for use in tests.
    ///
    /// # Panics
    ///
    /// Panics if the result contains any error, or if it contains neither errors nor an output.
    #[track_caller]
    pub fn unwrap(self) -> T
    where
        E: fmt::Debug,
    {
        if !self.errs.is_empty() {
            panic!(
                "called `ParseResult::unwrap` on a parse result containing errors: {:?}",
                &self.errs
            );
        }
        self.output.expect("parser generated no errors or output")
    }
}
283
284/// A trait implemented by parsers.
285///
286/// Parsers take inputs of type `I`, which will implement [`Input`]. Refer to the documentation on [`Input`] for examples
287/// of common input types. It will then attempt to parse them into a value of type `O`, which may be just about any type.
288/// In doing so, they may encounter errors. These need not be fatal to the parsing process: syntactic errors can be
289/// recovered from and a valid output may still be generated alongside any syntax errors that were encountered along the
290/// way. Usually, this output comes in the form of an
291/// [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST).
292///
/// The final type parameter, `E`, is expected to be one of the types in the [`extra`] module,
294/// implementing [`ParserExtra`]. This trait is used to encapsulate the various types a parser
295/// uses that are not simply its input and output. Refer to the documentation on the [`ParserExtra`] trait
296/// for more detail on the contained types. If not provided, it will default to [`extra::Default`],
297/// which will have the least overhead, but also the least meaningful errors.
298///
299/// The lifetime of the parser is used for zero-copy output - the input is bound by the lifetime,
300/// and returned values or parser state may take advantage of this to borrow tokens or slices of the
301/// input and hold on to them, if the input supports this.
302///
303/// # Stability
304///
305/// This trait is not intended to be implemented by downstream users of `chumsky`. While you can technically implement
306/// it, doing so is considered to be outside the stability guarantees of the crate. Your code may break with a future,
307/// semver-compatible release! Instead of implementing this trait, you should consider other options:
308///
309/// 1) Try using combinators like [`Parser::try_map`] and [`Parser::validate`] to implement custom error generation
310///
311/// 2) Use [`custom`] to implement your own parsing logic inline within an existing parser
312///
313/// 3) Use chumsky's [`extension`] API to write an extension parser that feels like it's native to chumsky
314///
315/// 4) If you believe you've found a common use-case that's missing from chumsky, you could open a pull request to
316/// implement it in chumsky itself rather than implementing `Parser` yourself.
317// #[cfg_attr(
318// feature = "nightly",
319// diagnostic::on_unimplemented(
320// message = "The following is not a parser from `{I}` to `{O}`: `{Self}`",
321// label = "This parser is not compatible because it does not implement `Parser<{I}, {O}, E>`",
322// note = "You should check that the output types of your parsers are consistent with the combinators you're using",
323// )
324// )]
325pub trait Parser<'src, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Default> {
    // Internal parsing entry point, generic over the parsing `Mode` `M`. It is hidden from the generated
    // documentation and, because of the `Self: Sized` bound, only callable on sized parsers.
    #[doc(hidden)]
    fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O>
    where
        Self: Sized;

    // Non-generic variant fixed to the `Emit` mode; unlike `go` it has no `Self: Sized` bound. The
    // `go_extra!` macro in this file implements it by forwarding to `go::<Emit>`.
    #[doc(hidden)]
    fn go_emit(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<Emit, O>;
    // Non-generic variant fixed to the `Check` mode; `go_extra!` implements it by forwarding to
    // `go::<Check>`.
    #[doc(hidden)]
    fn go_check(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<Check, O>;
335
336 /// Parse a stream of tokens, yielding an output if possible, and any errors encountered along the way.
337 ///
338 /// If `None` is returned (i.e: parsing failed) then there will *always* be at least one item in the error `Vec`.
339 /// If you want to include non-default state, use [`Parser::parse_with_state`] instead.
340 ///
341 /// Although the signature of this function looks complicated, it's simpler than you think! You can pass a
342 /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
343 fn parse(&self, input: I) -> ParseResult<O, E::Error>
344 where
345 I: Input<'src>,
346 E::State: Default,
347 E::Context: Default,
348 {
349 self.parse_with_state(input, &mut E::State::default())
350 }
351
352 /// Parse a stream of tokens, yielding an output if possible, and any errors encountered along the way.
353 /// The provided state will be passed on to parsers that expect it, such as [`map_with`](Parser::map_with).
354 ///
355 /// If `None` is returned (i.e: parsing failed) then there will *always* be at least one item in the error `Vec`.
356 /// If you want to just use a default state value, use [`Parser::parse`] instead.
357 ///
358 /// Although the signature of this function looks complicated, it's simpler than you think! You can pass a
359 /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
360 fn parse_with_state(&self, input: I, state: &mut E::State) -> ParseResult<O, E::Error>
361 where
362 I: Input<'src>,
363 E::Context: Default,
364 {
365 let mut own = InputOwn::new_state(input, state);
366 let mut inp = own.as_ref_start();
367 let res = self.then_ignore(end()).go::<Emit>(&mut inp);
368 let alt = inp.take_alt().map(|alt| alt.err).unwrap_or_else(|| {
369 let fake_span = inp.span_since(&inp.cursor());
370 // TODO: Why is this needed?
371 E::Error::expected_found([], None, fake_span)
372 });
373 let mut errs = own.into_errs();
374 let out = match res {
375 Ok(out) => Some(out),
376 Err(()) => {
377 errs.push(alt);
378 None
379 }
380 };
381 ParseResult::new(out, errs)
382 }
383
384 /// Parse a stream of tokens, ignoring any output, and returning any errors encountered along the way.
385 ///
386 /// If parsing failed, then there will *always* be at least one item in the returned `Vec`.
387 /// If you want to include non-default state, use [`Parser::check_with_state`] instead.
388 ///
389 /// Although the signature of this function looks complicated, it's simpler than you think! You can pass a
390 /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
391 fn check(&self, input: I) -> ParseResult<(), E::Error>
392 where
393 Self: Sized,
394 I: Input<'src>,
395 E::State: Default,
396 E::Context: Default,
397 {
398 self.check_with_state(input, &mut E::State::default())
399 }
400
401 /// Parse a stream of tokens, ignoring any output, and returning any errors encountered along the way.
402 ///
403 /// If parsing failed, then there will *always* be at least one item in the returned `Vec`.
404 /// If you want to just use a default state value, use [`Parser::check`] instead.
405 ///
406 /// Although the signature of this function looks complicated, it's simpler than you think! You can pass a
407 /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
408 fn check_with_state(&self, input: I, state: &mut E::State) -> ParseResult<(), E::Error>
409 where
410 Self: Sized,
411 I: Input<'src>,
412 E::Context: Default,
413 {
414 let mut own = InputOwn::new_state(input, state);
415 let mut inp = own.as_ref_start();
416 let res = self.then_ignore(end()).go::<Check>(&mut inp);
417 let alt = inp.take_alt().map(|alt| alt.err).unwrap_or_else(|| {
418 let fake_span = inp.span_since(&inp.cursor());
419 // TODO: Why is this needed?
420 E::Error::expected_found([], None, fake_span)
421 });
422 let mut errs = own.into_errs();
423 let out = match res {
424 Ok(()) => Some(()),
425 Err(()) => {
426 errs.push(alt);
427 None
428 }
429 };
430 ParseResult::new(out, errs)
431 }
432
433 /// Convert the output of this parser into a slice of the input, based on the current parser's
434 /// span.
435 ///
436 /// Note: unlike the parser `.repeated().collect()`, this method includes all tokens that are
437 /// "ignored" by the parser, including any padding, separators, and sub-parsers with
438 /// [`Parser::ignored`], [`Parser::ignore_then`], and [`Parser::then_ignore`].
439 ///
440 /// # Examples
441 /// Example with input of type `&str` (token type is `char`).
442 /// ```
443 /// # use chumsky::prelude::*;
444 /// // Matches a number with underscores that is surrounded by apostrophes.
445 /// let quoted_numeric = any::<&str, extra::Err<Simple<char>>>()
446 /// .filter(|c: &char| c.is_digit(10))
447 /// .separated_by(just("_").repeated().at_most(1))
448 /// .to_slice()
449 /// .padded_by(just("'"));
450 /// assert_eq!(quoted_numeric.parse("'1_23'").into_result(), Ok("1_23"));
451 /// ```
452 /// Example with input of type `&[u32]` (token type is `u32`).
453 /// ```
454 /// # use chumsky::prelude::*;
455 /// // Matches even numbers, then ignoring the rest of the input when an odd number is reached.
456 /// let even_matcher = any::<&[u32], extra::Err<Simple<u32>>>()
457 /// .filter(|c: &u32| c % 2 == 0)
458 /// .repeated()
459 /// .to_slice()
460 /// .lazy();
461 /// assert_eq!(even_matcher.parse(&[2, 4, 8, 5, 6]).unwrap(), &[2, 4, 8]);
462 /// ```
463 fn to_slice(self) -> ToSlice<Self, O>
464 where
465 Self: Sized,
466 {
467 ToSlice {
468 parser: self,
469 phantom: EmptyPhantom::new(),
470 }
471 }
472
    /// Filter the output of this parser, accepting only outputs that match the given predicate.
    ///
    /// The output type of this parser is `O`, the same as the original parser.
476 ///
477 /// # Examples
478 ///
479 /// ```
480 /// # use chumsky::{prelude::*, error::Simple};
481 /// let lowercase = any::<_, extra::Err<Simple<char>>>()
482 /// .filter(char::is_ascii_lowercase)
483 /// .repeated()
484 /// .at_least(1)
485 /// .collect::<String>();
486 ///
487 /// assert_eq!(lowercase.parse("hello").into_result(), Ok("hello".to_string()));
488 /// assert!(lowercase.parse("Hello").has_errors());
489 /// ```
490 fn filter<F: Fn(&O) -> bool>(self, f: F) -> Filter<Self, F>
491 where
492 Self: Sized,
493 {
494 Filter {
495 parser: self,
496 filter: f,
497 }
498 }
499
500 /// Map the output of this parser to another value.
501 ///
502 /// The output type of this parser is `U`, the same as the function's output.
503 ///
504 /// # Examples
505 ///
506 /// ```
507 /// # use chumsky::{prelude::*, error::Simple};
508 /// #[derive(Debug, PartialEq)]
509 /// enum Token { Word(String), Num(u64) }
510 ///
511 /// let word = any::<_, extra::Err<Simple<char>>>()
512 /// .filter(|c: &char| c.is_alphabetic())
513 /// .repeated().at_least(1)
514 /// .collect::<String>()
515 /// .map(Token::Word);
516 ///
517 /// let num = any::<_, extra::Err<Simple<char>>>()
518 /// .filter(|c: &char| c.is_ascii_digit())
519 /// .repeated().at_least(1)
520 /// .collect::<String>()
521 /// .map(|s| Token::Num(s.parse().unwrap()));
522 ///
523 /// let token = word.or(num);
524 ///
525 /// assert_eq!(token.parse("test").into_result(), Ok(Token::Word("test".to_string())));
526 /// assert_eq!(token.parse("42").into_result(), Ok(Token::Num(42)));
527 /// ```
528 fn map<U, F: Fn(O) -> U>(self, f: F) -> Map<Self, O, F>
529 where
530 Self: Sized,
531 {
532 Map {
533 parser: self,
534 mapper: f,
535 phantom: EmptyPhantom::new(),
536 }
537 }
538
539 /// Map the output of this parser to another value, with the opportunity to get extra metadata from the parse like the span or parser state.
540 ///
541 /// See the docs for [`MapExtra`] for examples of metadata that can be fetched.
542 ///
543 /// The output type of this parser is `U`, the same as the function's output.
544 ///
545 /// # Examples
546 ///
547 /// Using the span of the output in the mapping function:
548 ///
549 /// ```
550 /// # use chumsky::prelude::*;
551 ///
552 /// // It's common for AST nodes to use a wrapper type that allows attaching span information to them
553 /// #[derive(Debug, PartialEq)]
554 /// pub struct Spanned<T>(T, SimpleSpan<usize>);
555 ///
556 /// let ident = text::ascii::ident::<_, extra::Err<Simple<char>>>()
557 /// .map_with(|ident, e| Spanned(ident, e.span())) // Equivalent to `.map_with_span(|ident, span| Spanned(ident, span))`
558 /// .padded();
559 ///
560 /// assert_eq!(ident.parse("hello").into_result(), Ok(Spanned("hello", (0..5).into())));
    /// assert_eq!(ident.parse("       hello       ").into_result(), Ok(Spanned("hello", (7..12).into())));
562 /// ```
563 ///
564 /// Using the parser state in the mapping function to intern strings:
565 ///
566 /// ```
567 /// # use chumsky::prelude::*;
568 /// use std::ops::Range;
569 /// use lasso::{Rodeo, Spur};
570 ///
571 /// // It's common for AST nodes to use interned versions of identifiers
572 /// // Keys are generally smaller, faster to compare, and can be `Copy`
573 /// #[derive(Copy, Clone)]
574 /// pub struct Ident(Spur);
575 ///
576 /// let ident = text::ascii::ident::<_, extra::Full<Simple<char>, extra::SimpleState<Rodeo>, ()>>()
577 /// .map_with(|ident, e| Ident(e.state().get_or_intern(ident)))
578 /// .padded()
579 /// .repeated()
580 /// .at_least(1)
581 /// .collect::<Vec<_>>();
582 ///
583 /// // Test out parser
584 ///
585 /// let mut interner = extra::SimpleState(Rodeo::new());
586 ///
587 /// match ident.parse_with_state("hello", &mut interner).into_result() {
588 /// Ok(idents) => {
589 /// assert_eq!(interner.resolve(&idents[0].0), "hello");
590 /// }
591 /// Err(e) => panic!("Parsing Failed: {:?}", e),
592 /// }
593 ///
594 /// match ident.parse_with_state("hello hello", &mut interner).into_result() {
595 /// Ok(idents) => {
596 /// assert_eq!(idents[0].0, idents[1].0);
597 /// }
598 /// Err(e) => panic!("Parsing Failed: {:?}", e),
599 /// }
600 /// ```
601 ///
602 /// Using the parse context in the mapping function:
603 ///
604 /// ```
605 /// # use chumsky::{prelude::*, error::Simple};
606 ///
607 /// fn palindrome_parser<'src>() -> impl Parser<'src, &'src str, String> {
608 /// recursive(|chain| {
609 /// choice((
610 /// just(String::new())
611 /// .configure(|cfg, ctx: &String| cfg.seq(ctx.clone()))
612 /// .then_ignore(end()),
613 /// any()
614 /// .map_with(|x, e| format!("{x}{}", e.ctx()))
615 /// .ignore_with_ctx(chain),
616 /// ))
617 /// })
618 /// .with_ctx(String::new())
619 /// }
620 ///
621 /// assert_eq!(palindrome_parser().parse("abccba").into_result().as_deref(), Ok("cba"));
622 /// assert_eq!(palindrome_parser().parse("hello olleh").into_result().as_deref(), Ok(" olleh"));
623 /// assert!(palindrome_parser().parse("abccb").into_result().is_err());
624 /// ```
625 fn map_with<U, F: Fn(O, &mut MapExtra<'src, '_, I, E>) -> U>(self, f: F) -> MapWith<Self, O, F>
626 where
627 Self: Sized,
628 {
629 MapWith {
630 parser: self,
631 mapper: f,
632 phantom: EmptyPhantom::new(),
633 }
634 }
635
636 /// Map the output of this parser to another value.
637 /// If the output of this parser isn't a tuple, use [`Parser::map`].
638 ///
639 /// The output type of this parser is `U`, the same as the function's output.
640 ///
641 /// # Examples
642 ///
643 /// ```
644 /// # use chumsky::prelude::*;
645 /// #[derive(Clone, Copy, Debug, PartialEq, Eq)]
646 /// pub enum Value {
647 /// One(u8),
648 /// Two(u8, u8),
649 /// Three(u8, u8, u8),
650 /// }
651 ///
652 /// fn parser<'src>() -> impl Parser<'src, &'src [u8], Vec<Value>> {
653 /// choice((
654 /// just(1).ignore_then(any()).map(Value::One),
655 /// just(2)
656 /// .ignore_then(group((any(), any())))
657 /// .map_group(Value::Two),
658 /// just(3)
659 /// .ignore_then(group((any(), any(), any())))
660 /// .map_group(Value::Three),
661 /// ))
662 /// .repeated()
663 /// .collect()
664 /// }
665 ///
666 /// let bytes = &[3, 1, 2, 3, 1, 127, 2, 21, 69];
667 /// assert_eq!(
668 /// parser().parse(bytes).into_result(),
669 /// Ok(vec![
670 /// Value::Three(1, 2, 3),
671 /// Value::One(127),
672 /// Value::Two(21, 69)
673 /// ])
674 /// );
675 /// ```
676 #[cfg(feature = "nightly")]
677 fn map_group<F: Fn<O>>(self, f: F) -> MapGroup<Self, O, F>
678 where
679 Self: Sized,
680 O: Tuple,
681 {
682 MapGroup {
683 parser: self,
684 mapper: f,
685 phantom: EmptyPhantom::new(),
686 }
687 }
688
689 /// Transform the output of this parser to the pattern's span.
690 ///
691 /// This is commonly used when you know what pattern you've parsed and are only interested in the span of the
692 /// pattern.
693 ///
694 /// The output type of this parser is `I::Span`.
695 ///
696 /// # Examples
697 ///
698 /// ```
699 /// # use chumsky::prelude::*;
700 ///
701 /// // It's common for AST nodes to use a wrapper type that allows attaching span information to them
702 /// #[derive(Debug, PartialEq)]
703 /// pub enum Expr<'src> {
704 /// Int(&'src str, SimpleSpan),
705 /// // The span is that of the operator, '+'
706 /// Add(Box<Expr<'src>>, SimpleSpan, Box<Expr<'src>>),
707 /// }
708 ///
709 /// let int = text::int::<_, extra::Err<Simple<char>>>(10)
710 /// .to_slice()
711 /// .map_with(|int, e| Expr::Int(int, e.span()))
712 /// .padded();
713 ///
714 /// let add_op = just('+').to_span().padded();
715 /// let sum = int.foldl(
716 /// add_op.then(int).repeated(),
717 /// |a, (op_span, b)| Expr::Add(Box::new(a), op_span, Box::new(b)),
718 /// );
719 ///
720 /// assert_eq!(sum.parse("42 + 7 + 13").into_result(), Ok(Expr::Add(
721 /// Box::new(Expr::Add(
722 /// Box::new(Expr::Int("42", (0..2).into())),
723 /// (3..4).into(),
724 /// Box::new(Expr::Int("7", (5..6).into())),
725 /// )),
726 /// (7..8).into(),
727 /// Box::new(Expr::Int("13", (9..11).into())),
728 /// )));
729 /// ```
730 fn to_span(self) -> ToSpan<Self, O>
731 where
732 Self: Sized,
733 {
734 ToSpan {
735 parser: self,
736 phantom: EmptyPhantom::new(),
737 }
738 }
739
740 /// After a successful parse, apply a fallible function to the output. If the function produces an error, treat it
741 /// as a parsing error.
742 ///
743 /// If you wish parsing of this pattern to continue when an error is generated instead of halting, consider using
744 /// [`Parser::validate`] instead.
745 ///
746 /// The output type of this parser is `U`, the [`Ok`] return value of the function.
747 ///
748 /// # Examples
749 ///
750 /// ```
751 /// # use chumsky::prelude::*;
752 /// let byte = text::int::<_, extra::Err<Rich<char>>>(10)
753 /// .try_map(|s: &str, span| s
754 /// .parse::<u8>()
755 /// .map_err(|e| Rich::custom(span, e)));
756 ///
757 /// assert!(byte.parse("255").has_output());
758 /// assert!(byte.parse("256").has_errors()); // Out of range
759 /// ```
760 #[doc(alias = "filter_map")]
761 fn try_map<U, F: Fn(O, I::Span) -> Result<U, E::Error>>(self, f: F) -> TryMap<Self, O, F>
762 where
763 Self: Sized,
764 {
765 TryMap {
766 parser: self,
767 mapper: f,
768 phantom: EmptyPhantom::new(),
769 }
770 }
771
772 /// After a successful parse, apply a fallible function to the output, with the opportunity to get extra metadata.
773 /// If the function produces an error, treat it as a parsing error.
774 ///
775 /// If you wish parsing of this pattern to continue when an error is generated instead of halting, consider using
776 /// [`Parser::validate`] instead.
777 ///
778 /// The output type of this parser is `U`, the [`Ok`] return value of the function.
779 fn try_map_with<U, F: Fn(O, &mut MapExtra<'src, '_, I, E>) -> Result<U, E::Error>>(
780 self,
781 f: F,
782 ) -> TryMapWith<Self, O, F>
783 where
784 Self: Sized,
785 {
786 TryMapWith {
787 parser: self,
788 mapper: f,
789 phantom: EmptyPhantom::new(),
790 }
791 }
792
793 /// Ignore the output of this parser, yielding `()` as an output instead.
794 ///
795 /// This can be used to reduce the cost of parsing by avoiding unnecessary allocations (most collections containing
796 /// [ZSTs](https://doc.rust-lang.org/nomicon/exotic-sizes.html#zero-sized-types-zsts)
797 /// [do not allocate](https://doc.rust-lang.org/std/vec/struct.Vec.html#guarantees)). For example, it's common to
798 /// want to ignore whitespace in many grammars (see [`text::whitespace`]).
799 ///
800 /// The output type of this parser is `()`.
801 ///
802 /// # Examples
803 ///
804 /// ```
805 /// # use chumsky::{prelude::*, error::Simple};
806 /// // A parser that parses any number of whitespace characters without allocating
807 /// let whitespace = any::<_, extra::Err<Simple<char>>>()
808 /// .filter(|c: &char| c.is_whitespace())
809 /// .ignored()
810 /// .repeated()
811 /// .collect::<Vec<_>>();
812 ///
    /// assert_eq!(whitespace.parse("    ").into_result(), Ok(vec![(); 4]));
814 /// assert!(whitespace.parse(" hello").has_errors());
815 /// ```
816 fn ignored(self) -> Ignored<Self, O>
817 where
818 Self: Sized,
819 {
820 Ignored {
821 parser: self,
822 phantom: EmptyPhantom::new(),
823 }
824 }
825
826 /// Memoize the parser such that later attempts to parse the same input 'remember' the attempt and exit early.
827 ///
828 /// If you're finding that certain inputs produce exponential behavior in your parser, strategically applying
829 /// memoization to a ['garden path'](https://en.wikipedia.org/wiki/Garden-path_sentence) rule is often an effective
830 /// way to solve the problem. At the limit, applying memoization to all combinators will turn any parser into one
831 /// with `O(n)`, albeit with very significant per-element overhead and high memory usage.
832 ///
833 /// Memoization also works with recursion, so this can be used to write parsers using
834 /// [left recursion](https://en.wikipedia.org/wiki/Left_recursion).
835 // TODO: Example
836 #[cfg(feature = "memoization")]
837 fn memoized(self) -> Memoized<Self>
838 where
839 Self: Sized,
840 {
841 Memoized { parser: self }
842 }
843
844 /// Transform all outputs of this parser to a predetermined value.
845 ///
846 /// The output type of this parser is `U`, the type of the predetermined value.
847 ///
848 /// # Examples
849 ///
850 /// ```
851 /// # use chumsky::{prelude::*, error::Simple};
852 /// #[derive(Clone, Debug, PartialEq)]
853 /// enum Op { Add, Sub, Mul, Div }
854 ///
855 /// let op = just::<_, _, extra::Err<Simple<char>>>('+').to(Op::Add)
856 /// .or(just('-').to(Op::Sub))
857 /// .or(just('*').to(Op::Mul))
858 /// .or(just('/').to(Op::Div));
859 ///
860 /// assert_eq!(op.parse("+").into_result(), Ok(Op::Add));
861 /// assert_eq!(op.parse("/").into_result(), Ok(Op::Div));
862 /// ```
863 fn to<U: Clone>(self, to: U) -> To<Self, O, U>
864 where
865 Self: Sized,
866 {
867 To {
868 parser: self,
869 to,
870 phantom: EmptyPhantom::new(),
871 }
872 }
873
874 /// Label this parser with the given label.
875 ///
876 /// Labelling a parser makes all errors generated by the parser refer to the label rather than any sub-elements
877 /// within the parser. For example, labelling a parser for an expression would yield "expected expression" errors
878 /// rather than "expected integer, string, binary op, etc." errors.
879 // TODO: Example
880 fn labelled<L>(self, label: L) -> Labelled<Self, L>
881 where
882 Self: Sized,
883 E::Error: LabelError<'src, I, L>,
884 {
885 Labelled {
886 parser: self,
887 label,
888 is_context: false,
889 }
890 }
891
892 /// Parse one thing and then another thing, yielding a tuple of the two outputs.
893 ///
894 /// The output type of this parser is `(O, U)`, a combination of the outputs of both parsers.
895 ///
896 /// If you instead only need the output of __one__ of the parsers, use [`ignore_then`](Self::ignore_then)
897 /// or [`then_ignore`](Self::then_ignore).
898 ///
899 /// # Examples
900 ///
901 /// ```
902 /// # use chumsky::{prelude::*, error::Simple};
903 /// let word = any::<_, extra::Err<Simple<char>>>()
904 /// .filter(|c: &char| c.is_alphabetic())
905 /// .repeated()
906 /// .at_least(1)
907 /// .collect::<String>();
908 /// let two_words = word.then_ignore(just(' ')).then(word);
909 ///
910 /// assert_eq!(two_words.parse("dog cat").into_result(), Ok(("dog".to_string(), "cat".to_string())));
911 /// assert!(two_words.parse("hedgehog").has_errors());
912 /// ```
913 fn then<U, B: Parser<'src, I, U, E>>(self, other: B) -> Then<Self, B, O, U, E>
914 where
915 Self: Sized,
916 {
917 Then {
918 parser_a: self,
919 parser_b: other,
920 phantom: EmptyPhantom::new(),
921 }
922 }
923
924 /// Parse one thing and then another thing, yielding only the output of the latter.
925 ///
926 /// The output type of this parser is `U`, the same as the second parser.
927 ///
928 /// If you instead only need the output of the first parser, use [`then_ignore`](Self::then_ignore).
929 /// If you need the output of __both__ parsers, use [`then`](Self::then).
930 ///
931 /// # Examples
932 ///
933 /// ```
934 /// # use chumsky::{prelude::*, error::Simple};
935 /// let zeroes = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| *c == '0').ignored().repeated().collect::<Vec<_>>();
936 /// let digits = any().filter(|c: &char| c.is_ascii_digit())
937 /// .repeated()
938 /// .collect::<String>();
939 /// let integer = zeroes
940 /// .ignore_then(digits)
941 /// .from_str()
942 /// .unwrapped();
943 ///
944 /// assert_eq!(integer.parse("00064").into_result(), Ok(64));
945 /// assert_eq!(integer.parse("32").into_result(), Ok(32));
946 /// ```
947 fn ignore_then<U, B: Parser<'src, I, U, E>>(self, other: B) -> IgnoreThen<Self, B, O, E>
948 where
949 Self: Sized,
950 {
951 IgnoreThen {
952 parser_a: self,
953 parser_b: other,
954 phantom: EmptyPhantom::new(),
955 }
956 }
957
958 /// Parse one thing and then another thing, yielding only the output of the former.
959 ///
960 /// The output type of this parser is `O`, the same as the original parser.
961 ///
962 /// If you instead only need the output of the second parser, use [`ignore_then`](Self::ignore_then).
963 /// If you need the output of __both__ parsers, use [`then`](Self::then).
964 ///
965 /// # Examples
966 ///
967 /// ```
968 /// # use chumsky::{prelude::*, error::Simple};
969 /// let word = any::<_, extra::Err<Simple<char>>>()
970 /// .filter(|c: &char| c.is_alphabetic())
971 /// .repeated()
972 /// .at_least(1)
973 /// .collect::<String>();
974 ///
975 /// let punctuated = word
976 /// .then_ignore(just('!').or(just('?')).or_not());
977 ///
978 /// let sentence = punctuated
979 /// .padded() // Allow for whitespace gaps
980 /// .repeated()
981 /// .collect::<Vec<_>>();
982 ///
983 /// assert_eq!(
984 /// sentence.parse("hello! how are you?").into_result(),
985 /// Ok(vec![
986 /// "hello".to_string(),
987 /// "how".to_string(),
988 /// "are".to_string(),
989 /// "you".to_string(),
990 /// ]),
991 /// );
992 /// ```
993 fn then_ignore<U, B: Parser<'src, I, U, E>>(self, other: B) -> ThenIgnore<Self, B, U, E>
994 where
995 Self: Sized,
996 {
997 ThenIgnore {
998 parser_a: self,
999 parser_b: other,
1000 phantom: EmptyPhantom::new(),
1001 }
1002 }
1003
1004 /// Parse input as part of a token-tree - using an input generated from within the current
1005 /// input. In other words, this parser will attempt to create a *new* input stream from within
1006 /// the one it is being run on, and the parser it was called on will be provided this *new* input.
1007 /// By default, the original parser is expected to consume up to the end of the new stream. To
1008 /// allow only consuming part of the stream, use [`Parser::lazy`] to ignore trailing tokens.
1009 ///
1010 /// The provided parser `P` is expected to have both an input and output type which match the input
1011 /// type of the parser it is called on. As an example, if the original parser takes an input of
1012 /// `Stream<Iterator<Item = T>>`, `P` will be run first against that input, and is expected to
1013 /// output a new `Stream<Iterator<Item = T>>` which the original parser will be run against.
1014 ///
1015 /// The output of this parser is `O`, the output of the parser it is called on.
1016 ///
1017 /// # Examples
1018 ///
1019 /// ```
1020 /// # use chumsky::{prelude::*, util::MaybeRef, error::Simple};
1021 /// #[derive(Debug, Clone, PartialEq)]
1022 /// enum Token<'src> {
1023 /// Struct,
1024 /// Ident(&'src str),
1025 /// Item(&'src str),
1026 /// Group(Vec<Token<'src>>),
1027 /// }
1028 ///
1029 /// let group = select_ref! { Token::Group(g) => g.as_slice() };
1030 ///
1031 /// let ident = select_ref! { Token::Ident(i) => *i };
1032 ///
1033 /// let items = select_ref! { Token::Item(i) => *i }
1034 /// .repeated()
1035 /// .collect::<Vec<_>>()
1036 /// .nested_in(group);
1037 ///
1038 /// let struc = just::<_, _, extra::Err<Simple<_>>>(&Token::Struct)
1039 /// .ignore_then(ident)
1040 /// .then(items);
1041 ///
1042 /// let tl = struc
1043 /// .repeated()
1044 /// .collect::<Vec<_>>();
1045 ///
1046 /// let tokens = [
1047 /// Token::Struct,
1048 /// Token::Ident("foo"),
1049 /// Token::Group(vec![
1050 /// Token::Item("a"),
1051 /// Token::Item("b"),
1052 /// ]),
1053 /// ];
1054 ///
1055 /// assert_eq!(tl.parse(&tokens).into_result(), Ok(vec![("foo", vec!["a", "b"])]));
1056 /// ```
1057 fn nested_in<B: Parser<'src, J, I, F>, J, F>(self, other: B) -> NestedIn<Self, B, J, F, O, E>
1058 where
1059 Self: Sized,
1060 I: 'src,
1061 J: Input<'src>,
1062 F: ParserExtra<'src, J>,
1063 {
1064 NestedIn {
1065 parser_a: self,
1066 parser_b: other,
1067 phantom: EmptyPhantom::new(),
1068 }
1069 }
1070
1071 /// Parse one thing and then another thing, creating the second parser from the result of
1072 /// the first. If you do need the context in the output, use [`Parser::then_with_ctx`].
1073 ///
1074 /// The output of this parser is `U`, the result of the second parser.
1075 ///
1076 /// Error recovery for this parser may be sub-optimal, as if the first parser succeeds on
1077 /// recovery then the second produces an error, the primary error will point to the location in
1078 /// the second parser which failed, ignoring that the first parser may be the root cause. There
1079 /// may be other pathological error cases as well.
1080 ///
1081 /// # Examples
1082 ///
1083 /// ```
1084 /// # use chumsky::{prelude::*, error::Simple};
1085 /// let successor = just(b'\0').configure(|cfg, ctx: &u8| cfg.seq(*ctx + 1));
1086 ///
1087 /// // A parser that parses a single letter and then its successor
1088 /// let successive_letters = one_of::<_, _, extra::Err<Simple<u8>>>(b'a'..=b'z')
1089 /// .ignore_with_ctx(successor);
1090 ///
1091 /// assert_eq!(successive_letters.parse(b"ab").into_result(), Ok(b'b')); // 'b' follows 'a'
1092 /// assert!(successive_letters.parse(b"ac").has_errors()); // 'c' does not follow 'a'
1093 /// ```
1094 fn ignore_with_ctx<U, P>(
1095 self,
1096 then: P,
1097 ) -> IgnoreWithCtx<Self, P, O, I, extra::Full<E::Error, E::State, O>>
1098 where
1099 Self: Sized,
1100 O: 'src,
1101 P: Parser<'src, I, U, extra::Full<E::Error, E::State, O>>,
1102 {
1103 IgnoreWithCtx {
1104 parser: self,
1105 then,
1106 phantom: EmptyPhantom::new(),
1107 }
1108 }
1109
1110 /// Parse one thing and then another thing, creating the second parser from the result of
1111 /// the first. If you don't need the context in the output, prefer [`Parser::ignore_with_ctx`].
1112 ///
1113 /// The output of this parser is `(E::Context, O)`,
1114 /// a combination of the context and the output of the parser.
1115 ///
1116 /// Error recovery for this parser may be sub-optimal, as if the first parser succeeds on
1117 /// recovery then the second produces an error, the primary error will point to the location in
1118 /// the second parser which failed, ignoring that the first parser may be the root cause. There
1119 /// may be other pathological error cases as well.
1120 fn then_with_ctx<U, P>(
1121 self,
1122 then: P,
1123 ) -> ThenWithCtx<Self, P, O, I, extra::Full<E::Error, E::State, O>>
1124 where
1125 Self: Sized,
1126 O: 'src,
1127 P: Parser<'src, I, U, extra::Full<E::Error, E::State, O>>,
1128 {
1129 ThenWithCtx {
1130 parser: self,
1131 then,
1132 phantom: EmptyPhantom::new(),
1133 }
1134 }
1135
1136 /// Run the previous contextual parser with the provided context.
1137 ///
1138 /// ```
1139 /// # use chumsky::prelude::*;
1140 /// # use chumsky::primitive::JustCfg;
1141 ///
1142 /// let generic = just(b'0').configure(|cfg, ctx: &u8| cfg.seq(*ctx));
1143 ///
1144 /// let parse_a = just::<_, _, extra::Default>(b'b').ignore_then(generic.with_ctx::<u8>(b'a'));
1145 /// let parse_b = just::<_, _, extra::Default>(b'a').ignore_then(generic.with_ctx(b'b'));
1146 ///
1147 /// assert_eq!(parse_a.parse(b"ba" as &[_]).into_result(), Ok::<_, Vec<EmptyErr>>(b'a'));
1148 /// assert!(parse_a.parse(b"bb").has_errors());
1149 /// assert_eq!(parse_b.parse(b"ab" as &[_]).into_result(), Ok(b'b'));
1150 /// assert!(parse_b.parse(b"aa").has_errors());
1151 /// ```
1152 fn with_ctx<Ctx>(self, ctx: Ctx) -> WithCtx<Self, Ctx>
1153 where
1154 Self: Sized,
1155 Ctx: 'src + Clone,
1156 {
1157 WithCtx { parser: self, ctx }
1158 }
1159
1160 /// Runs the previous parser with the provided state.
1161 ///
1162 /// This is very uncommonly used and exists mostly for completeness.
1163 ///
1164 /// One possible use-case is 'glueing' together parsers declared in different places with incompatible state types.
1165 ///
1166 /// Note that the state value will be cloned and dropped *during* parsing, so it is recommended to ensure that
1167 /// this is a relatively performant operation.
1168 fn with_state<State>(self, state: State) -> WithState<Self, State>
1169 where
1170 Self: Sized,
1171 State: 'src + Clone,
1172 {
1173 WithState {
1174 parser: self,
1175 state,
1176 }
1177 }
1178
1179 /// Applies both parsers to the same position in the input, succeeding
1180 /// only if both succeed. The returned value will be that of the first parser,
1181 /// and the input will be at the end of the first parser if `and_is` succeeds.
1182 ///
1183 /// The second parser is allowed to consume more or less input than the first parser,
1184 /// but like its output, how much it consumes won't affect the final result.
1185 ///
1186 /// The motivating use-case is in combination with [`Parser::not`], allowing a parser
1187 /// to consume something only if it isn't also something like an escape sequence or a nested block.
1188 ///
1189 /// # Examples
1190 ///
1191 /// ```
1192 /// # use chumsky::{prelude::*, error::Simple};
1193 ///
1194 /// let escape = just("\\n").to('\n');
1195 ///
1196 /// // C-style string literal
1197 /// let string = none_of::<_, _, extra::Err<Simple<char>>>('"')
1198 /// .and_is(escape.not())
1199 /// .or(escape)
1200 /// .repeated()
1201 /// .collect::<String>()
1202 /// .padded_by(just('"'));
1203 ///
1204 /// assert_eq!(
1205 /// string.parse("\"wxyz\"").into_result().as_deref(),
1206 /// Ok("wxyz"),
1207 /// );
1208 /// assert_eq!(
1209 /// string.parse("\"a\nb\"").into_result().as_deref(),
1210 /// Ok("a\nb"),
1211 /// );
1212 /// ```
1213 fn and_is<U, B>(self, other: B) -> AndIs<Self, B, U>
1214 where
1215 Self: Sized,
1216 B: Parser<'src, I, U, E>,
1217 {
1218 AndIs {
1219 parser_a: self,
1220 parser_b: other,
1221 phantom: EmptyPhantom::new(),
1222 }
1223 }
1224
1225 /// Parse the pattern surrounded by the given delimiters.
1226 ///
1227 /// The output type of this parser is `O`, the same as the original parser.
1228 ///
1229 /// # Examples
1230 ///
1231 /// ```
1232 /// # use chumsky::{prelude::*, error::Simple};
1233 /// // A LISP-style S-expression
1234 /// #[derive(Debug, PartialEq)]
1235 /// enum SExpr {
1236 /// Ident(String),
1237 /// Num(u64),
1238 /// List(Vec<SExpr>),
1239 /// }
1240 ///
1241 /// let ident = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| c.is_alphabetic())
1242 /// .repeated()
1243 /// .at_least(1)
1244 /// .collect::<String>();
1245 ///
1246 /// let num = text::int(10)
1247 /// .from_str()
1248 /// .unwrapped();
1249 ///
1250 /// let s_expr = recursive(|s_expr| s_expr
1251 /// .padded()
1252 /// .repeated()
1253 /// .collect::<Vec<_>>()
1254 /// .map(SExpr::List)
1255 /// .delimited_by(just('('), just(')'))
1256 /// .or(ident.map(SExpr::Ident))
1257 /// .or(num.map(SExpr::Num)));
1258 ///
1259 /// // A valid input
1260 /// assert_eq!(
1261 /// s_expr.parse("(add (mul 42 3) 15)").into_result(),
1262 /// Ok(SExpr::List(vec![
1263 /// SExpr::Ident("add".to_string()),
1264 /// SExpr::List(vec![
1265 /// SExpr::Ident("mul".to_string()),
1266 /// SExpr::Num(42),
1267 /// SExpr::Num(3),
1268 /// ]),
1269 /// SExpr::Num(15),
1270 /// ])),
1271 /// );
1272 /// ```
1273 fn delimited_by<U, V, B, C>(self, start: B, end: C) -> DelimitedBy<Self, B, C, U, V>
1274 where
1275 Self: Sized,
1276 B: Parser<'src, I, U, E>,
1277 C: Parser<'src, I, V, E>,
1278 {
1279 DelimitedBy {
1280 parser: self,
1281 start,
1282 end,
1283 phantom: EmptyPhantom::new(),
1284 }
1285 }
1286
1287 /// Parse a pattern, but with an instance of another pattern on either end, yielding the output of the inner.
1288 ///
1289 /// The output type of this parser is `O`, the same as the original parser.
1290 ///
1291 /// # Examples
1292 ///
1293 /// ```
1294 /// # use chumsky::{prelude::*, error::Simple};
1295 /// let ident = text::ascii::ident::<_, extra::Err<Simple<char>>>()
1296 /// .padded_by(just('!'));
1297 ///
1298 /// assert_eq!(ident.parse("!hello!").into_result(), Ok("hello"));
1299 /// assert!(ident.parse("hello!").has_errors());
1300 /// assert!(ident.parse("!hello").has_errors());
1301 /// assert!(ident.parse("hello").has_errors());
1302 /// ```
1303 fn padded_by<U, B>(self, padding: B) -> PaddedBy<Self, B, U>
1304 where
1305 Self: Sized,
1306 B: Parser<'src, I, U, E>,
1307 {
1308 PaddedBy {
1309 parser: self,
1310 padding,
1311 phantom: EmptyPhantom::new(),
1312 }
1313 }
1314
1315 /// Parse one thing or, on failure, another thing.
1316 ///
1317 /// The output of both parsers must be of the same type, because either output can be produced.
1318 ///
1319 /// If both parsers succeed, the output of the first parser is guaranteed to be prioritized over the output of the
1320 /// second.
1321 ///
1322 /// If both parsers produce errors, the combinator will attempt to select from or combine the errors to produce an
1323 /// error that is most likely to be useful to a human attempting to understand the problem. The exact algorithm
1324 /// used is left unspecified, and is not part of the crate's semver guarantees, although regressions in error
1325 /// quality should be reported in the issue tracker of the main repository.
1326 ///
1327 /// Please note that long chains of [`Parser::or`] combinators have been known to result in poor compilation times.
1328 /// If you feel you are experiencing this, consider using [`choice`] instead.
1329 ///
1330 /// The output type of this parser is `O`, the output of both parsers.
1331 ///
1332 /// # Examples
1333 ///
1334 /// ```
1335 /// # use chumsky::{prelude::*, error::Simple};
1336 /// let op = just::<_, _, extra::Err<Simple<char>>>('+')
1337 /// .or(just('-'))
1338 /// .or(just('*'))
1339 /// .or(just('/'));
1340 ///
1341 /// assert_eq!(op.parse("+").into_result(), Ok('+'));
1342 /// assert_eq!(op.parse("/").into_result(), Ok('/'));
1343 /// assert!(op.parse("!").has_errors());
1344 /// ```
1345 fn or<B>(self, other: B) -> Or<Self, B>
1346 where
1347 Self: Sized,
1348 B: Parser<'src, I, O, E>,
1349 {
1350 Or {
1351 choice: choice((self, other)),
1352 }
1353 }
1354
1355 /// Attempt to parse something, but only if it exists.
1356 ///
1357 /// If parsing of the pattern is successful, the output is `Some(_)`. Otherwise, the output is `None`.
1358 ///
1359 /// The output type of this parser is `Option<O>`.
1360 ///
1361 /// # Examples
1362 ///
1363 /// ```
1364 /// # use chumsky::{prelude::*, error::Simple};
1365 /// let word = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| c.is_alphabetic())
1366 /// .repeated()
1367 /// .at_least(1)
1368 /// .collect::<String>();
1369 ///
1370 /// let word_or_question = word
1371 /// .then(just('?').or_not());
1372 ///
1373 /// assert_eq!(word_or_question.parse("hello?").into_result(), Ok(("hello".to_string(), Some('?'))));
1374 /// assert_eq!(word_or_question.parse("wednesday").into_result(), Ok(("wednesday".to_string(), None)));
1375 /// ```
1376 fn or_not(self) -> OrNot<Self>
1377 where
1378 Self: Sized,
1379 {
1380 OrNot { parser: self }
1381 }
1382
1383 /// Invert the result of the contained parser, failing if it succeeds and succeeding if it fails.
1384 /// The output of this parser is always `()`, the unit type.
1385 ///
1386 /// The motivating case for this is in combination with [`Parser::and_is`], allowing a parser
1387 /// to consume something only if it isn't also something like an escape sequence or a nested block.
1388 ///
1389 /// Caveats:
1390 /// - The error message produced by `not` by default will likely be fairly unhelpful - it can
1391 /// only tell the span that was wrong.
1392 /// - If not careful, it's fairly easy to create non-intuitive behavior due to end-of-input
1393 /// being a valid token for a parser to consume, and as most parsers fail at end of input,
1394 /// `not` will succeed on it.
1395 ///
1396 /// ```
1397 /// # use chumsky::{prelude::*, error::Simple};
1398 ///
1399 /// #[derive(Debug, PartialEq)]
1400 /// enum Tree<'src> {
1401 /// Text(&'src str),
1402 /// Group(Vec<Self>),
1403 /// }
1404 ///
1405 /// // Arbitrary text, nested in a tree with { ... } delimiters
1406 /// let tree = recursive::<_, _, extra::Err<Simple<char>>, _, _>(|tree| {
1407 /// let text = any()
1408 /// .and_is(one_of("{}").not())
1409 /// .repeated()
1410 /// .at_least(1)
1411 /// .to_slice()
1412 /// .map(Tree::Text);
1413 ///
1414 /// let group = tree
1415 /// .repeated()
1416 /// .collect()
1417 /// .delimited_by(just('{'), just('}'))
1418 /// .map(Tree::Group);
1419 ///
1420 /// text.or(group)
1421 /// });
1422 ///
1423 /// assert_eq!(
1424 /// tree.parse("{abcd{efg{hijk}lmn{opq}rs}tuvwxyz}").into_result(),
1425 /// Ok(Tree::Group(vec![
1426 /// Tree::Text("abcd"),
1427 /// Tree::Group(vec![
1428 /// Tree::Text("efg"),
1429 /// Tree::Group(vec![
1430 /// Tree::Text("hijk"),
1431 /// ]),
1432 /// Tree::Text("lmn"),
1433 /// Tree::Group(vec![
1434 /// Tree::Text("opq"),
1435 /// ]),
1436 /// Tree::Text("rs"),
1437 /// ]),
1438 /// Tree::Text("tuvwxyz"),
1439 /// ])),
1440 /// );
1441 /// ```
1442 fn not(self) -> Not<Self, O>
1443 where
1444 Self: Sized,
1445 {
1446 Not {
1447 parser: self,
1448 phantom: EmptyPhantom::new(),
1449 }
1450 }
1451
1452 /// Parse a pattern zero or more times (analogous to regex's `<PAT>*`).
1453 ///
1454 /// Input is eagerly parsed. Be aware that the parser will accept no occurrences of the pattern too. Consider using
1455 /// [`Repeated::at_least`] instead if you wish to parse a minimum number of elements.
1456 ///
1457 /// The output type of this parser is, by default, `()`. If you want to collect the items into a [`Container`]
1458 /// (such as a [`Vec`]), use [`IterParser::collect`].
1459 ///
1460 /// # Examples
1461 ///
1462 /// ```
1463 /// # use chumsky::{prelude::*, error::Simple};
1464 /// let num = any::<_, extra::Err<Simple<char>>>()
1465 /// .filter(|c: &char| c.is_ascii_digit())
1466 /// .repeated()
1467 /// .at_least(1)
1468 /// .collect::<String>()
1469 /// .from_str()
1470 /// .unwrapped();
1471 ///
1472 /// let sum = num.clone()
1473 /// .foldl(just('+').ignore_then(num).repeated(), |a, b| a + b);
1474 ///
1475 /// assert_eq!(sum.parse("2+13+4+0+5").into_result(), Ok(24));
1476 /// ```
1477 #[cfg_attr(debug_assertions, track_caller)]
1478 fn repeated(self) -> Repeated<Self, O, I, E>
1479 where
1480 Self: Sized,
1481 {
1482 Repeated {
1483 parser: self,
1484 at_least: 0,
1485 at_most: !0,
1486 #[cfg(debug_assertions)]
1487 location: *Location::caller(),
1488 phantom: EmptyPhantom::new(),
1489 }
1490 }
1491
1492 /// Parse a pattern, separated by another, any number of times.
1493 ///
1494 /// You can use [`SeparatedBy::allow_leading`] or [`SeparatedBy::allow_trailing`] to allow leading or trailing
1495 /// separators.
1496 ///
1497 /// The output type of this parser can be any [`Container`].
1498 ///
1499 /// # Examples
1500 ///
1501 /// ```
1502 /// # use chumsky::{prelude::*, error::Simple};
1503 /// let shopping = text::ascii::ident::<_, extra::Err<Simple<char>>>()
1504 /// .padded()
1505 /// .separated_by(just(','))
1506 /// .collect::<Vec<_>>();
1507 ///
1508 /// assert_eq!(shopping.parse("eggs").into_result(), Ok(vec!["eggs"]));
1509 /// assert_eq!(shopping.parse("eggs, flour, milk").into_result(), Ok(vec!["eggs", "flour", "milk"]));
1510 /// ```
1511 ///
1512 /// See [`SeparatedBy::allow_leading`] and [`SeparatedBy::allow_trailing`] for more examples.
1513 #[cfg_attr(debug_assertions, track_caller)]
1514 fn separated_by<U, B>(self, separator: B) -> SeparatedBy<Self, B, O, U, I, E>
1515 where
1516 Self: Sized,
1517 B: Parser<'src, I, U, E>,
1518 {
1519 SeparatedBy {
1520 parser: self,
1521 separator,
1522 at_least: 0,
1523 at_most: !0,
1524 allow_leading: false,
1525 allow_trailing: false,
1526 #[cfg(debug_assertions)]
1527 location: *Location::caller(),
1528 phantom: EmptyPhantom::new(),
1529 }
1530 }
1531
1532 /// Left-fold the output of the parser into a single value.
1533 ///
1534 /// The items produced by `other`, an [`IterParser`], are folded one by one (left to right) into the output of this parser.
1535 ///
1536 /// The output type of this parser is `O`, the same as the original parser.
1537 ///
1538 /// # Examples
1539 ///
1540 /// ```
1541 /// # use chumsky::{prelude::*, error::Simple};
1542 /// let int = text::int::<_, extra::Err<Simple<char>>>(10)
1543 /// .from_str()
1544 /// .unwrapped();
1545 ///
1546 /// let sum = int
1547 /// .clone()
1548 /// .foldl(just('+').ignore_then(int).repeated(), |a, b| a + b);
1549 ///
1550 /// assert_eq!(sum.parse("1+12+3+9").into_result(), Ok(25));
1551 /// assert_eq!(sum.parse("6").into_result(), Ok(6));
1552 /// ```
1553 #[cfg_attr(debug_assertions, track_caller)]
1554 fn foldl<B, F, OB>(self, other: B, f: F) -> Foldl<F, Self, B, OB, E>
1555 where
1556 F: Fn(O, OB) -> O,
1557 B: IterParser<'src, I, OB, E>,
1558 Self: Sized,
1559 {
1560 Foldl {
1561 parser_a: self,
1562 parser_b: other,
1563 folder: f,
1564 #[cfg(debug_assertions)]
1565 location: *Location::caller(),
1566 phantom: EmptyPhantom::new(),
1567 }
1568 }
1569
1570 /// Left-fold the output of the parser into a single value, making use of the parser's state when doing so.
1571 ///
1572 /// The items produced by `other`, an [`IterParser`], are folded one by one (left to right) into the output of this parser.
1573 ///
1574 /// The output type of this parser is `O`, the same as the original parser.
1575 ///
1576 /// # Examples
1577 ///
1578 /// ## General
1579 ///
1580 /// ```
1581 /// # use chumsky::{prelude::*, error::Simple, extra::SimpleState};
1582 /// let int = text::int::<_, extra::Full<Simple<char>, SimpleState<i32>, ()>>(10)
1583 /// .from_str()
1584 /// .unwrapped();
1585 ///
1586 /// let sum = int
1587 /// .clone()
1588 /// .foldl_with(just('+').ignore_then(int).repeated(), |a, b, e| (a + b) * **e.state());
1589 ///
1590 /// let mut multiplier = SimpleState(2i32);
1591 /// assert_eq!(sum.parse_with_state("1+12+3+9", &mut multiplier).into_result(), Ok(134));
1592 /// assert_eq!(sum.parse_with_state("6", &mut multiplier).into_result(), Ok(6));
1593 /// ```
1594 ///
1595 /// ## Interning / Arena Allocation
1596 ///
1597 /// This example assumes use of the `slotmap` crate for arena allocation.
1598 ///
1599 /// ```
1600 /// # use chumsky::prelude::*;
1601 /// use slotmap::{new_key_type, SlotMap};
1602 ///
1603 /// // Metadata type for node Ids for extra type safety
1604 /// new_key_type! {
1605 /// pub struct NodeId;
1606 /// }
1607 ///
1608 /// // AST nodes reference other nodes with `NodeId`s instead of containing boxed/owned values
1609 /// #[derive(Copy, Clone, Debug, PartialEq)]
1610 /// enum Expr {
1611 /// Int(i32),
1612 /// Add(NodeId, NodeId),
1613 /// }
1614 ///
1615 /// type NodeArena = SlotMap<NodeId, Expr>;
1616 ///
1617 /// // Now, define our parser
1618 /// let int = text::int::<&str, extra::Full<Simple<char>, extra::SimpleState<NodeArena>, ()>>(10)
1619 /// .padded()
1620 /// .map_with(|s, e|
1621 /// // Return the ID of the new integer node
1622 /// e.state().insert(Expr::Int(s.parse().unwrap()))
1623 /// );
1624 ///
1625 /// let sum = int.foldl_with(
1626 /// just('+').padded().ignore_then(int).repeated(),
1627 /// |a: NodeId, b: NodeId, e| {
1628 /// // Inserting an item into the arena returns its ID
1629 /// e.state().insert(Expr::Add(a, b))
1630 /// }
1631 /// );
1632 ///
1633 /// // Test our parser
1634 /// let mut arena = extra::SimpleState(NodeArena::default());
1635 /// let four_plus_eight = sum.parse_with_state("4 + 8", &mut arena).unwrap();
1636 /// if let Expr::Add(a, b) = arena[four_plus_eight] {
1637 /// assert_eq!(arena[a], Expr::Int(4));
1638 /// assert_eq!(arena[b], Expr::Int(8));
1639 /// } else {
1640 /// panic!("Not an Expr::Add");
1641 /// }
1642 /// ```
1643 #[cfg_attr(debug_assertions, track_caller)]
1644 fn foldl_with<B, F, OB>(self, other: B, f: F) -> FoldlWith<F, Self, B, OB, E>
1645 where
1646 F: Fn(O, OB, &mut MapExtra<'src, '_, I, E>) -> O,
1647 B: IterParser<'src, I, OB, E>,
1648 Self: Sized,
1649 {
1650 FoldlWith {
1651 parser_a: self,
1652 parser_b: other,
1653 folder: f,
1654 #[cfg(debug_assertions)]
1655 location: *Location::caller(),
1656 phantom: EmptyPhantom::new(),
1657 }
1658 }
1659
1660 /// Parse a pattern. Afterwards, the input stream will be rewound to its original state, as if parsing had not
1661 /// occurred.
1662 ///
1663 /// This combinator is useful for cases in which you wish to avoid a parser accidentally consuming too much input,
1664 /// causing later parsers to fail as a result. A typical use-case of this is that you want to parse something that
1665 /// is not followed by something else.
1666 ///
1667 /// The output type of this parser is `O`, the same as the original parser.
1668 ///
1669 /// # Examples
1670 ///
1671 /// ```
1672 /// # use chumsky::prelude::*;
1673 /// let just_numbers = text::digits::<_, extra::Err<Simple<char>>>(10)
1674 /// .to_slice()
1675 /// .padded()
1676 /// .then_ignore(none_of("+-*/").rewind())
1677 /// .separated_by(just(','))
1678 /// .collect::<Vec<_>>();
1679 /// // 3 is not parsed because it's followed by '+'.
1680 /// assert_eq!(just_numbers.lazy().parse("1, 2, 3 + 4").into_result(), Ok(vec!["1", "2"]));
1681 /// ```
1682 fn rewind(self) -> Rewind<Self>
1683 where
1684 Self: Sized,
1685 {
1686 Rewind { parser: self }
1687 }
1688
1689 /// Make the parser lazy, such that it parses as much of the input as it can before finishing successfully, leaving the trailing input untouched.
1690 ///
1691 /// The output type of this parser is `O`, the same as the original parser.
1692 ///
1693 /// # Examples
1694 ///
1695 /// ```
1696 /// # use chumsky::prelude::*;
1697 /// let digits = one_of::<_, _, extra::Err<Simple<char>>>('0'..='9')
1698 /// .repeated()
1699 /// .collect::<String>()
1700 /// .lazy();
1701 ///
1702 /// assert_eq!(digits.parse("12345abcde").into_result().as_deref(), Ok("12345"));
1703 /// ```
1704 fn lazy(self) -> Lazy<'src, Self, I, E>
1705 where
1706 Self: Sized,
1707 I: ValueInput<'src>,
1708 {
1709 self.then_ignore(any().repeated())
1710 }
1711
1712 /// Parse a pattern, ignoring any amount of whitespace both before and after the pattern.
1713 ///
1714 /// The output type of this parser is `O`, the same as the original parser.
1715 ///
1716 /// # Examples
1717 ///
1718 /// ```
1719 /// # use chumsky::prelude::*;
1720 /// let ident = text::ascii::ident::<_, extra::Err<Simple<char>>>().padded();
1721 ///
1722 /// // A pattern with no whitespace surrounding it is accepted
1723 /// assert_eq!(ident.parse("hello").into_result(), Ok("hello"));
1724 /// // A pattern with arbitrary whitespace surrounding it is also accepted
1725 /// assert_eq!(ident.parse(" \t \n \t world \t ").into_result(), Ok("world"));
1726 /// ```
1727 fn padded(self) -> Padded<Self>
1728 where
1729 Self: Sized,
1730 I: Input<'src>,
1731 I::Token: Char,
1732 {
1733 Padded { parser: self }
1734 }
1735
1736 // /// Flatten a nested collection.
1737 // ///
1738 // /// This use-cases of this method are broadly similar to those of [`Iterator::flatten`].
1739 // ///
1740 // /// The output type of this parser is `Vec<T>`, where the original parser output was
1741 // /// `impl IntoIterator<Item = impl IntoIterator<Item = T>>`.
1742 // fn flatten<T, Inner>(self) -> Map<Self, O, fn(O) -> Vec<T>>
1743 // where
1744 // Self: Sized,
1745 // O: IntoIterator<Item = Inner>,
1746 // Inner: IntoIterator<Item = T>,
1747 // {
1748 // self.map(|xs| xs.into_iter().flat_map(|xs| xs.into_iter()).collect())
1749 // }
1750
1751 /// Apply a fallback recovery strategy to this parser should it fail.
1752 ///
1753 /// There is no silver bullet for error recovery, so this function allows you to specify one of several different
1754 /// strategies at the location of your choice. Prefer an error recovery strategy that more precisely mirrors valid
1755 /// syntax where possible to make error recovery more reliable.
1756 ///
1757 /// Because chumsky is a [PEG](https://en.m.wikipedia.org/wiki/Parsing_expression_grammar) parser, which always
1758 /// takes the first successful parsing route through a grammar, recovering from an error may cause the parser to
1759 /// erroneously miss alternative valid routes through the grammar that do not generate recoverable errors. If you
1760 /// run into cases where valid syntax fails to parse without errors, this might be happening: consider removing
1761 /// error recovery or switching to a more specific error recovery strategy.
1762 ///
1763 /// The output type of this parser is `O`, the same as the original parser.
1764 ///
1765 /// # Examples
1766 ///
1767 /// ```
1768 /// # use chumsky::{prelude::*, error::Simple};
1769 /// #[derive(Debug, PartialEq)]
1770 /// enum Expr<'src> {
1771 /// Error,
1772 /// Int(&'src str),
1773 /// List(Vec<Expr<'src>>),
1774 /// }
1775 ///
1776 /// let recovery = just::<_, _, extra::Err<Simple<char>>>('[')
1777 /// .then(none_of(']').repeated().then(just(']')));
1778 ///
1779 /// let expr = recursive::<_, _, extra::Err<Simple<char>>, _, _>(|expr| expr
1780 /// .separated_by(just(','))
1781 /// .collect::<Vec<_>>()
1782 /// .delimited_by(just('['), just(']'))
1783 /// .map(Expr::List)
1784 /// // If parsing a list expression fails, recover at the next delimiter, generating an error AST node
1785 /// .recover_with(via_parser(recovery.map(|_| Expr::Error)))
1786 /// .or(text::int(10).map(Expr::Int))
1787 /// .padded());
1788 ///
1789 /// assert!(expr.parse("five").has_errors()); // Text is not a valid expression in this language...
1790 /// assert_eq!(
1791 /// expr.parse("[1, 2, 3]").into_result(),
1792 /// Ok(Expr::List(vec![Expr::Int("1"), Expr::Int("2"), Expr::Int("3")])),
1793 /// ); // ...but lists and numbers are!
1794 ///
1795 /// // This input has two syntax errors...
1796 /// let res = expr.parse("[[1, two], [3, four]]");
1797 /// // ...and error recovery allows us to catch both of them!
1798 /// assert_eq!(res.errors().len(), 2);
1799 /// // Additionally, the AST we get back still has useful information.
1800 /// assert_eq!(res.output(), Some(&Expr::List(vec![Expr::Error, Expr::Error])));
1801 /// ```
1802 fn recover_with<S: Strategy<'src, I, O, E>>(self, strategy: S) -> RecoverWith<Self, S>
1803 where
1804 Self: Sized,
1805 {
1806 RecoverWith {
1807 parser: self,
1808 strategy,
1809 }
1810 }
1811
1812 /// Map the primary error of this parser to another value.
1813 ///
1814 /// This function is most useful when using a custom error type, allowing you to augment errors according to
1815 /// context.
1816 ///
1817 /// The output type of this parser is `O`, the same as the original parser.
1818 // TODO: Map E -> D, not E -> E
1819 fn map_err<F>(self, f: F) -> MapErr<Self, F>
1820 where
1821 Self: Sized,
1822 F: Fn(E::Error) -> E::Error,
1823 {
1824 MapErr {
1825 parser: self,
1826 mapper: f,
1827 }
1828 }
1829
1830 // /// Map the primary error of this parser to another value, making use of the span from the start of the attempted
1831 // /// to the point at which the error was encountered.
1832 // ///
1833 // /// This function is useful for augmenting errors to allow them to display the span of the initial part of a
1834 // /// pattern, for example to add a "while parsing" clause to your error messages.
1835 // ///
1836 // /// The output type of this parser is `O`, the same as the original parser.
1837 // ///
1838 // // TODO: Map E -> D, not E -> E
1839 // fn map_err_with_span<F>(self, f: F) -> MapErrWithSpan<Self, F>
1840 // where
1841 // Self: Sized,
1842 // F: Fn(E::Error, I::Span) -> E::Error,
1843 // {
1844 // MapErrWithSpan {
1845 // parser: self,
1846 // mapper: f,
1847 // }
1848 // }
1849
/// Map the primary error of this parser to another value, making use of the parser state.
///
/// This function is useful for augmenting errors to allow them to include context in non context-free
/// languages, or provide contextual notes on possible causes.
///
/// The mapping closure receives the error by value, a span (`I::Span`) and a mutable reference to the parser
/// state (`E::State`), and must return an error of the same type (`E::Error`).
///
/// The output type of this parser is `O`, the same as the original parser.
///
// TODO: Map E -> D, not E -> E
fn map_err_with_state<F>(self, f: F) -> MapErrWithState<Self, F>
where
    Self: Sized,
    F: Fn(E::Error, I::Span, &mut E::State) -> E::Error,
{
    MapErrWithState {
        parser: self,
        mapper: f,
    }
}
1868
1869 /// Validate an output, producing non-terminal errors if it does not fulfill certain criteria.
1870 /// The errors will not immediately halt parsing on this path, but instead it will continue,
/// potentially emitting one or more other errors, only failing after the pattern has otherwise
/// succeeded, or emitted another terminal error.
1873 ///
1874 /// This function also permits mapping the output to a value of another type, similar to [`Parser::map`].
1875 ///
1876 /// If you wish parsing of this pattern to halt when an error is generated instead of continuing, consider using
1877 /// [`Parser::try_map`] instead.
1878 ///
1879 /// The output type of this parser is `U`, the result of the validation closure.
1880 ///
1881 /// # Examples
1882 ///
1883 /// ```
1884 /// # use chumsky::prelude::*;
1885 /// let large_int = text::int::<_, extra::Err<Rich<char>>>(10)
1886 /// .from_str()
1887 /// .unwrapped()
1888 /// .validate(|x: u32, e, emitter| {
1889 /// if x < 256 { emitter.emit(Rich::custom(e.span(), format!("{} must be 256 or higher.", x))) }
1890 /// x
1891 /// });
1892 ///
1893 /// assert_eq!(large_int.parse("537").into_result(), Ok(537));
1894 /// assert!(large_int.parse("243").into_result().is_err());
1895 /// ```
1896 ///
1897 /// To show the difference in behavior from [`Parser::try_map`]:
1898 ///
1899 /// ```
1900 /// # use chumsky::{text::TextExpected, util::MaybeRef, error::LabelError, prelude::*};
1901 ///
1902 /// // Start with the same large_int validator
1903 /// let large_int_val = text::int::<_, extra::Err<Rich<char>>>(10)
1904 /// .from_str()
1905 /// .unwrapped()
1906 /// .validate(|x: u32, e, emitter| {
1907 /// if x < 256 { emitter.emit(Rich::custom(e.span(), format!("{} must be 256 or higher", x))) }
1908 /// x
1909 /// });
1910 ///
1911 /// // A try_map version of the same parser
1912 /// let large_int_tm = text::int::<_, extra::Err<Rich<char>>>(10)
1913 /// .from_str()
1914 /// .unwrapped()
1915 /// .try_map(|x: u32, span| {
1916 /// if x < 256 {
1917 /// Err(Rich::custom(span, format!("{} must be 256 or higher", x)))
1918 /// } else {
1919 /// Ok(x)
1920 /// }
1921 /// });
1922 ///
1923 /// // Parser that uses the validation version
1924 /// let multi_step_val = large_int_val.then(text::ascii::ident().padded());
1925 /// // Parser that uses the try_map version
1926 /// let multi_step_tm = large_int_tm.then(text::ascii::ident().padded());
1927 ///
1928 /// // On success, both parsers are equivalent
1929 /// assert_eq!(
1930 /// multi_step_val.parse("512 foo").into_result(),
1931 /// Ok((512, "foo"))
1932 /// );
1933 ///
1934 /// assert_eq!(
1935 /// multi_step_tm.parse("512 foo").into_result(),
1936 /// Ok((512, "foo"))
1937 /// );
1938 ///
1939 /// // However, on failure, they may produce different errors:
1940 /// assert_eq!(
1941 /// multi_step_val.parse("100 2").into_result(),
1942 /// Err(vec![
1943 /// Rich::<char>::custom((0..3).into(), "100 must be 256 or higher"),
1944 /// <Rich<char> as LabelError<&str, _>>::expected_found([TextExpected::<&str>::IdentifierPart], Some(MaybeRef::Val('2')), (4..5).into()),
1945 /// ])
1946 /// );
1947 ///
1948 /// assert_eq!(
1949 /// multi_step_tm.parse("100 2").into_result(),
1950 /// Err(vec![Rich::<char>::custom((0..3).into(), "100 must be 256 or higher")])
1951 /// );
1952 /// ```
1953 ///
1954 /// As is seen in the above example, validation doesn't prevent the emission of later errors in the
1955 /// same parser, but still produces an error in the output.
1956 ///
fn validate<U, F>(self, f: F) -> Validate<Self, O, F>
where
    Self: Sized,
    // The validator receives the parsed output, the `MapExtra` for the parse, and an `Emitter` for
    // reporting errors; the value it returns (`U`) is the validation closure's result.
    F: Fn(O, &mut MapExtra<'src, '_, I, E>, &mut Emitter<E::Error>) -> U,
{
    Validate {
        parser: self,
        validator: f,
        phantom: EmptyPhantom::new(),
    }
}
1968
1969 // /// Map the primary error of this parser to a result. If the result is [`Ok`], the parser succeeds with that value.
1970 // ///
1971 // /// Note that, if the closure returns [`Err`], the parser will not consume any input.
1972 // ///
1973 // /// The output type of this parser is `U`, the [`Ok`] type of the result.
1974 // fn or_else<F>(self, f: F) -> OrElse<Self, F>
1975 // where
1976 // Self: Sized,
1977 // F: Fn(E::Error) -> Result<O, E::Error>,
1978 // {
1979 // OrElse {
1980 // parser: self,
1981 // or_else: f,
1982 // }
1983 // }
1984
1985 /// Attempt to convert the output of this parser into something else using Rust's [`FromStr`] trait.
1986 ///
1987 /// This is most useful when wanting to convert literal values into their corresponding Rust type, such as when
1988 /// parsing integers.
1989 ///
1990 /// The output type of this parser is `Result<U, U::Err>`, the result of attempting to parse the output, `O`, into
1991 /// the value `U`.
1992 ///
1993 /// # Examples
1994 ///
1995 /// ```
1996 /// # use chumsky::prelude::*;
1997 /// let uint64 = text::int::<_, extra::Err<Simple<char>>>(10)
1998 /// .from_str::<u64>()
1999 /// .unwrapped();
2000 ///
2001 /// assert_eq!(uint64.parse("7").into_result(), Ok(7));
2002 /// assert_eq!(uint64.parse("42").into_result(), Ok(42));
2003 /// ```
// `from_*` methods conventionally do not take `self`, which is what this lint flags; this combinator
// consumes the parser, so the lint is silenced here.
#[allow(clippy::wrong_self_convention)]
fn from_str<U>(self) -> Map<Self, O, fn(O) -> Result<U, U::Err>>
where
    Self: Sized,
    U: FromStr,
    O: AsRef<str>,
{
    // The closure captures nothing, so it can be used as the plain `fn` pointer named in the return type
    self.map(|o| o.as_ref().parse())
}
2013
2014 /// For parsers that produce a [`Result`] as their output, unwrap the result (panicking if an [`Err`] is
2015 /// encountered).
2016 ///
/// In general, this method should be avoided except in cases where all possible outputs that the parser might
/// produce can be parsed using [`FromStr`] without producing an error.
2019 ///
2020 /// This combinator is not named `unwrap` to avoid confusion: it unwraps *during parsing*, not immediately.
2021 ///
2022 /// The output type of this parser is `U`, the [`Ok`] value of the [`Result`].
2023 ///
2024 /// # Examples
2025 ///
2026 /// ```
2027 /// # use chumsky::prelude::*;
2028 /// let boolean = just::<_, _, extra::Err<Simple<char>>>("true")
2029 /// .or(just("false"))
2030 /// .from_str::<bool>()
2031 /// .unwrapped(); // Cannot panic: the only possible outputs generated by the parser are "true" or "false"
2032 ///
2033 /// assert_eq!(boolean.parse("true").into_result(), Ok(true));
2034 /// assert_eq!(boolean.parse("false").into_result(), Ok(false));
2035 /// // Does not panic, because the original parser only accepts "true" or "false"
2036 /// assert!(boolean.parse("42").has_errors());
2037 /// ```
#[track_caller]
fn unwrapped(self) -> Unwrapped<Self, O>
where
    Self: Sized,
{
    Unwrapped {
        parser: self,
        // Because of `#[track_caller]`, this is the source location at which `unwrapped` was called
        // (presumably used to report where a failed unwrap originated - confirm in `Unwrapped`)
        location: *Location::caller(),
        phantom: EmptyPhantom::new(),
    }
}
2049
/// Turn this [`Parser`] into an [`IterParser`] if its output type implements [`IntoIterator`].
///
/// The resulting iterable parser will emit each element of the output type in turn. In the example below, a
/// parser that outputs a range of `u64` becomes an iterable parser whose items are collected into a `Vec<u64>`.
///
/// This is *broadly* analogous to functions like [`Vec::into_iter`], but operating at the level of parser outputs.
///
/// # Examples
///
/// ```
/// # use chumsky::prelude::*;
/// // Parses whole integers
/// let num = text::int::<&str, extra::Default>(10).padded().map(|x: &str| x.parse::<u64>().unwrap());
/// // Parses a range like `0..4` into a vector like `[0, 1, 2, 3]`
/// let range = num.then_ignore(just("..")).then(num)
///     .map(|(x, y)| x..y)
///     .into_iter()
///     .collect::<Vec<u64>>();
/// // Parses a list of numbers into a vector
/// let list = num.separated_by(just(',')).collect::<Vec<u64>>();
/// let set = range.or(list);
/// assert_eq!(set.parse("0, 1, 2, 3").unwrap(), [0, 1, 2, 3]);
/// assert_eq!(set.parse("0..4").unwrap(), [0, 1, 2, 3]);
/// ```
fn into_iter(self) -> IntoIter<Self, O>
where
    Self: Sized,
    O: IntoIterator,
{
    IntoIter {
        parser: self,
        phantom: EmptyPhantom::new(),
    }
}
2083
2084 /// Box the parser, yielding a parser that performs parsing through dynamic dispatch.
2085 ///
2086 /// Boxing a parser might be useful for:
2087 ///
2088 /// - Dynamically building up parsers at run-time
2089 ///
2090 /// - Improving compilation times (Rust can struggle to compile code containing very long types)
2091 ///
2092 /// - Passing a parser over an FFI boundary
2093 ///
2094 /// - Getting around compiler implementation problems with long types such as
2095 /// [this](https://github.com/rust-lang/rust/issues/54540).
2096 ///
2097 /// - Places where you need to name the type of a parser
2098 ///
2099 /// Boxing a parser is broadly equivalent to boxing other combinators via dynamic dispatch, such as [`Iterator`].
2100 ///
2101 /// The output type of this parser is `O`, the same as the original parser.
2102 ///
2103 /// # Examples
2104 ///
2105 /// When not using `boxed`, the following patterns are either impossible or very difficult to express:
2106 ///
2107 /// ```compile_fail
2108 /// # use chumsky::prelude::*;
2109 ///
2110 /// pub trait Parseable: Sized {
2111 /// type Parser<'src>: Parser<'src, &'src str, Self>;
2112 ///
2113 /// fn parser<'src>() -> Self::Parser<'src>;
2114 /// }
2115 ///
2116 /// impl Parseable for i32 {
2117 /// // We *can* write this type, but it will be very impractical, and change on any alterations
2118 /// // to the implementation
2119 /// type Parser<'src> = ???;
2120 ///
2121 /// fn parser<'src>() -> Self::Parser<'src> {
2122 /// todo()
2123 /// }
2124 /// }
2125 /// ```
2126 ///
2127 /// ```compile_fail
2128 /// # use chumsky::prelude::*;
2129 /// # fn user_input<'src>() -> impl IntoIterator<Item = impl Parser<'src, &'src str, char>> { [just('b')] }
2130 ///
2131 /// let user_input = user_input();
2132 ///
2133 /// let mut parser = just('a');
2134 /// for i in user_input {
2135 /// // Doesn't work due to type mismatch - since every combinator creates a unique type
2136 /// parser = parser.or(i);
2137 /// }
2138 ///
2139 /// let parser = parser.then(just('z'));
2140 /// let _ = parser.parse("b").into_result();
2141 /// ```
2142 ///
2143 /// However, with `boxed`, we can express them by making the parsers all share a common type:
2144 ///
2145 /// ```
2146 /// use chumsky::prelude::*;
2147 ///
2148 /// pub trait Parseable: Sized {
2149 /// fn parser<'src>() -> Boxed<'src, 'src, &'src str, Self>;
2150 /// }
2151 ///
2152 /// impl Parseable for i32 {
2153 /// fn parser<'src>() -> Boxed<'src, 'src, &'src str, Self> {
2154 /// todo().boxed()
2155 /// }
2156 /// }
2157 /// ```
2158 ///
2159 /// ```
2160 /// # use chumsky::prelude::*;
2161 /// # fn user_input<'src>() -> impl IntoIterator<Item = impl Parser<'src, &'src str, char>> { [just('b'), just('c')] }
2162 /// let user_input = user_input();
2163 /// let mut parser = just('a').boxed();
2164 /// for i in user_input {
///     // Works: after boxing, every parser in the chain has the same type
2166 /// parser = parser.or(i).boxed();
2167 /// }
2168 /// let parser = parser.then(just('z'));
2169 /// parser.parse("az").into_result().unwrap();
2170 /// ```
2171 ///
fn boxed<'b>(self) -> Boxed<'src, 'b, I, O, E>
where
    Self: Sized + 'src + 'b,
{
    Boxed {
        // Despite the method's name, the parser is placed behind an `Rc` rather than a `Box`
        inner: Rc::new(self),
    }
}
2180
/// Simplify the type of the parser using Rust's `impl Trait` syntax.
///
/// The only reason for using this function is to make Rust's compiler errors easier to debug: it does not change
/// the behaviour of the parser at all, and is in fact just a simple identity function.
///
/// This method is only available when the `nightly` feature is enabled.
#[cfg(feature = "nightly")]
fn simplify(self) -> impl Parser<'src, I, O, E>
where
    Self: Sized + 'src,
{
    // Returning `self` unchanged: only the named return type becomes opaque
    self
}
2192
/// Use [Pratt parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing) to ergonomically
/// parse this pattern separated by prefix, postfix, and infix operators of various associativities and precedence.
2195 ///
2196 /// Pratt parsing is a powerful technique and is recommended when writing parsers for expressions.
2197 ///
2198 /// # Example
2199 ///
2200 /// See the documentation in [`pratt`] for more extensive examples and details.
2201 ///
2202 /// ```
2203 /// # use chumsky::prelude::*;
2204 /// use chumsky::pratt::*;
2205 ///
2206 /// let int = text::int::<_, extra::Err<Rich<char>>>(10)
2207 /// .from_str()
2208 /// .unwrapped()
2209 /// .padded();
2210 ///
2211 /// let op = |c| just(c).padded();
2212 ///
2213 /// let expr = int.pratt((
2214 /// prefix(2, op('-'), |_, x: i64, _| -x),
2215 /// infix(left(1), op('*'), |x, _, y, _| x * y),
2216 /// infix(left(1), op('/'), |x, _, y, _| x / y),
2217 /// infix(left(0), op('+'), |x, _, y, _| x + y),
2218 /// infix(left(0), op('-'), |x, _, y, _| x - y),
2219 /// ));
2220 ///
2221 /// // Pratt parsing can handle unary operators...
2222 /// assert_eq!(expr.parse("-7").into_result(), Ok(-7));
2223 /// // ...and infix binary operators...
2224 /// assert_eq!(expr.parse("6 + 3").into_result(), Ok(9));
2225 /// // ...and arbitrary precedence levels between them.
2226 /// assert_eq!(expr.parse("2 + 3 * -4").into_result(), Ok(-10));
2227 /// ```
#[cfg(feature = "pratt")]
fn pratt<Ops>(self, ops: Ops) -> pratt::Pratt<Self, Ops>
where
    Self: Sized,
{
    // This parser becomes the `atom` of the Pratt parser; `ops` carries the operator definitions
    pratt::Pratt { atom: self, ops }
}
2235}
2236
// `Parser` implementation for the never type (requires the `nightly` feature). Since `!` has no values,
// `go` can never actually be invoked; the impl exists at the type level only.
#[cfg(feature = "nightly")]
impl<'src, I, O, E> Parser<'src, I, O, E> for !
where
    I: Input<'src>,
    E: ParserExtra<'src, I>,
{
    fn go<M: Mode>(&self, _inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
        // `*self` has type `!`, which coerces to the required return type
        *self
    }

    // Generates `go_emit` and `go_check`, both of which forward to `go`
    go_extra!(O);
}
2249
/// A [`Parser`] that can be configured with runtime context.
///
/// This allows for context-sensitive parsing of input. Note that chumsky only supports 'left'-sensitive parsing,
/// where the context for a parser is derived from earlier in the input.
///
/// Chumsky distinguishes 'state' from 'context'. State is not able to change what input a parser
/// accepts, but may be used to change the contents of the type it emits. In this way state is expected
/// to be idempotent - combinators such as [`Parser::map_with`] are allowed to not call the
/// provided closure at all if they don't emit any output. Context and configuration, on the other hand,
/// are used to change what kind of input a parser may accept, and thus must always be evaluated. Context
/// isn't usable in any map combinator however - while it may affect accepted input, it is not expected
/// to change the final result outside of how it changes what the parser itself returns.
///
/// Not all parsers currently support configuration. If you feel like you need a parser to be configurable
/// and it isn't currently, please open an issue on the issue tracker of the main repository.
pub trait ConfigParser<'src, I, O, E>: Parser<'src, I, O, E>
where
    I: Input<'src>,
    E: ParserExtra<'src, I>,
{
    /// A type describing the configurable aspects of the parser.
    type Config: Default;

    // Run the parser in mode `M` using the given configuration value
    #[doc(hidden)]
    fn go_cfg<M: Mode>(
        &self,
        inp: &mut InputRef<'src, '_, I, E>,
        cfg: Self::Config,
    ) -> PResult<M, O>;

    // `go_cfg` fixed to the `Emit` mode
    #[doc(hidden)]
    #[inline(always)]
    fn go_emit_cfg(
        &self,
        inp: &mut InputRef<'src, '_, I, E>,
        cfg: Self::Config,
    ) -> PResult<Emit, O> {
        self.go_cfg::<Emit>(inp, cfg)
    }
    // `go_cfg` fixed to the `Check` mode
    #[doc(hidden)]
    #[inline(always)]
    fn go_check_cfg(
        &self,
        inp: &mut InputRef<'src, '_, I, E>,
        cfg: Self::Config,
    ) -> PResult<Check, O> {
        self.go_cfg::<Check>(inp, cfg)
    }

    /// A combinator that allows configuration of the parser from the current context. Context
    /// is most often derived from [`Parser::ignore_with_ctx`], [`Parser::then_with_ctx`] or [`map_ctx`],
    /// and is how chumsky supports parsing things such as indentation-sensitive grammars.
    ///
    /// The closure receives a configuration value together with a reference to the current context
    /// (`&E::Context`) and returns the configuration to use.
    ///
    /// # Examples
    ///
    /// ```
    /// # use chumsky::prelude::*;
    ///
    /// let int = text::int::<_, extra::Err<Rich<char>>>(10)
    ///     .from_str()
    ///     .unwrapped();
    ///
    /// // By default, accepts any number of items
    /// let item = text::ascii::ident()
    ///     .padded()
    ///     .repeated();
    ///
    /// // With configuration, we can declare an exact number of items based on a prefix length
    /// let len_prefixed_arr = int
    ///     .ignore_with_ctx(item.configure(|repeat, ctx| repeat.exactly(*ctx)).collect::<Vec<_>>());
    ///
    /// assert_eq!(
    ///     len_prefixed_arr.parse("2 foo bar").into_result(),
    ///     Ok(vec!["foo", "bar"]),
    /// );
    ///
    /// assert_eq!(
    ///     len_prefixed_arr.parse("0").into_result(),
    ///     Ok(vec![]),
    /// );
    ///
    /// len_prefixed_arr.parse("3 foo bar baz bam").into_result().unwrap_err();
    /// len_prefixed_arr.parse("3 foo bar").into_result().unwrap_err();
    /// ```
    fn configure<F>(self, cfg: F) -> Configure<Self, F>
    where
        Self: Sized,
        F: Fn(Self::Config, &E::Context) -> Self::Config,
    {
        Configure { parser: self, cfg }
    }
}
2343
/// An iterator that wraps an iterable parser. See [`IterParser::parse_iter`].
///
/// Currently only compiled for tests.
#[cfg(test)]
pub struct ParserIter<
    'src,
    'iter,
    P: IterParser<'src, I, O, E>,
    I: Input<'src>,
    O,
    E: ParserExtra<'src, I>,
> {
    // The iterable parser that produces the items
    parser: P,
    // The input being parsed; its `start` is advanced after every call to `Iterator::next`
    own: InputOwn<'src, 'iter, I, E>,
    // The parser's iteration state: `None` until the first call to `Iterator::next` creates it
    iter_state: Option<P::IterState<Emit>>,
    // Marker for the otherwise unused `'src` and `O` parameters
    #[allow(dead_code)]
    phantom: EmptyPhantom<(&'src (), O)>,
}
2360
// Drives the wrapped iterable parser one item per call to `next`.
#[cfg(test)]
impl<'src, P, I, O, E> Iterator for ParserIter<'src, '_, P, I, O, E>
where
    I: Input<'src>,
    E: ParserExtra<'src, I>,
    P: IterParser<'src, I, O, E>,
{
    type Item = O;

    fn next(&mut self) -> Option<Self::Item> {
        let mut inp = self.own.as_ref_start();

        // The iteration state is built on first use; failing to build it ends iteration immediately
        let state = match self.iter_state.as_mut() {
            Some(existing) => existing,
            None => {
                let fresh = self.parser.make_iter::<Emit>(&mut inp).ok()?;
                self.iter_state.insert(fresh)
            }
        };

        let step = self.parser.next::<Emit>(&mut inp, state);
        // TODO: Avoid clone
        self.own.start = inp.cursor().inner;
        // Both an error and an inner `None` yield `None`
        step.ok().flatten()
    }
}
2388
/// An iterable equivalent of [`Parser`], i.e: a parser that generates a sequence of outputs.
pub trait IterParser<'src, I, O, E = extra::Default>
where
    I: Input<'src>,
    E: ParserExtra<'src, I>,
{
    // State created by `make_iter` and threaded through every call to `next`
    #[doc(hidden)]
    type IterState<M: Mode>
    where
        I: 'src;

    // Determines whether this iter parser is expected to not consume input on each iteration
    #[doc(hidden)]
    const NONCONSUMPTION_IS_OK: bool = false;

    // Create the iteration state that is subsequently passed to `next`
    #[doc(hidden)]
    fn make_iter<M: Mode>(
        &self,
        inp: &mut InputRef<'src, '_, I, E>,
    ) -> PResult<Emit, Self::IterState<M>>;
    // Attempt to produce the next output of the sequence, updating `state`
    #[doc(hidden)]
    fn next<M: Mode>(
        &self,
        inp: &mut InputRef<'src, '_, I, E>,
        state: &mut Self::IterState<M>,
    ) -> IPResult<M, O>;

    /// Collect this iterable parser into a [`Container`].
    ///
    /// This is commonly useful for collecting parsers that output many values into containers of various kinds:
    /// [`Vec`]s, [`String`]s, or even [`HashMap`]s. This method is analogous to [`Iterator::collect`].
    ///
    /// The output type of this iterable parser is `C`, the type being collected into.
    ///
    /// # Examples
    ///
    /// ```
    /// # use chumsky::{prelude::*, error::Simple};
    /// let word = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| c.is_alphabetic()) // This parser produces an output of `char`
    ///     .repeated() // This parser is iterable (i.e: implements `IterParser`)
    ///     .collect::<String>(); // We collect the `char`s into a `String`
    ///
    /// assert_eq!(word.parse("hello").into_result(), Ok("hello".to_string()));
    /// ```
    #[cfg_attr(debug_assertions, track_caller)]
    fn collect<C: Container<O>>(self) -> Collect<Self, O, C>
    where
        Self: Sized,
    {
        Collect {
            parser: self,
            // The call site is only recorded in builds with debug assertions enabled
            #[cfg(debug_assertions)]
            location: *Location::caller(),
            phantom: EmptyPhantom::new(),
        }
    }

    /// Collect this iterable parser into a [`ContainerExactly`].
    ///
    /// This is useful for situations where the number of items to consume is statically known.
    /// A common use-case is collecting into an array.
    ///
    /// The output type of this iterable parser is `C`, the type being collected into.
    ///
    /// # Examples
    ///
    /// ```
    /// # use chumsky::{prelude::*, error::Simple};
    /// let three_digit = any::<_, extra::Err<Simple<char>>>().filter(|c: &char| c.is_numeric())
    ///     .repeated()
    ///     .collect_exactly::<[_; 3]>();
    ///
    /// assert_eq!(three_digit.parse("123").into_result(), Ok(['1', '2', '3']));
    /// assert!(three_digit.parse("12").into_result().is_err());
    /// assert!(three_digit.parse("1234").into_result().is_err());
    /// ```
    fn collect_exactly<C: ContainerExactly<O>>(self) -> CollectExactly<Self, O, C>
    where
        Self: Sized,
    {
        CollectExactly {
            parser: self,
            phantom: EmptyPhantom::new(),
        }
    }

    /// Collect this iterable parser into a [`usize`], outputting the number of elements that were parsed.
    ///
    /// This is sugar for [`.collect::<usize>()`](Self::collect).
    ///
    /// # Examples
    ///
    /// ```
    /// # use chumsky::prelude::*;
    ///
    /// // Counts how many chess squares are in the input.
    /// let squares = one_of::<_, _, extra::Err<Simple<char>>>('a'..='z').then(one_of('1'..='8')).padded().repeated().count();
    ///
    /// assert_eq!(squares.parse("a1 b2 c3").into_result(), Ok(3));
    /// assert_eq!(squares.parse("e5 e7 c6 c7 f6 d5 e6 d7 e4 c5 d6 c4 b6 f5").into_result(), Ok(14));
    /// assert_eq!(squares.parse("").into_result(), Ok(0));
    /// ```
    fn count(self) -> Collect<Self, O, usize>
    where
        Self: Sized,
    {
        self.collect()
    }

    /// Enumerate outputs of this iterable parser.
    ///
    /// This function behaves in a similar way to [`Iterator::enumerate`].
    ///
    /// The output type of this iterable parser is `(usize, O)`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use chumsky::{prelude::*, error::Simple};
    /// let word = text::ascii::ident::<_, extra::Err<Simple<char>>>()
    ///     .padded()
    ///     .repeated() // This parser is iterable (i.e: implements `IterParser`)
    ///     .enumerate()
    ///     .collect::<Vec<(usize, &str)>>();
    ///
    /// assert_eq!(word.parse("hello world").into_result(), Ok(vec![(0, "hello"), (1, "world")]));
    /// ```
    fn enumerate(self) -> Enumerate<Self, O>
    where
        Self: Sized,
    {
        Enumerate {
            parser: self,
            phantom: EmptyPhantom::new(),
        }
    }

    /// Right-fold the outputs of this iterable parser into the output of another parser.
    ///
    /// The items of this iterable parser (of type `O`) are combined with the output of `other` (of type `OA`)
    /// using the folding function `f`, which takes an item and the value accumulated so far and returns the new
    /// accumulated value. In the example below, the sign prefixes are parsed by this iterable parser and the
    /// integer that follows them is parsed by `other`.
    ///
    /// The result of the fold has type `OA`, the output type of `other`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use chumsky::{prelude::*, error::Simple};
    /// let int = text::int::<_, extra::Err<Simple<char>>>(10)
    ///     .from_str()
    ///     .unwrapped();
    ///
    /// let signed = just('+').to(1)
    ///     .or(just('-').to(-1))
    ///     .repeated()
    ///     .foldr(int, |a, b| a * b);
    ///
    /// assert_eq!(signed.parse("3").into_result(), Ok(3));
    /// assert_eq!(signed.parse("-17").into_result(), Ok(-17));
    /// assert_eq!(signed.parse("--+-+-5").into_result(), Ok(5));
    /// ```
    #[cfg_attr(debug_assertions, track_caller)]
    fn foldr<B, F, OA>(self, other: B, f: F) -> Foldr<F, Self, B, O, E>
    where
        F: Fn(O, OA) -> OA,
        B: Parser<'src, I, OA, E>,
        Self: Sized,
    {
        Foldr {
            parser_a: self,
            parser_b: other,
            folder: f,
            // The call site is only recorded in builds with debug assertions enabled
            #[cfg(debug_assertions)]
            location: *Location::caller(),
            phantom: EmptyPhantom::new(),
        }
    }

    /// Right-fold the outputs of this iterable parser into the output of another parser, making use of the
    /// parser's state when doing so.
    ///
    /// This behaves like [`IterParser::foldr`], except that the folding function `f` additionally receives a
    /// [`MapExtra`], through which (as in the example below) the parser state can be accessed.
    ///
    /// The result of the fold has type `OA`, the output type of `other`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use chumsky::{prelude::*, error::Simple, extra::SimpleState};
    /// let int = text::int::<_, extra::Full<Simple<char>, SimpleState<i32>, ()>>(10)
    ///     .from_str()
    ///     .unwrapped();
    ///
    /// let signed = just('+').to(1)
    ///     .or(just('-').to(-1))
    ///     .repeated()
    ///     .foldr_with(int, |a, b, e| {
    ///         **e.state() += 1;
    ///         a * b
    ///     });
    ///
    /// // Test our parser
    /// let mut folds = SimpleState(0i32);
    /// assert_eq!(signed.parse_with_state("3", &mut folds).into_result(), Ok(3));
    /// assert_eq!(signed.parse_with_state("-17", &mut folds).into_result(), Ok(-17));
    /// assert_eq!(signed.parse_with_state("--+-+-5", &mut folds).into_result(), Ok(5));
    /// ```
    ///
    ///
    #[cfg_attr(debug_assertions, track_caller)]
    fn foldr_with<B, F, OA>(self, other: B, f: F) -> FoldrWith<F, Self, B, O, E>
    where
        F: Fn(O, OA, &mut MapExtra<'src, '_, I, E>) -> OA,
        B: Parser<'src, I, OA, E>,
        Self: Sized,
    {
        FoldrWith {
            parser_a: self,
            parser_b: other,
            folder: f,
            // The call site is only recorded in builds with debug assertions enabled
            #[cfg(debug_assertions)]
            location: *Location::caller(),
            phantom: EmptyPhantom::new(),
        }
    }

    /// Flatten an iterable parser whose outputs are themselves iterable (`O: IntoIterator`).
    ///
    /// This is intended to be the iterable-parser counterpart of [`Iterator::flatten`]. It is only available when
    /// the `nightly` feature is enabled.
    ///
    /// TODO: Expand this documentation and add an example.
    #[cfg(feature = "nightly")]
    fn flatten(self) -> Flatten<Self, O>
    where
        O: IntoIterator,
        Self: Sized,
    {
        Flatten {
            parser: self,
            phantom: EmptyPhantom::new(),
        }
    }

    /// Create an iterator over the outputs generated by an iterable parser.
    ///
    /// The returned [`ParseResult`] always contains the iterator and an empty list of errors: no parsing is
    /// performed until the iterator is advanced.
    ///
    /// Warning: Trailing errors will be ignored
    // TODO: Stabilize once error handling is properly decided on
    #[cfg(test)]
    fn parse_iter(self, input: I) -> ParseResult<ParserIter<'src, 'static, Self, I, O, E>, E::Error>
    where
        Self: IterParser<'src, I, O, E> + Sized,
        E::State: Default,
        E::Context: Default,
    {
        ParseResult::new(
            Some(ParserIter {
                parser: self,
                own: InputOwn::new(input),
                // Created lazily by the first call to `Iterator::next`
                iter_state: None,
                phantom: EmptyPhantom::new(),
            }),
            Vec::new(),
        )
    }

    /// Create an iterator over the outputs generated by an iterable parser with the given parser state.
    ///
    /// The returned [`ParseResult`] always contains the iterator and an empty list of errors: no parsing is
    /// performed until the iterator is advanced.
    ///
    /// Warning: Trailing errors will be ignored
    // TODO: Stabilize once error handling is properly decided on
    #[cfg(test)]
    fn parse_iter_with_state<'parse>(
        self,
        input: I,
        state: &'parse mut E::State,
    ) -> ParseResult<ParserIter<'src, 'parse, Self, I, O, E>, E::Error>
    where
        Self: IterParser<'src, I, O, E> + Sized,
        E::Context: Default,
    {
        ParseResult::new(
            Some(ParserIter {
                parser: self,
                own: InputOwn::new_state(input, state),
                // Created lazily by the first call to `Iterator::next`
                iter_state: None,
                phantom: EmptyPhantom::new(),
            }),
            Vec::new(),
        )
    }
}
2675
/// An iterable equivalent of [`ConfigParser`], i.e: a parser that generates a sequence of outputs and
/// can be configured at runtime.
pub trait ConfigIterParser<'src, I, O, E = extra::Default>: IterParser<'src, I, O, E>
where
    I: Input<'src>,
    E: ParserExtra<'src, I>,
{
    /// A type describing the configurable aspects of the iterable parser.
    type Config: Default;

    // Like `IterParser::next`, but with a configuration value supplied by reference
    #[doc(hidden)]
    fn next_cfg<M: Mode>(
        &self,
        inp: &mut InputRef<'src, '_, I, E>,
        state: &mut Self::IterState<M>,
        cfg: &Self::Config,
    ) -> IPResult<M, O>;

    /// A combinator that allows configuration of the parser from the current context.
    ///
    /// The closure receives a configuration value together with a reference to the current context
    /// (`&E::Context`) and returns the configuration to use.
    fn configure<F>(self, cfg: F) -> IterConfigure<Self, F, O>
    where
        Self: Sized,
        F: Fn(Self::Config, &E::Context) -> Self::Config,
    {
        IterConfigure {
            parser: self,
            cfg,
            phantom: EmptyPhantom::new(),
        }
    }

    /// A combinator that allows fallible configuration of the parser from the current context -
    /// if an error is returned, parsing fails.
    ///
    /// In addition to the configuration value and the context, the closure receives a span (`I::Span`) that can
    /// be used when constructing the error.
    fn try_configure<F>(self, cfg: F) -> TryIterConfigure<Self, F, O>
    where
        Self: Sized,
        F: Fn(Self::Config, &E::Context, I::Span) -> Result<Self::Config, E::Error>,
    {
        TryIterConfigure {
            parser: self,
            cfg,
            phantom: EmptyPhantom::new(),
        }
    }
}
2721
/// See [`Parser::boxed`].
///
/// Due to current implementation details, the inner value is not, in fact, a [`Box`], but is an [`Rc`] to facilitate
/// efficient cloning. This is likely to change in the future. Unlike [`Box`], [`Rc`] has no size guarantees: although
/// it is *currently* the same size as a raw pointer.
// TODO: Don't use an Rc (why?)
pub struct Boxed<'src, 'b, I: Input<'src>, O, E: ParserExtra<'src, I> = extra::Default> {
    // Shared handle to the dynamically-dispatched parser; cloning a `Boxed` clones this handle
    inner: Rc<DynParser<'src, 'b, I, O, E>>,
}
2731
2732impl<'src, I: Input<'src>, O, E: ParserExtra<'src, I>> Clone for Boxed<'src, '_, I, O, E> {
2733 fn clone(&self) -> Self {
2734 Self {
2735 inner: self.inner.clone(),
2736 }
2737 }
2738}
2739
// A boxed parser parses by delegating to the parser stored behind its `inner` pointer.
impl<'src, I, O, E> Parser<'src, I, O, E> for Boxed<'src, '_, I, O, E>
where
    I: Input<'src>,
    E: ParserExtra<'src, I>,
{
    #[inline]
    fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
        // Let the mode `M` invoke the inner `DynParser`
        M::invoke(&*self.inner, inp)
    }

    // Overrides the default `Parser::boxed`: boxing an already-boxed parser returns it unchanged
    fn boxed<'c>(self) -> Boxed<'src, 'c, I, O, E>
    where
        Self: Sized + 'src + 'c,
    {
        // Never double-box parsers
        self
    }

    // Generates `go_emit` and `go_check`, both of which forward to `go`
    go_extra!(O);
}
2760
2761impl<'src, I, O, E, T> Parser<'src, I, O, E> for ::alloc::boxed::Box<T>
2762where
2763 I: Input<'src>,
2764 E: ParserExtra<'src, I>,
2765 T: Parser<'src, I, O, E>,
2766{
2767 #[inline]
2768 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O>
2769 where
2770 Self: Sized,
2771 {
2772 T::go::<M>(self, inp)
2773 }
2774
2775 go_extra!(O);
2776}
2777
2778impl<'src, I, O, E, T> Parser<'src, I, O, E> for ::alloc::rc::Rc<T>
2779where
2780 I: Input<'src>,
2781 E: ParserExtra<'src, I>,
2782 T: Parser<'src, I, O, E>,
2783{
2784 #[inline]
2785 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O>
2786 where
2787 Self: Sized,
2788 {
2789 T::go::<M>(self, inp)
2790 }
2791
2792 go_extra!(O);
2793}
2794
2795impl<'src, I, O, E, T> Parser<'src, I, O, E> for ::alloc::sync::Arc<T>
2796where
2797 I: Input<'src>,
2798 E: ParserExtra<'src, I>,
2799 T: Parser<'src, I, O, E>,
2800{
2801 #[inline]
2802 fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O>
2803 where
2804 Self: Sized,
2805 {
2806 T::go::<M>(self, inp)
2807 }
2808
2809 go_extra!(O);
2810}
2811
/// Create a parser that selects one or more input patterns and maps them to an output value.
///
/// This is most useful when turning the tokens of a previous compilation pass (such as lexing) into data that can be
/// used for parsing, although it can also generally be used to select inputs and map them to outputs. Any unmapped
/// input patterns will become syntax errors, just as with [`Parser::filter`].
///
/// Internally, [`select!`] is very similar to a single-token [`Parser::filter`] and thinking of it as such might make
/// it less confusing.
///
/// `select!` requires that tokens implement [`Clone`] and the input type implements [`ValueInput`]. If you're trying
/// to access tokens referentially (for the sake of nested parsing, or simply because you want to avoid cloning the
/// token), see [`select_ref!`].
///
/// # Examples
///
/// `select!` is syntactically similar to a `match` expression and has support for
/// [pattern guards](https://doc.rust-lang.org/reference/expressions/match-expr.html#match-guards):
///
/// ```
/// # use chumsky::{prelude::*, error::Simple};
/// #[derive(Clone)]
/// enum Token<'src> { Ident(&'src str) }
///
/// enum Expr<'src> { Local(&'src str), Null, True, False }
///
/// # let _: chumsky::primitive::Select<_, &[Token], Expr, extra::Default> =
/// select! {
///     Token::Ident(s) if s == "true" => Expr::True,
///     Token::Ident(s) if s == "false" => Expr::False,
///     Token::Ident(s) if s == "null" => Expr::Null,
///     Token::Ident(s) => Expr::Local(s),
/// }
/// # ;
/// ```
///
/// If you require access to the token's span or other metadata, you may add an argument after a pattern to gain access
/// to it (see the docs for [`Parser::map_with`] and [`MapExtra`]):
///
/// ```
/// # use chumsky::{prelude::*, error::Simple};
/// #[derive(Clone)]
/// enum Token<'src> { Num(f64), Str(&'src str) }
///
/// enum Expr<'src> { Num(f64), Str(&'src str) }
///
/// type Span = SimpleSpan<usize>;
///
/// impl<'src> Expr<'src> {
///     fn spanned(self, span: Span) -> (Self, Span) { (self, span) }
/// }
///
/// # let _: chumsky::primitive::Select<_, &[Token], (Expr, Span), extra::Default> =
/// select! {
///     Token::Num(x) = e => Expr::Num(x).spanned(e.span()),
///     Token::Str(s) = e => Expr::Str(s).spanned(e.span()),
/// }
/// # ;
/// ```
///
/// A more complete example, in which `select!` produces the leaf nodes of a small recursive parser:
///
/// ```
/// # use chumsky::{prelude::*, error::Simple};
/// // The type of our parser's input (tokens like this might be emitted by your compiler's lexer)
/// #[derive(Clone, Debug, PartialEq)]
/// enum Token {
///     Num(u64),
///     Bool(bool),
///     LParen,
///     RParen,
/// }
///
/// // The type of our parser's output, a syntax tree
/// #[derive(Debug, PartialEq)]
/// enum Ast {
///     Num(u64),
///     Bool(bool),
///     List(Vec<Ast>),
/// }
///
/// // Our parser converts a stream of input tokens into an AST
/// // `select!` is used to deconstruct some of the tokens and turn them into AST nodes
/// let ast = recursive::<_, _, extra::Err<Simple<Token>>, _, _>(|ast| {
///     let literal = select! {
///         Token::Num(x) => Ast::Num(x),
///         Token::Bool(x) => Ast::Bool(x),
///     };
///
///     literal.or(ast
///         .repeated()
///         .collect()
///         .delimited_by(just(Token::LParen), just(Token::RParen))
///         .map(Ast::List))
/// });
///
/// use Token::*;
/// assert_eq!(
///     ast.parse(&[LParen, Num(5), LParen, Bool(false), Num(42), RParen, RParen]).into_result(),
///     Ok(Ast::List(vec![
///         Ast::Num(5),
///         Ast::List(vec![
///             Ast::Bool(false),
///             Ast::Num(42),
///         ]),
///     ])),
/// );
/// ```
#[macro_export]
macro_rules! select {
    ($($p:pat $(= $extra:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({
        // Expands to a call to `primitive::select` with a closure that matches on the `(token, extra)` pair.
        //
        // Each user arm becomes a tuple pattern: when `= $extra` is written the second element is bound to that
        // identifier, otherwise the trailing `..` swallows it. The arm's value is `()` when no `=> $out` is given
        // and `$out` otherwise, wrapped in `Some`. Anything not matched by a user arm yields `None`.
        $crate::primitive::select(
            move |x, extra| match (x, extra) {
                $(($p $(,$extra)?, ..) $(if $guard)? => ::core::option::Option::Some({ () $(;$out)? })),+,
                _ => ::core::option::Option::None,
            }
        )
    });
}
2928
/// A version of [`select!`] that selects on tokens by reference instead of by value.
///
/// Useful if you want to extract elements from a token in a zero-copy manner.
///
/// The accepted syntax is the same as that of [`select!`]; see the docs for [`select!`] for more information.
///
/// Requires that the parser input implements [`BorrowInput`].
#[macro_export]
macro_rules! select_ref {
    ($($p:pat $(= $extra:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({
        // Expands to a call to `primitive::select_ref` with a closure that matches on the `(token, extra)` pair.
        //
        // Each user arm becomes a tuple pattern: when `= $extra` is written the second element is bound to that
        // identifier, otherwise the trailing `..` swallows it. The arm's value is `()` when no `=> $out` is given
        // and `$out` otherwise, wrapped in `Some`. Anything not matched by a user arm yields `None`.
        $crate::primitive::select_ref(
            move |x, extra| match (x, extra) {
                $(($p $(,$extra)?, ..) $(if $guard)? => ::core::option::Option::Some({ () $(;$out)? })),+,
                _ => ::core::option::Option::None,
            }
        )
    });
}
2947
2948#[cfg(test)]
2949mod tests {
2950 use crate::prelude::*;
2951
#[test]
fn zero_copy() {
    use crate::input::WithContext;
    use crate::prelude::*;

    // Both token kinds borrow their text directly from the input.
    #[derive(PartialEq, Debug)]
    enum Token<'src> {
        Ident(&'src str),
        String(&'src str),
    }

    type FileId = u32;
    type Span = SimpleSpan<usize, FileId>;

    // Lexes exactly six whitespace-padded tokens, pairing each token with its span.
    fn parser<'src>(
    ) -> impl Parser<'src, WithContext<Span, &'src str>, [(Span, Token<'src>); 6]> {
        // One or more alphanumeric characters.
        let ident = any()
            .filter(|c: &char| c.is_alphanumeric())
            .repeated()
            .at_least(1)
            .to_slice()
            .map(Token::Ident);

        // A double-quoted run of non-quote characters; the slice includes the quotes themselves.
        let not_quote = any().filter(|c: &char| *c != '"').repeated();
        let string = just('"')
            .then(not_quote)
            .then(just('"'))
            .to_slice()
            .map(Token::String);

        let spanned_token = ident.or(string).map_with(|token, e| (e.span(), token));
        spanned_token.padded().repeated().collect_exactly()
    }

    let source = r#"hello "world" these are "test" tokens"#;
    // Every span is expected to carry the context value (42) given to `with_context`.
    let expected = [
        (Span::new(42, 0..5), Token::Ident("hello")),
        (Span::new(42, 6..13), Token::String("\"world\"")),
        (Span::new(42, 14..19), Token::Ident("these")),
        (Span::new(42, 20..23), Token::Ident("are")),
        (Span::new(42, 24..30), Token::String("\"test\"")),
        (Span::new(42, 31..37), Token::Ident("tokens")),
    ];

    assert_eq!(
        parser().parse(source.with_context(42)).into_result(),
        Ok(expected),
    );
}
3003
#[test]
fn zero_copy_map_span() {
    use crate::{
        input::{SliceInput, ValueInput},
        prelude::*,
    };

    // Both token kinds borrow their text directly from the input.
    #[derive(PartialEq, Debug)]
    enum Token<'src> {
        Ident(&'src str),
        String(&'src str),
    }

    type FileId<'src> = &'src str;
    type Span<'src> = SimpleSpan<usize, FileId<'src>>;

    // Same lexer as in `zero_copy`, but generic over any input that yields `char`s, `&str` slices and our `Span`.
    fn parser<'src, I>() -> impl Parser<'src, I, [(Span<'src>, Token<'src>); 6]>
    where
        I: ValueInput<'src, Token = char, Span = Span<'src>>
            + SliceInput<'src, Slice = &'src str>,
    {
        // One or more alphanumeric characters.
        let ident = any()
            .filter(|c: &char| c.is_alphanumeric())
            .repeated()
            .at_least(1)
            .to_slice()
            .map(Token::Ident);

        // A double-quoted run of non-quote characters; the slice includes the quotes themselves.
        let not_quote = any().filter(|c: &char| *c != '"').repeated();
        let string = just('"')
            .then(not_quote)
            .then(just('"'))
            .to_slice()
            .map(Token::String);

        let spanned_token = ident.or(string).map_with(|token, e| (e.span(), token));
        spanned_token.padded().repeated().collect_exactly()
    }

    // The file name is a non-static `String`, so the spans borrow from a local value.
    let filename = "file.txt".to_string();
    let fstr = filename.as_str();

    // Re-tag every span produced by the `&str` input with the file name.
    let input = r#"hello "world" these are "test" tokens"#
        .map_span(|span| Span::new(fstr, span.start()..span.end()));

    let expected = [
        (Span::new("file.txt", 0..5), Token::Ident("hello")),
        (Span::new("file.txt", 6..13), Token::String("\"world\"")),
        (Span::new("file.txt", 14..19), Token::Ident("these")),
        (Span::new("file.txt", 20..23), Token::Ident("are")),
        (Span::new("file.txt", 24..30), Token::String("\"test\"")),
        (Span::new("file.txt", 31..37), Token::Ident("tokens")),
    ];

    assert_eq!(parser().parse(input).into_result(), Ok(expected));
}
3066
#[test]
fn zero_copy_repetition() {
    use crate::prelude::*;

    // A bracketed, comma-separated list of numbers of one to three digits each. Numbers and separators may be
    // padded with whitespace and a trailing separator is allowed.
    fn parser<'src>() -> impl Parser<'src, &'src str, Vec<u64>> {
        let number = any()
            .filter(|c: &char| c.is_ascii_digit())
            .repeated()
            .at_least(1)
            .at_most(3)
            .to_slice()
            .map(|b: &str| b.parse::<u64>().unwrap());
        let comma = just(',').padded();

        number
            .padded()
            .separated_by(comma)
            .allow_trailing()
            .collect()
            .delimited_by(just('['), just(']'))
    }

    // Inputs that must parse, paired with the expected numbers.
    let accepted: [(&str, Vec<u64>); 3] = [
        ("[122 , 23,43, 4, ]", vec![122, 23, 43, 4]),
        ("[0, 3, 6, 900,120]", vec![0, 3, 6, 900, 120]),
        ("[200,400,50 ,0,0, ]", vec![200, 400, 50, 0, 0]),
    ];
    for (input, expected) in accepted {
        assert_eq!(parser().parse(input).into_result(), Ok(expected));
    }

    // Inputs that must be rejected: a four-digit number, a leading separator, and two numbers with no separator.
    let rejected = ["[1234,123,12,1]", "[,0, 1, 456]", "[3, 4, 5, 67 89,]"];
    for input in rejected {
        assert!(parser().parse(input).has_errors());
    }
}
3103
#[test]
fn zero_copy_group() {
    use crate::prelude::*;

    // Parses, in order: a run of ASCII letters, a run of ASCII digits (as a `u64`), and a single non-whitespace
    // character. Each element may be padded with whitespace.
    fn parser<'src>() -> impl Parser<'src, &'src str, (&'src str, u64, char)> {
        let word = any()
            .filter(|c: &char| c.is_ascii_alphabetic())
            .repeated()
            .at_least(1)
            .to_slice()
            .padded();
        let number = any()
            .filter(|c: &char| c.is_ascii_digit())
            .repeated()
            .at_least(1)
            .to_slice()
            .map(|s: &str| s.parse::<u64>().unwrap())
            .padded();
        let symbol = any().filter(|c: &char| !c.is_whitespace()).padded();

        group((word, number, symbol))
    }

    // Inputs that must parse, paired with the expected tuple.
    let accepted: [(&str, (&str, u64, char)); 3] = [
        ("abc 123 [", ("abc", 123, '[')),
        ("among3d", ("among", 3, 'd')),
        ("cba321,", ("cba", 321, ',')),
    ];
    for (input, expected) in accepted {
        assert_eq!(parser().parse(input).into_result(), Ok(expected));
    }

    // Inputs that must be rejected.
    let rejected = ["abc 123 ", "123abc ]", "and one &"];
    for input in rejected {
        assert!(parser().parse(input).has_errors());
    }
}
3144
#[test]
fn zero_copy_group_array() {
    use crate::prelude::*;

    // `group` over an array of parsers yields an array of their outputs.
    fn parser<'src>() -> impl Parser<'src, &'src str, [char; 3]> {
        let letters = [just('a'), just('b'), just('c')];
        group(letters)
    }

    let matching = parser().parse("abc").into_result();
    assert_eq!(matching, Ok(['a', 'b', 'c']));

    let mismatching = parser().parse("abd");
    assert!(mismatching.has_errors());
}
3156
#[test]
fn unicode_str() {
    // A string mixing multi-byte symbols with single-byte ASCII characters.
    let input = "🄯🄚🄐🝋🄂🬯🈦g🍩🕔🈳2🬙🨞🅢🭳🎅h🧿🏩k🠡🀔🤟📵🤿🝜🙘5🠻🠓";
    let mut own = crate::input::InputOwn::<_, extra::Default>::new(input);
    let mut inp = own.as_ref_start();

    // Pull tokens until the input is exhausted; the test passes as long as nothing panics on the way.
    while inp.next().is_some() {}
}
3165
#[test]
fn iter() {
    use crate::prelude::*;

    // An iterable parser that yields every character of the input.
    fn parser<'src>() -> impl IterParser<'src, &'src str, char> {
        any().repeated()
    }

    // Drain the parser's iterator into a string, one character at a time.
    let mut chars = String::new();
    chars.extend(parser().parse_iter("abcdefg").into_result().unwrap());

    assert_eq!(chars, "abcdefg");
}
3181
#[test]
#[cfg(feature = "memoization")]
fn exponential() {
    use crate::prelude::*;

    // Grammar: `expr := atom '+' atom | atom`, `atom := letters | '(' expr ')'`. The sum branch is memoized.
    // NOTE(review): presumably the point of the test is that memoization keeps the deeply nested input below
    // from taking exponential time; confirm.
    fn parser<'src>() -> impl Parser<'src, &'src str, String> {
        let expr = recursive(|expr| {
            let word = any()
                .filter(|c: &char| c.is_alphabetic())
                .repeated()
                .at_least(1)
                .collect();
            let atom = word.or(expr.delimited_by(just('('), just(')')));

            let sum = atom
                .clone()
                .then_ignore(just('+'))
                .then(atom.clone())
                .map(|(a, b)| format!("{}{}", a, b))
                .memoized();

            sum.or(atom)
        });

        expr.then_ignore(end())
    }

    let deeply_nested = "((((((((((((((((((((((((((((((a+b))))))))))))))))))))))))))))))";
    parser().parse(deeply_nested).into_result().unwrap();
}
3211
#[test]
#[cfg(feature = "memoization")]
fn left_recursive() {
    use crate::prelude::*;

    // Grammar: `expr := expr '+' expr | letters`. The sum branch starts with `expr` itself (left recursion) and is
    // memoized.
    fn parser<'src>() -> impl Parser<'src, &'src str, String> {
        let expr = recursive(|expr| {
            let word = any()
                .filter(|c: &char| c.is_alphabetic())
                .repeated()
                .at_least(1)
                .collect();

            let lhs = expr.clone();
            let rhs = expr;
            let sum = lhs
                .then_ignore(just('+'))
                .then(rhs)
                .map(|(a, b)| format!("{}{}", a, b))
                .memoized();

            sum.or(word)
        });

        expr.then_ignore(end())
    }

    let output = parser().parse("a+b+c").into_result().unwrap();
    assert_eq!(output, "abc");
}
3239
3240 #[cfg(debug_assertions)]
3241 mod debug_asserts {
3242 use crate::prelude::*;
3243
3244 // TODO panic when left recursive parser is detected
3245 // #[test]
3246 // #[should_panic]
3247 // fn debug_assert_left_recursive() {
3248 // recursive(|expr| {
3249 // let atom = any::<&str, extra::Default>()
3250 // .filter(|c: &char| c.is_alphabetic())
3251 // .repeated()
3252 // .at_least(1)
3253 // .collect();
3254
3255 // let sum = expr
3256 // .clone()
3257 // .then_ignore(just('+'))
3258 // .then(expr)
3259 // .map(|(a, b)| format!("{}{}", a, b));
3260
3261 // sum.or(atom)
3262 // })
3263 // .then_ignore(end())
3264 // .parse("a+b+c");
3265 // }
3266
3267 #[test]
3268 #[should_panic]
3269 #[cfg(debug_assertions)]
3270 fn debug_assert_collect() {
3271 empty::<&str, extra::Default>()
3272 .to(())
3273 .repeated()
3274 .collect::<()>()
3275 .parse("a+b+c")
3276 .unwrap();
3277 }
3278
3279 #[test]
3280 #[should_panic]
3281 #[cfg(debug_assertions)]
3282 fn debug_assert_separated_by() {
3283 empty::<&str, extra::Default>()
3284 .to(())
3285 .separated_by(empty())
3286 .collect::<()>()
3287 .parse("a+b+c");
3288 }
3289
3290 #[test]
3291 fn debug_assert_separated_by2() {
3292 assert_eq!(
3293 empty::<&str, extra::Default>()
3294 .to(())
3295 .separated_by(just(','))
3296 .count()
3297 .parse(",")
3298 .unwrap(),
3299 2
3300 );
3301 }
3302
3303 #[test]
3304 #[should_panic]
3305 #[cfg(debug_assertions)]
3306 fn debug_assert_foldl() {
3307 assert_eq!(
3308 empty::<&str, extra::Default>()
3309 .to(1)
3310 .foldl(empty().repeated(), |n, ()| n + 1)
3311 .parse("a+b+c")
3312 .unwrap(),
3313 3
3314 );
3315 }
3316
3317 #[test]
3318 #[should_panic]
3319 #[cfg(debug_assertions)]
3320 fn debug_assert_foldl_with() {
3321 use extra::SimpleState;
3322
3323 let state = 100;
3324 empty::<&str, extra::Full<EmptyErr, SimpleState<i32>, ()>>()
3325 .foldl_with(empty().to(()).repeated(), |_, _, _| ())
3326 .parse_with_state("a+b+c", &mut state.into());
3327 }
3328
3329 #[test]
3330 #[should_panic]
3331 #[cfg(debug_assertions)]
3332 fn debug_assert_foldr() {
3333 empty::<&str, extra::Default>()
3334 .to(())
3335 .repeated()
3336 .foldr(empty(), |_, _| ())
3337 .parse("a+b+c");
3338 }
3339
3340 #[test]
3341 #[should_panic]
3342 #[cfg(debug_assertions)]
3343 fn debug_assert_foldr_with_state() {
3344 empty::<&str, extra::Default>()
3345 .to(())
3346 .repeated()
3347 .foldr_with(empty(), |_, _, _| ())
3348 .parse_with_state("a+b+c", &mut ());
3349 }
3350
3351 #[test]
3352 #[should_panic]
3353 #[cfg(debug_assertions)]
3354 fn debug_assert_repeated() {
3355 empty::<&str, extra::Default>()
3356 .to(())
3357 .repeated()
3358 .parse("a+b+c");
3359 }
3360
3361 // TODO what about IterConfigure and TryIterConfigure?
3362 }
3363
// Defining a declared recursive parser a second time is expected to panic.
#[test]
#[should_panic]
fn recursive_define_twice() {
    let mut expr = Recursive::declare();

    // First definition: `expr := expr '+' expr | letters`.
    let first_definition = {
        let word = any::<&str, extra::Default>()
            .filter(|c: &char| c.is_alphabetic())
            .repeated()
            .at_least(1)
            .collect();
        let sum = expr
            .clone()
            .then_ignore(just('+'))
            .then(expr.clone())
            .map(|(a, b)| format!("{}{}", a, b));

        sum.or(word)
    };
    expr.define(first_definition);

    // Second definition of the same parser.
    let second_definition = expr.clone();
    expr.define(second_definition);

    expr.then_ignore(end()).parse("a+b+c");
}
3386
// Using a `todo()` placeholder parser is expected to panic.
#[test]
#[should_panic]
fn todo_err() {
    let parser = todo::<&str, String, extra::Default>().then_ignore(end());
    parser.parse("a+b+c");
}
3393
// A parser wrapped in a plain `Box` must itself be usable as a parser.
#[test]
fn box_impl() {
    // A bracketed, comma-separated list of numbers of one to three digits each, returned inside a `Box`.
    fn parser<'src>() -> impl Parser<'src, &'src str, Vec<u64>> {
        let number = any()
            .filter(|c: &char| c.is_ascii_digit())
            .repeated()
            .at_least(1)
            .at_most(3)
            .to_slice()
            .map(|b: &str| b.parse::<u64>().unwrap());
        let comma = just(',').padded();

        let list = number
            .padded()
            .separated_by(comma)
            .allow_trailing()
            .collect()
            .delimited_by(just('['), just(']'));

        Box::new(list)
    }

    let cases: [(&str, Vec<u64>); 3] = [
        ("[122 , 23,43, 4, ]", vec![122, 23, 43, 4]),
        ("[0, 3, 6, 900,120]", vec![0, 3, 6, 900, 120]),
        ("[200,400,50 ,0,0, ]", vec![200, 400, 50, 0, 0]),
    ];
    for (input, expected) in cases {
        assert_eq!(parser().parse(input).into_result(), Ok(expected));
    }
}
3426
// A parser wrapped in an `Rc` must itself be usable as a parser.
#[test]
fn rc_impl() {
    use alloc::rc::Rc;

    // A bracketed, comma-separated list of numbers of one to three digits each, returned inside an `Rc`.
    fn parser<'src>() -> impl Parser<'src, &'src str, Vec<u64>> {
        let number = any()
            .filter(|c: &char| c.is_ascii_digit())
            .repeated()
            .at_least(1)
            .at_most(3)
            .to_slice()
            .map(|b: &str| b.parse::<u64>().unwrap());
        let comma = just(',').padded();

        let list = number
            .padded()
            .separated_by(comma)
            .allow_trailing()
            .collect()
            .delimited_by(just('['), just(']'));

        Rc::new(list)
    }

    let cases: [(&str, Vec<u64>); 3] = [
        ("[122 , 23,43, 4, ]", vec![122, 23, 43, 4]),
        ("[0, 3, 6, 900,120]", vec![0, 3, 6, 900, 120]),
        ("[200,400,50 ,0,0, ]", vec![200, 400, 50, 0, 0]),
    ];
    for (input, expected) in cases {
        assert_eq!(parser().parse(input).into_result(), Ok(expected));
    }
}
3461
3462 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
3463 struct MyErr(&'static str);
3464
3465 impl<'src, I: Input<'src>> crate::Error<'src, I> for MyErr {
3466 fn merge(self, other: Self) -> Self {
3467 if other == MyErr("special") {
3468 MyErr("special")
3469 } else {
3470 self
3471 }
3472 }
3473 }
3474
3475 impl<'src, I> crate::LabelError<'src, I, crate::DefaultExpected<'src, I::Token>> for MyErr
3476 where
3477 I: Input<'src>,
3478 {
3479 fn expected_found<E: IntoIterator<Item = crate::DefaultExpected<'src, I::Token>>>(
3480 _expected: E,
3481 _found: Option<crate::MaybeRef<'src, I::Token>>,
3482 _span: I::Span,
3483 ) -> Self {
3484 MyErr("expected found")
3485 }
3486 }
3487
// An error raised by `try_map` must be the one reported by the parse.
#[test]
fn err_prio_0() {
    #[allow(dead_code)]
    fn always_err<'src>() -> impl Parser<'src, &'src str, (), extra::Err<MyErr>> {
        let nothing = empty();
        nothing.try_map(|_, _| Err(MyErr("special")))
    }

    let errors = always_err().parse("test").into_result().unwrap_err();
    assert_eq!(errors, vec![MyErr("special")]);
}
3500
// As `err_prio_0`, but the `try_map` sits on top of a `choice` whose first alternative does not match the input.
#[test]
fn err_prio_1() {
    #[allow(dead_code)]
    fn always_err_choice<'src>() -> impl Parser<'src, &'src str, (), extra::Err<MyErr>> {
        let alternatives = choice((just("something").ignored(), empty()));
        alternatives.try_map(|_, _| Err(MyErr("special")))
    }

    let errors = always_err_choice().parse("test").into_result().unwrap_err();
    assert_eq!(errors, vec![MyErr("special")]);
}
3513
// Turning a collected `Vec` back into an iterable parser and collecting it again must succeed without errors.
#[test]
fn into_iter_no_error() {
    fn parser<'src>() -> impl Parser<'src, &'src str, (), extra::Err<MyErr>> {
        // One or more `a`s, each mapped to `()`, gathered into a `Vec`.
        let collected = just('a')
            .ignored()
            .repeated()
            .at_least(1)
            .collect::<Vec<_>>();

        collected.into_iter().collect()
    }

    let outcome = parser().parse("aaa").into_result();
    assert_eq!(outcome, Ok(()));
}
3528
// `flatten` on an iterable parser of `Option<char>` keeps only the `Some` items.
#[cfg(feature = "nightly")]
#[test]
fn flatten() {
    fn parser<'src>() -> impl Parser<'src, &'src str, Vec<char>, extra::Err<MyErr>> {
        // `Some('a')` for every `a`, `None` for any other character.
        let a_or_nothing = just('a').map(Some).or(any().to(None));
        let only_as = a_or_nothing.repeated().flatten().collect::<Vec<_>>();

        only_as.into_iter().collect()
    }

    let outcome = parser().parse("abracadabra").into_result();
    assert_eq!(outcome, Ok(vec!['a', 'a', 'a', 'a', 'a']));
}
3548
// Exercises `cache::Cache` with a boxed parser, with the cache and the input string created in both possible
// scope orders.
#[test]
#[cfg(feature = "unstable")]
fn cached() {
    // Accepts exactly five characters and returns them as a slice of the input.
    fn my_parser<'src>() -> impl Parser<'src, &'src str, &'src str, extra::Default> {
        any().repeated().exactly(5).to_slice()
    }

    struct MyCache;

    impl crate::cache::Cached for MyCache {
        type Parser<'src> = Boxed<'src, 'src, &'src str, &'src str, extra::Default>;

        fn make_parser<'src>(self) -> Self::Parser<'src> {
            Parser::boxed(my_parser())
        }
    }

    // usage < definition
    // (The cache is created once, outside the loop; each iteration parses a freshly allocated input string.)
    {
        let parser = crate::cache::Cache::new(MyCache);

        for _ in 0..2 {
            let s = "hello".to_string();

            assert_eq!(parser.get().parse(&s).into_result(), Ok("hello"));
            assert!(parser.get().parse("goodbye").into_result().is_err());
        }
    }

    // usage > definition
    // (The input string is created once, outside the loop; each iteration builds a fresh cache to parse it.)
    {
        let s = "hello".to_string();

        for _ in 0..2 {
            let parser = crate::cache::Cache::new(MyCache);

            assert_eq!(parser.get().parse(&s).into_result(), Ok("hello"));
            assert!(parser.get().parse("goodbye").into_result().is_err());
        }
    }
}
3590
// Compile-only check: the inner function is never called, so the test passes as long as a `map_with` closure that
// reads the span type-checks for a generic input.
#[test]
#[allow(dead_code)]
fn map_with_compiles() {
    // Uninhabited placeholder types; only the signatures matter here.
    enum Token {}
    enum Expr {}

    fn expr<'src, I>() -> impl Parser<'src, I, (Expr, SimpleSpan)> + 'src
    where
        I: Input<'src, Token = Token, Span = SimpleSpan> + 'src,
    {
        todo().map_with(|expr, e| (expr, e.span()))
    }
}
3604
// Checks the exact `Rich` errors produced by labelled parsers, with and without the label acting as a context.
#[test]
fn label() {
    use crate::label::LabelError;

    // `just("hello")`, labelled "greeting" and also registered as an error context.
    fn parser<'src>() -> impl Parser<'src, &'src str, (), extra::Err<Rich<'src, char>>> {
        just("hello").labelled("greeting").as_context().ignored()
    }

    // "bye": the expected error is the plain "expected 'h', found 'b'" error at 0..1 with the label applied.
    let mut err = <Rich<_> as crate::LabelError<&str, char>>::expected_found(
        ['h'],
        Some('b'.into()),
        (0..1).into(),
    );
    <Rich<_, _> as LabelError<&str, _>>::label_with(&mut err, "greeting");
    assert_eq!(parser().parse("bye").into_errors(), vec![err]);

    // "help": the expected error is "expected 'l', found 'p'" at 3..4, placed in the "greeting" context
    // spanning 0..3.
    let mut err = <Rich<_> as crate::LabelError<&str, char>>::expected_found(
        ['l'],
        Some('p'.into()),
        (3..4).into(),
    );
    <Rich<_, _> as LabelError<&str, _>>::in_context(&mut err, "greeting", (0..3).into());
    assert_eq!(parser().parse("help").into_errors(), vec![err]);

    // Same labelling, but on top of `text::keyword`.
    fn parser2<'src>() -> impl Parser<'src, &'src str, (), extra::Err<Rich<'src, char>>> {
        text::keyword("hello")
            .labelled("greeting")
            .as_context()
            .ignored()
    }

    // "goodbye": the expected error has no found token, spans 0..7, and carries the label.
    let mut err =
        <Rich<_> as crate::LabelError<&str, char>>::expected_found(['h'], None, (0..7).into());
    <Rich<_, _> as LabelError<&str, _>>::label_with(&mut err, "greeting");
    assert_eq!(parser2().parse("goodbye").into_errors(), vec![err]);
}
3641
// An invalid escape sequence inside a string literal must be reported at the offending character.
#[test]
#[allow(dead_code)]
fn invalid_escape() {
    use crate::LabelError;

    // A double-quoted string in which `\n` is the only permitted escape; returns the text between the quotes.
    fn string<'src>() -> impl Parser<'src, &'src str, &'src str, extra::Err<Rich<'src, char>>> {
        let quote = just("\"");
        let escaped = just("\\").then(just("n"));
        let unescaped = none_of("\\\"");

        let string_char = unescaped.ignored().or(escaped.ignored());
        string_char.repeated().to_slice().delimited_by(quote, quote)
    }

    // `\m` is not a valid escape: 'n' was expected where 'm' was found, at 7..8.
    let expected = <Rich<char> as LabelError::<&str, char>>::expected_found(
        ['n'],
        Some('m'.into()),
        (7..8).into(),
    );

    assert_eq!(
        string().parse(r#""Hello\m""#).into_result(),
        Err(vec![expected]),
    );
}
3671
// An error rewritten by `map_err` must still be merged with the expectations of the preceding optional parser.
#[test]
#[allow(dead_code)]
fn map_err_missed_info() {
    use crate::LabelError;

    // An optional `-` followed by `0`; the error of the `0` parser is rewritten to expect 'n' instead.
    fn zero<'src>() -> impl Parser<'src, &'src str, (), extra::Err<Rich<'src, char>>> {
        let zero_digit = just("0").map_err(move |e: Rich<_>| {
            LabelError::<&str, char>::expected_found(
                ['n'],
                e.found().map(|i| From::from(*i)),
                *e.span(),
            )
        });

        just("-").or_not().then(zero_digit).ignored()
    }

    // Both the skipped `-` and the rewritten 'n' are expected where '_' was found, at 0..1.
    let expected = <Rich<char> as LabelError::<&str, char>>::expected_found(
        ['-', 'n'],
        Some('_'.into()),
        (0..1).into(),
    );

    assert_eq!(zero().parse("_0").into_result(), Err(vec![expected]));
}
3701
// `map_err` must be handed the original error and its replacement must be what the parse reports.
#[test]
fn map_err() {
    use crate::LabelError;

    // Expects a single `"`; on failure the error is printed and then rebuilt from its own `found` and `span`.
    let parser = just::<char, &str, extra::Err<_>>('"').map_err(move |e: Rich<char>| {
        // Diagnostic output only; it is not asserted on.
        println!("Found = {:?}", e.found());
        println!("Expected = {:?}", e.expected().collect::<Vec<_>>());
        println!("Span = {:?}", e.span());
        LabelError::<&str, char>::expected_found(
            ['"'],
            e.found().copied().map(Into::into),
            *e.span(),
        )
    });

    // Parsing "H" must report: expected '"', found 'H', at 0..1.
    assert_eq!(
        parser.parse(r#"H"#).into_result(),
        Err(vec![LabelError::<&str, char>::expected_found(
            ['"'],
            Some('H'.into()),
            (0..1).into()
        )])
    );
}
3726
// A custom parser that succeeds without consuming anything must still lead to an error when input is left over.
#[test]
fn zero_size_custom_failure() {
    // Tries to parse "foo"; on failure it rewinds to where it started and reports success anyway.
    fn my_custom<'src>() -> impl Parser<'src, &'src str, ()> {
        custom(|inp| {
            let checkpoint = inp.save();
            let foo_failed = inp.parse(just("foo")).is_err();
            if foo_failed {
                inp.rewind(checkpoint);
            }
            Ok(())
        })
    }

    let result = my_custom().parse("not foo");
    assert!(result.has_errors());
}
3741
// Checks that the expectations of several parsers that could have matched at the same position are all merged
// into a single error.
#[test]
fn labels() {
    use crate::{DefaultExpected, Error, LabelError, TextExpected};

    // An optional "a" followed by whitespace.
    let parser = just("a")
        .or_not()
        .then(text::whitespace::<&str, extra::Err<Rich<_>>>());

    // For input "b" there is no output and exactly one error at 0..1 (found 'b'), built by merging three
    // expectations: the token 'a', whitespace, and end of input.
    assert_eq!(
        parser.parse("b").into_output_errors(),
        (
            None,
            vec![Error::<&str>::merge(
                Error::<&str>::merge(
                    LabelError::<&str, _>::expected_found(
                        vec![DefaultExpected::Token('a'.into())],
                        Some('b'.into()),
                        SimpleSpan::new((), 0..1)
                    ),
                    LabelError::<&str, _>::expected_found(
                        vec![TextExpected::<&str>::Whitespace],
                        Some('b'.into()),
                        SimpleSpan::new((), 0..1)
                    ),
                ),
                LabelError::<&str, _>::expected_found(
                    vec![DefaultExpected::EndOfInput],
                    Some('b'.into()),
                    SimpleSpan::new((), 0..1)
                ),
            )]
        )
    );
}
3776
3777 /*
3778 #[test]
3779 fn label_sets() {
3780 use crate::{DefaultExpected, Error, LabelError, TextExpected, text::whitespace};
3781
3782 fn tuple<'input>() -> impl Parser<'input, &'input str, (), extra::Err<Rich<'input, char, SimpleSpan>>> {
3783 just("a")
3784 .repeated()
3785 .then_ignore(whitespace())
3786 .separated_by(just(","))
3787 .then_ignore(just(")"))
3788 }
3789
3790 assert_eq!(
3791 tuple().parse("a").into_output_errors(),
3792 (
3793 None,
3794 vec![Error::<&str>::merge(
3795 LabelError::<&str, _>::expected_found(
3796 vec![TextExpected::<&str>::Whitespace],
3797 None,
3798 SimpleSpan::new((), 1..1)
3799 ),
3800 LabelError::<&str, _>::expected_found(
3801 vec![
3802 DefaultExpected::Token('a'.into()),
3803 DefaultExpected::Token(','.into()),
3804 DefaultExpected::Token(')'.into()),
3805 ],
3806 None,
3807 SimpleSpan::new((), 1..1)
3808 )
3809 )]
3810 )
3811 );
3812 }
3813 */
3814}