Source code for pycoparsec.parser

import logging
from itertools import tee, chain
from typing import Generic, TypeVar, Iterator, Callable, Protocol, Optional, List

class SupportsAdd(Protocol):
    """Structural type satisfied by any object that defines ``__add__``.

    It is used as the bound of the parser output type variable, because
    chained parsers combine their outputs with ``+``.
    """

    def __add__(self, other):
        """Return the combination of this object and ``other``."""
S = TypeVar('S') # Token stream type O = TypeVar('O', bound=SupportsAdd) # Parser output type. Repeated successful parsings call O.__add__.
class FailedParsing(Exception):
    """Raised by a parser that could not match the tokens at the current position."""
class DoneParsing(Exception, Generic[S]):
    """Signals that the raising parser reached the end of the input.

    Early exits and other "parsing is finished" signals are planned but not
    implemented yet; the currently unused ``remaining`` argument is reserved
    for them.

    :param remaining: Tokens that were not ingested. They are drained into
        the ``remaining`` list attribute; when that list is non-empty it is
        included in the exception message.
    """

    def __init__(self, remaining: Iterator[S] = iter(())):
        leftovers = list(remaining)
        self.remaining = leftovers
        if not leftovers:
            super().__init__()
            return
        super().__init__(
            f"There are still tokens left in the stream! Here's what didn't get ingested:\n{self.remaining}"
        )
class _BacktrackingStream(Generic[S]):
    """Iterator wrapper that remembers every token it has handed out.

    ``Parser.run`` uses it to restart an alternative from the position where
    a failed attempt began, and to leave the stream positioned just past the
    tokens consumed by whichever attempt succeeded.
    """

    def __init__(self, source) -> None:
        self._source = iter(source)
        self._seen: List[S] = []  # every token pulled from the source so far
        self._position = 0        # index into _seen of the next token to yield

    def __iter__(self):
        return self

    def __next__(self):
        if self._position == len(self._seen):
            # Nothing buffered ahead: pull a fresh token. StopIteration from
            # the underlying source propagates unchanged.
            self._seen.append(next(self._source))
        token = self._seen[self._position]
        self._position += 1
        return token

    def mark(self) -> int:
        """Return the current position so it can be restored with ``rewind``."""
        return self._position

    def rewind(self, mark: int) -> None:
        """Move back to a position previously returned by ``mark``."""
        self._position = mark


class Parser(Generic[S, O]):
    """This class implements a parser-combinator style parser on arbitrary None-less iterators.

    ``None`` is reserved: ``run`` hands ``None`` to the matcher when the input
    is exhausted, and a matcher returns ``None`` to report a failed match.

    :ivar matcher: The meat-and-potatoes of the parser. Takes in the next
        token from the stream, and the rest of the token iterator. Gives back
        either a constructed *output object* or None if the parse failed.
    :ivar choices: A list of other Parsers to try in order if this Parser
        fails. By default, a newly constructed parser always fails, so
        something like ``Parser().choice([parser1, parser2])`` will always
        defer to ``parser1`` and then ``parser2``.
    """

    def __init__(self) -> None:
        self.matcher: Callable[[S, Iterator[S]], Optional[O]] = lambda tok, rest: None
        self.choices: List["Parser[S, O]"] = []

    def exactly(self, token: S, factory: Callable[[S], O]) -> "Parser[S, O]":
        """Match one element of the input stream exactly, then exit.

        Replaces this parser's matcher.

        :param token: The value the next token must compare equal to.
        :param factory: Builds the output object from the matched token.
        :returns: This parser, to allow chaining.
        """
        self.matcher = lambda tok, rest: factory(tok) if tok == token else None
        return self

    def then(self, parser: "Parser[S, O]") -> "Parser[S, O]":
        """Chain another parser onto this one, linking their success states together.

        Successful parse chains call `__add__` on the output object to append
        them together. If the default behavior of `__add__` does not support
        the behavior you want, please make a new class which overrides
        `__add__` and inherits behavior from your desired output type.

        :param parser: The parser to run on the tokens that follow this
            parser's match.
        :returns: This parser (its matcher is replaced), to allow chaining.
        """
        # Capture the current matcher now; self.matcher is rebound below.
        first_stage = self.matcher

        def _matcher(tok, rest):
            out = first_stage(tok, rest)
            if out is None:
                return None
            try:
                subout = parser.run(rest)
            except FailedParsing:
                return None
            if subout is None:
                return None
            return out + subout

        self.matcher = _matcher
        return self

    def choice(self, choices: List["Parser[S, O]"]) -> "Parser[S, O]":
        """Add a list of alternative Parsers in the case that this Parser fails.

        :param choices: Parsers appended, in order, to ``self.choices``.
        :returns: This parser, to allow chaining.
        """
        self.choices.extend(choices)
        return self

    def __ror__(self, other: "Parser[S, O]") -> "Parser[S, O]":
        """Cute syntax for supplying alternatives. Allows you to use something like ``(parser1 | parser2).run()``

        This is the reflected form (``other | self``), so ``other`` must be
        tried first. A fresh parser, which always fails on its own, defers to
        ``other`` and then to ``self``.
        """
        return Parser().choice([other, self])

    def __or__(self, other: "Parser[S, O]") -> "Parser[S, O]":
        """Cute syntax for supplying alternatives. Allows you to use something like ``(parser1 | parser2).run()``

        Appends ``other`` to this parser's alternatives and returns this parser.
        """
        self.choice([other])
        return self

    def run(self, iter: Iterator[S]) -> O:
        """Run this parser.

        The matcher is tried first; if it fails, each entry of ``choices`` is
        tried in order from the same starting position.

        :param iter: The tokens to parse. (The name shadows the builtin but
            is kept because it is part of the public signature.)
        :returns: The output object of the first attempt that succeeds.
        :raises FailedParsing: If neither the matcher nor any alternative
            matches.
        """
        # Track the position explicitly instead of relying on side effects of
        # nested itertools.tee objects: after a sub-parser backtracks, the
        # stream must sit just past what the *successful* attempt consumed.
        if isinstance(iter, _BacktrackingStream):
            stream = iter
        else:
            stream = _BacktrackingStream(iter)
        start = stream.mark()

        tok = next(stream, None)  # None signals exhausted input to the matcher
        if (out := self.matcher(tok, stream)) is not None:
            return out

        for index, alternative in enumerate(self.choices):
            stream.rewind(start)
            try:
                out = alternative.run(stream)
            except FailedParsing:
                logging.getLogger(__name__).debug("Failed subparser %d", index)
                continue
            if out is not None:
                return out

        # Leave the stream where we found it so an enclosing parser can try
        # its own alternatives from the right place.
        stream.rewind(start)
        raise FailedParsing