little-earley-0.2.0.0: Simple implementation of Earley parsing
Safe HaskellSafe-Inferred
LanguageHaskell2010

Little.Earley

Description

Utilities for interpreting context-free grammars as Earley parsers and for processing parse trees.

The library is parameterized by types for non-terminal symbols n, terminal symbols t, and tokens c.

  • Non-terminal symbols n are associated with production rules.
  • Terminal symbols t denote sets of tokens c.
  • Lists of tokens [c] form the actual inputs.

Example

import Data.Char (isDigit)
import Little.Earley

data Ns = SUM | PRODUCT | FACTOR | NUMBER deriving (Eq, Ord, Enum, Bounded, Show)
data Ts = Digit | OneOf [Char] deriving (Eq, Ord, Show)

arithRules :: Ns -> [Rule Ns Ts]
arithRules n = case n of
  SUM ->
    [ [ N PRODUCT ]
    , [ N SUM, T (OneOf ['+', '-']), N PRODUCT ] ]
  PRODUCT ->
    [ [ N FACTOR ]
    , [ N PRODUCT, T (OneOf ['*', '/']), N FACTOR ] ]
  FACTOR ->
    [ [ N NUMBER ]
    , [ T (OneOf ['(']), N SUM, T (OneOf [')']) ] ]
  NUMBER ->
    [ [ T Digit ]
    , [ T Digit, N NUMBER ] ]

matchTs :: Ts -> Char -> Bool
matchTs Digit = isDigit
matchTs (OneOf s) = (`elem` s)

arithG :: Grammar Ns Ts Char
arithG = mkGrammar arithRules matchTs

Load that file in GHCi, then parse a string using that grammar:

> pparse arithG SUM "1+2*3"

See also Little.Earley.Examples for some examples of grammars.

Synopsis

Basic interface

parse :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> Result n t c Source #

Parse a chain of tokens [c] given a grammar and a starting symbol n.

Variants:

Example

parse arithG SUM "1+2*3"

pparse :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> Pretty (Result n t c) Source #

Wrapped parse with a pretty-printed result. Use this in the REPL.

Example

pparse arithG SUM "1+2*3"

Output:

     +-----+--SUM #1---+
     |     |           |
  SUM #0   |      +PRODUCT #1-+
     |     |      |     |     |
PRODUCT #0 | PRODUCT #0 |     |
     |     |      |     |     |
 FACTOR #0 |  FACTOR #0 | FACTOR #0
     |     |      |     |     |
 NUMBER #0 |  NUMBER #0 | NUMBER #0
     |     |      |     |     |
-----------------------------------
     1     +      2     *     3

parseTreeSet :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> Maybe (TreeSet n t c) Source #

Parse a chain of tokens [c] into a parse tree. Simplified variant of parse.

accepts :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> Bool Source #

Check whether a grammar matches a chain of character [c] from a starting symbol n.

data Result n t c Source #

Result of parse.

Constructors

ParseError (Error t c) 
ParseSuccess (ParsedResult n t c) 

Instances

Instances details
(PrettyPrint n, PrettyPrint t, PrettyPrint c) => PrettyPrint (Result n t c) Source # 
Instance details

Defined in Little.Earley.Internal.Result

Methods

prettyPrint :: Result n t c -> String Source #

data ParsedResult n t c Source #

Successful result of parse.

Constructors

ParsedResult 

Fields

data Error t c Source #

Parser error information.

Constructors

Error 

Fields

Grammars

data Grammar n t c Source #

Grammars with non-terminal symbols n, terminal symbols t, and tokens c.

A grammar defines a language, which is a set of sequences of tokens c.

Two basic choices for t and c are:

  • t = CharT and c = Char, with match = matchCharT: then the input [c] is a String.
  • t = String and c = String, with match = (==): then the input [c] is a [String], which can be produced using words; just remember to put spaces around operators and parentheses.

See also examples in Little.Earley.Examples.

Constructors

Grammar 

Fields

  • rules :: n -> [Rule n t]

    Production rules associated with each non-terminal symbol.

  • match :: t -> c -> Bool

    Match a token c with a terminal symbol t.

  • isNullable :: n -> Bool

    Predicate for non-terminal symbols which may expand to the empty string. This function MUST be correct for the library to work. It can be populated automatically using mkGrammar.

mkGrammar :: (Ord n, Bounded n, Enum n) => (n -> [Rule n t]) -> (t -> c -> Bool) -> Grammar n t c Source #

Construct a grammar given the fields rules and match, implicitly populating isNullable.

type Rule n t = [Atom n t] Source #

A production rule is a sequence of atoms.

data Atom n t Source #

An atom is either a non-terminal or a terminal.

Constructors

N n 
T t 

Instances

Instances details
(Eq n, Eq t) => Eq (Atom n t) Source # 
Instance details

Defined in Little.Earley.Internal.Core

Methods

(==) :: Atom n t -> Atom n t -> Bool #

(/=) :: Atom n t -> Atom n t -> Bool #

(Ord n, Ord t) => Ord (Atom n t) Source # 
Instance details

Defined in Little.Earley.Internal.Core

Methods

compare :: Atom n t -> Atom n t -> Ordering #

(<) :: Atom n t -> Atom n t -> Bool #

(<=) :: Atom n t -> Atom n t -> Bool #

(>) :: Atom n t -> Atom n t -> Bool #

(>=) :: Atom n t -> Atom n t -> Bool #

max :: Atom n t -> Atom n t -> Atom n t #

min :: Atom n t -> Atom n t -> Atom n t #

(Show n, Show t) => Show (Atom n t) Source # 
Instance details

Defined in Little.Earley.Internal.Core

Methods

showsPrec :: Int -> Atom n t -> ShowS #

show :: Atom n t -> String #

showList :: [Atom n t] -> ShowS #

data RuleId n Source #

A rule can be identified by a non-terminal and an index into all of the associated rules of that non-terminal.

Constructors

RuleId n Int 

Instances

Instances details
Eq n => Eq (RuleId n) Source # 
Instance details

Defined in Little.Earley.Internal.Core

Methods

(==) :: RuleId n -> RuleId n -> Bool #

(/=) :: RuleId n -> RuleId n -> Bool #

Ord n => Ord (RuleId n) Source # 
Instance details

Defined in Little.Earley.Internal.Core

Methods

compare :: RuleId n -> RuleId n -> Ordering #

(<) :: RuleId n -> RuleId n -> Bool #

(<=) :: RuleId n -> RuleId n -> Bool #

(>) :: RuleId n -> RuleId n -> Bool #

(>=) :: RuleId n -> RuleId n -> Bool #

max :: RuleId n -> RuleId n -> RuleId n #

min :: RuleId n -> RuleId n -> RuleId n #

Show n => Show (RuleId n) Source # 
Instance details

Defined in Little.Earley.Internal.Core

Methods

showsPrec :: Int -> RuleId n -> ShowS #

show :: RuleId n -> String #

showList :: [RuleId n] -> ShowS #

Parse trees

data TreeT f n t c Source #

Generalized parse tree.

A basic parse tree (Tree) consists of leaves labeled terminal symbols t (Leaf) and nodes labeled with grammar rules associated to nonterminal symbols ((Brch)).

Other variants of parse trees (TreeSet, TruncatedTreeSet) can be represented using extension nodes (Ext).

Trees may be infinite due to an input string matching infinitely many parse trees. Note that even though StrictData is enabled, we get laziness via the list type [] and tuple type (,).

Constructors

Leaf Int t c

The Int field is the position of the token in the input.

Brch (RuleId n) Int Int [TreeT f n t c]

The Int fields are the endpoints of this subtree in the input.

Ext (f (TreeT f n t c)) 

Instances

Instances details
(Eq n, Eq t, Eq c, Eq (f (TreeT f n t c))) => Eq (TreeT f n t c) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(==) :: TreeT f n t c -> TreeT f n t c -> Bool #

(/=) :: TreeT f n t c -> TreeT f n t c -> Bool #

(Show n, Show t, Show c, Show (f (TreeT f n t c))) => Show (TreeT f n t c) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

showsPrec :: Int -> TreeT f n t c -> ShowS #

show :: TreeT f n t c -> String #

showList :: [TreeT f n t c] -> ShowS #

type Tree = TreeT NoExt Source #

Basic parse tree.

type TreeSet = TreeT Choice Source #

A set of Tree, using a compact encoding.

type TruncatedTree = TreeT (Sum Ellipsis NoExt) Source #

Result of truncateTree applied to a Tree.

Parse tree modifiers

data NoExt a Source #

No extensions.

Instances

Instances details
Functor NoExt Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

fmap :: (a -> b) -> NoExt a -> NoExt b #

(<$) :: a -> NoExt b -> NoExt a #

Eq (NoExt a) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(==) :: NoExt a -> NoExt a -> Bool #

(/=) :: NoExt a -> NoExt a -> Bool #

Show (NoExt a) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

showsPrec :: Int -> NoExt a -> ShowS #

show :: NoExt a -> String #

showList :: [NoExt a] -> ShowS #

data Choice a Source #

Choice constructor to represent TreeSet.

Constructors

a :|: a infixr 1 

Instances

Instances details
Functor Choice Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

fmap :: (a -> b) -> Choice a -> Choice b #

(<$) :: a -> Choice b -> Choice a #

HasChoice Choice Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(.:|:) :: a -> a -> Choice a Source #

Eq a => Eq (Choice a) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(==) :: Choice a -> Choice a -> Bool #

(/=) :: Choice a -> Choice a -> Bool #

Show a => Show (Choice a) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

showsPrec :: Int -> Choice a -> ShowS #

show :: Choice a -> String #

showList :: [Choice a] -> ShowS #

HasChoice (Sum f Choice) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(.:|:) :: a -> a -> Sum f Choice a Source #

class HasChoice f Source #

Overloaded version of (:|:).

Minimal complete definition

(.:|:)

Instances

Instances details
HasChoice Choice Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(.:|:) :: a -> a -> Choice a Source #

HasChoice (Sum f Choice) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(.:|:) :: a -> a -> Sum f Choice a Source #

(|:) :: HasChoice f => TreeT f n t c -> TreeT f n t c -> TreeT f n t c infixr 1 Source #

Construct the disjunction of two trees featuring the Choice functor.

data Ellipsis a Source #

Ellided by truncateTree.

Constructors

Ellipsis 

Instances

Instances details
Functor Ellipsis Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

fmap :: (a -> b) -> Ellipsis a -> Ellipsis b #

(<$) :: a -> Ellipsis b -> Ellipsis a #

Eq (Ellipsis a) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(==) :: Ellipsis a -> Ellipsis a -> Bool #

(/=) :: Ellipsis a -> Ellipsis a -> Bool #

Show (Ellipsis a) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

showsPrec :: Int -> Ellipsis a -> ShowS #

show :: Ellipsis a -> String #

showList :: [Ellipsis a] -> ShowS #

ellipsis :: TreeT (Sum Ellipsis f) n t c Source #

Empty tree.

data Sum f g a Source #

Like Sum from Data.Functor.Sum but with more basic instances

Constructors

InL (f a) 
InR (g a) 

Instances

Instances details
(Functor f, Functor g) => Functor (Sum f g) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

fmap :: (a -> b) -> Sum f g a -> Sum f g b #

(<$) :: a -> Sum f g b -> Sum f g a #

HasChoice (Sum f Choice) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(.:|:) :: a -> a -> Sum f Choice a Source #

(Eq (f a), Eq (g a)) => Eq (Sum f g a) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(==) :: Sum f g a -> Sum f g a -> Bool #

(/=) :: Sum f g a -> Sum f g a -> Bool #

(Show (f a), Show (g a)) => Show (Sum f g a) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

showsPrec :: Int -> Sum f g a -> ShowS #

show :: Sum f g a -> String #

showList :: [Sum f g a] -> ShowS #

Operations on parse trees

fromSingleton :: TreeSet n t c -> Maybe (Tree n t c) Source #

Return Just if the given TreeSet represents a single Tree, Nothing otherwise (ambiguous parse tree).

arbTree :: TreeSet n t c -> Tree n t c Source #

Get an arbitrary Tree from a TreeSet, even if it is ambiguous.

truncateTree :: Functor f => Int -> TreeT f n t c -> TreeT (Sum Ellipsis f) n t c Source #

Truncate a tree to finite depth.

truncateTree :: Int -> TreeSet n t c -> TruncatedTreeSet n t c
truncateTree :: Int -> Tree n t c -> TruncatedTree n t c

ambiguities :: TreeSet n t c -> [LocAmbiguity n t c] Source #

Enumerate (some) ambiguous parses.

If there are multiple ambiguities at the same location, we just pick an arbitrary example.

data Range Source #

An interval in some input sequence.

Constructors

Range 

Fields

Instances

Instances details
Eq Range Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

(==) :: Range -> Range -> Bool #

(/=) :: Range -> Range -> Bool #

Ord Range Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

compare :: Range -> Range -> Ordering #

(<) :: Range -> Range -> Bool #

(<=) :: Range -> Range -> Bool #

(>) :: Range -> Range -> Bool #

(>=) :: Range -> Range -> Bool #

max :: Range -> Range -> Range #

min :: Range -> Range -> Range #

Show Range Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

showsPrec :: Int -> Range -> ShowS #

show :: Range -> String #

showList :: [Range] -> ShowS #

data Ambiguity n t c Source #

Evidence of ambiguity: two parse trees for the same input.

Constructors

Ambiguity (Tree n t c) (Tree n t c) 

Instances

Instances details
(Eq n, Eq t, Eq c) => Eq (Ambiguity n t c) Source #

This instance treats Ambiguity as an unordered pair.

Instance details

Defined in Little.Earley.Internal.Tree

Methods

(==) :: Ambiguity n t c -> Ambiguity n t c -> Bool #

(/=) :: Ambiguity n t c -> Ambiguity n t c -> Bool #

(Show n, Show t, Show c) => Show (Ambiguity n t c) Source # 
Instance details

Defined in Little.Earley.Internal.Tree

Methods

showsPrec :: Int -> Ambiguity n t c -> ShowS #

show :: Ambiguity n t c -> String #

showList :: [Ambiguity n t c] -> ShowS #

type LocAmbiguity n t c = (Range, Ambiguity n t c) Source #

Ambiguity at a given location.

Pretty-printing

class PrettyPrint a where Source #

A class for ad hoc pretty-printers.

Minimal complete definition

Nothing

Methods

prettyPrint :: a -> String Source #

default prettyPrint :: Show a => a -> String Source #

Instances

Instances details
PrettyPrint Char Source #

Display the character without quotes.

Instance details

Defined in Little.Earley.Internal.Pretty

Show a => PrettyPrint a Source # 
Instance details

Defined in Little.Earley.Internal.PrettyOrphan

Methods

prettyPrint :: a -> String Source #

PrettyPrint String Source #

Display the string without quotes.

Instance details

Defined in Little.Earley.Internal.Pretty

(PrettyPrint n, PrettyPrint t, PrettyPrint c) => PrettyPrint (Result n t c) Source # 
Instance details

Defined in Little.Earley.Internal.Result

Methods

prettyPrint :: Result n t c -> String Source #

newtype Pretty a Source #

Wrapper whose Show instance uses PrettyPrint.

This provides a convenient and explicit way to display results nicely in the REPL. See also pparse.

Constructors

Pretty a 

Instances

Instances details
PrettyPrint a => Show (Pretty a) Source # 
Instance details

Defined in Little.Earley.Internal.Pretty

Methods

showsPrec :: Int -> Pretty a -> ShowS #

show :: Pretty a -> String #

showList :: [Pretty a] -> ShowS #

drawTree :: (n -> String) -> (c -> String) -> Tree n t c -> [String] Source #

Draw a tree in the terminal.

prettyTree :: (PrettyPrint n, PrettyPrint c) => Tree n t c -> [String] Source #

drawTree using prettyPrint to show symbols.