{-# OPTIONS_HADDOCK not-home #-} {-# LANGUAGE OverloadedLists #-} module Pinchot.Rules where import qualified Control.Lens as Lens import Control.Monad (join) import Control.Monad.Trans.State (get, put, State) import qualified Control.Monad.Trans.State as State import Data.Monoid ((<>)) import Data.Sequence (Seq, (<|)) import qualified Data.Sequence as Seq import Data.Set (Set) import qualified Data.Set as Set import Pinchot.Types import Pinchot.Intervals -- | Name a 'Rule' for use in error messages. If you do not name a -- rule using this combinator, the rule's type name will be used in -- error messages. label :: Rule t -> String -> Rule t label (Rule n _ t) s = Rule n (Just s) t -- | Infix synonym for 'label'. Example: -- 'Pinchot.Examples.Postal.rDigit'. (<?>) :: Rule t -> String -> Rule t (<?>) = label infixr 0 <?> -- | Constructs a 'Rule' with no description. rule :: RuleName -> RuleType t -> Rule t rule n = Rule n Nothing -- | Creates a terminal production rule. Example: -- 'Pinchot.Examples.Postal.rLetter'. terminal :: RuleName -> Intervals t -- ^ Valid terminal symbols -> Rule t terminal n i = rule n (Terminal i) -- | Creates a non-terminal production rule. This is the most -- flexible way to create non-terminals. You can even create a -- non-terminal that depends on itself. Example: -- 'Pinchot.Examples.Postal.rLetters'. nonTerminal :: RuleName -- ^ Will be used for the name of the resulting type -> Seq (BranchName, Seq (Rule t)) -- ^ Branches of the non-terminal production rule. This 'Seq' -- must have at least one element; otherwise, an error will -- result. -> Rule t nonTerminal n branches = case Lens.uncons branches of Nothing -> error $ "nonTerminal: rule has no branches: " ++ n Just (b, bs) -> rule n (NonTerminal (f b) (fmap f bs)) where f = uncurry Branch -- | Creates a non-terminal production rule where each branch has -- only one production. This function ultimately uses -- 'nonTerminal'. Each branch is assigned a 'BranchName' that is -- -- @RULE_NAME'PRODUCTION_NAME@ -- -- where @RULE_NAME@ is the name of the rule itself, and -- @PRODUCTION_NAME@ is the rule name for what is being produced. -- -- Example: 'Pinchot.Examples.Postal.rDirection'. union :: RuleName -- ^ Will be used for the name of the resulting type -> Seq (Rule t) -- ^ List of branches. There must be at least one branch; -- otherwise a compile-time error will occur. -> Rule t union n rs = nonTerminal n (fmap f rs) where f rule@(Rule branchName _ _) = (n ++ '\'' : branchName, Seq.singleton rule) -- | Creates a production for a sequence of terminals. Useful for -- parsing specific words. Ultimately this is simply a function -- that creates a 'Rule' using the 'record' function. -- -- In @terminals n s@, For each 'Char' in the 'String', a 'Rule' is -- created whose 'RuleName' is @n@ followed by an apostrophe -- followed by the index of the position of the 'Char'. -- -- Examples: 'Pinchot.Examples.Postal.rBoulevard'. terminals :: RuleName -- ^ Will be used for the name of the resulting type, and for the -- name of the sole data constructor -> String -> Rule Char terminals n s = record n rules where rules = Seq.fromList . zipWith mkRule [(0 :: Int) ..] $ s mkRule idx char = terminal nm (solo char) where nm = n ++ ('\'' : show idx) -- | Creates a newtype wrapper. Example: -- 'Pinchot.Examples.Postal.rCity'. wrap :: RuleName -- ^ Will be used for the name of the resulting data type, and for -- the name of the sole data constructor -> Rule t -- ^ The resulting 'Rule' simply wraps this 'Rule'. -> Rule t wrap n r = rule n (Wrap r) -- | Creates a new non-terminal production rule with only one -- alternative where each field has a record name. The name of each -- record is: -- -- @_r\'RULE_NAME\'INDEX\'FIELD_TYPE@ -- -- where @RULE_NAME@ is the name of this rule, @INDEX@ is the index number -- for this field (starting with 0), and @FIELD_TYPE@ is the type of the -- field itself. -- -- Currently there is no way to change the names of the record fields. -- -- Example: 'Pinchot.Examples.Postal.rAddress'. record :: RuleName -- ^ The name of this rule, which is used both as the type name -- and for the name of the sole data constructor -> Seq (Rule t) -- ^ The right-hand side of this rule. This sequence can be empty, -- which results in an epsilon production. -> Rule t record n rs = rule n (Record rs) -- | Creates a rule for a production that optionally produces another -- rule. The name for the created 'Rule' is the name of the 'Rule' to -- which this function is applied, with @'Opt@ appended to the end. -- -- Example: 'Pinchot.Examples.Postal.rOptNewline'. opt :: Rule t -> Rule t opt r@(Rule innerNm _ _) = rule n (Opt r) where n = innerNm ++ "'Opt" -- | Creates a rule for the production of a sequence of other rules. -- The name for the created 'Rule' is the name of the 'Rule' to which -- this function is applied, with @'Star@ appended. -- -- Example: 'Pinchot.Examples.Postal.rPreSpacedWord'. star :: Rule t -> Rule t star r@(Rule innerNm _ _) = rule (innerNm ++ "'Star") (Star r) -- | Creates a rule for a production that appears at least once. The -- name for the created 'Rule' is the name of the 'Rule' to which this -- function is applied, with @'Plus@ appended. -- -- Example: 'Pinchot.Examples.Postal.rDigits'. plus :: Rule t -> Rule t plus r@(Rule innerNm _ _) = rule (innerNm ++ "'Plus") (Plus r) -- | Gets all ancestor rules to this 'Rule'. Includes the current -- rule if it has not already been seen. getAncestors :: Rule t -> State (Set RuleName) (Seq (Rule t)) getAncestors r@(Rule name _ ty) = do set <- get if Set.member name set then return Seq.empty else do put (Set.insert name set) case ty of Terminal _ -> return (Seq.singleton r) NonTerminal b1 bs -> do as1 <- branchAncestors b1 ass <- fmap join . traverse branchAncestors $ bs return $ r <| as1 <> ass Wrap c -> do cs <- getAncestors c return $ r <| cs Record ls -> do cs <- fmap join . traverse getAncestors $ ls return $ r <| cs Opt c -> do cs <- getAncestors c return $ r <| cs Star c -> do cs <- getAncestors c return $ r <| cs Plus c -> do cs <- getAncestors c return $ r <| cs where branchAncestors (Branch _ rs) = fmap join . traverse getAncestors $ rs -- | Gets all ancestor 'Rule's. Includes the current 'Rule'. Skips -- duplicates. family :: Rule t -> Seq (Rule t) family rule = State.evalState (getAncestors rule) Set.empty -- | Gets all the ancestor 'Rule's of a sequence of 'Rule'. Includes -- each 'Rule' that is in the sequence. Skips duplicates. families :: Seq (Rule t) -> Seq (Rule t) families = join . flip State.evalState Set.empty . traverse getAncestors