src/Text/ParserCombinators/UU/Examples.hs

{-# LANGUAGE  RankNTypes #-}
--              GADTs,
--              MultiParamTypeClasses,
--              FunctionalDependencies, 
--              FlexibleInstances, 
--              FlexibleContexts, 
--              UndecidableInstances,
--              NoMonomorphismRestriction
module Text.ParserCombinators.UU.Examples where
import Char
import Text.ParserCombinators.UU

-- |We start out by defining the type of parser we want; by specifying the type of the state we resolve a lot of overloading

type Parser a = P (Str Char) a 

-- | The fuction @`run`@ runs the parser and shows both the result, and the correcting steps which were taken during the parsing process.
run :: Show t =>  P (Str Char) t -> String -> IO ()
run p inp = do  let r@(a, errors) =  parse ( (,) <$> p <*> pEnd) (listToStr inp)
                putStrLn "--"
                putStrLn "-- @"
                putStrLn ("-- Result: " ++ show a)
                if null errors then  return ()
                               else  do putStr ("-- Correcting steps: \n")
                                        show_errors errors
                putStrLn "-- @"


-- | Our first two parsers are simple; one recognises a single 'a' character and the other one a single 'b'. Since we will use them later we 
--   convert the recognsied character into String so they can be easily combined.
pa  ::Parser String 
pa  = lift <$> pSym 'a'
pb  :: Parser String 
pb = lift <$> pSym 'b'
lift a = [a]

-- | We can now run the parser @`pa`@ on input \"a\", which succeeds: 
--
-- @ 
--   Result: \"a\"
-- @

test1 = run pa "a"

-- | If we   run the parser @`pa`@ on the empty input \"\", the expected symbol in inserted, that the position where it was inserted is reported, and
--   we get information about what was expected at that position:  @run pa \"\"@ 
--
-- @
-- Result: \"a\"
-- Correcting steps: 
--     Inserted 'a' at position 0 expecting 'a'
-- @

test2 = run pa ""

-- | Now let's see what happens if we encounter an unexpected symbol, as in @run pa \"b\"@:
--
-- @
-- Result: \"a\"
-- Correcting steps: 
--     Deleted  'b' at position 0 expecting 'a'
--     Inserted 'a' at position 1 expecting 'a'
-- @

test3 = run pa "b"

-- | The combinator @\<++>@ applies two parsers sequentially to the input and concatenates their results: @run (pa <++> pa) \"aa\"@:
--
-- @
-- Result: \"aa\"
-- @

(<++>) :: Parser String -> Parser String -> Parser String
p <++> q = (++) <$> p <*> q
pa2 =   pa <++> pa
pa3 =   pa <++> pa2

test4 = run pa2 "aa"

-- | The function @`pSym`@ is overloaded. The type of its argument determines how to interpret the argument. Thus far we have seen single characters, 
--   but we may pass ranges as well as argument: @\run (pList (pSym ('a','z'))) \"doaitse\"@
--
-- @
-- Result: "doaitse"
-- @

test5 =  run  (pList (pSym ('a','z'))) "doaitse"
paz = pList (pSym ('a', 'z'))

-- | An even more general instance of @`pSym`@ takes a triple as argument: a predicate, a string  indicating what is expected, 
--   and the value to insert if nothing can be recognised: @run (pSym (\t -> 'a' <= t && t <= 'z', \"'a'..'z'\", 'k')) \"v\"@
--
-- @
-- Result: 'k'
-- Correcting steps: 
--     Deleted  '1' at position 0 expecting 'a'..'z'
--     Inserted 'k' at position 1 expecting 'a'..'z'
-- @

test6 = run  paz' "1"
paz' = pSym (\t -> 'a' <= t && t <= 'z', "'a'..'z'", 'k')

-- | The parser `pCount` recognises a sequence of elements, throws away the results of the recognition process (@ \<$ @), and just returns the number of returned elements.
--   The choice combinator @\<\<|>@ indicates that prefernce is to be given to the left alternative if it can make progress. This enables us to specify greedy strategies:
--   @ run (pCount pa) \"aaaaa\"@
--
-- @
-- Result: 5
-- @

pCount p = (+1) <$ p <*> pCount p <<|> pReturn 0

test7 = run (pCount pa) "aaaaa"

-- | The parsers are instance of the class Monad and hence we can use the result of a previous parser to construct a following one:  @run (do  {l <- pCount pa; pExact l pb}) \"aaacabbb\"@
--
-- @
-- Result: [\"b\",\"b\",\"b\",\"b\"]
-- Correcting steps: 
--     Deleted  'c' at position 3 expecting one of ['a', 'b']
--     Inserted 'b' at position 8 expecting 'b'
-- @


test8 = run (do  {l <- pCount pa; pExact l pb}) "aaacabbb"
pExact 0 p = pReturn []
pExact n p = (:) <$> p <*> pExact (n-1) p

-- | The function @`amb`@ converts an ambigous parser into one which returns all possible parses: @run (amb ( (++) <$> pa2 <*> pa3 <|> (++) <$> pa3 <*> pa2))  \"aaaaa\"@
--
-- @
-- Result: [\"aaaaa\",\"aaaaa\"]
-- @

test9 = run (amb ( (++) <$> pa2 <*> pa3 <|> (++) <$> pa3 <*> pa2))  "aaaaa"

-- | The applicative style makes it very easy to merge recognsition and computing a result. As an example we parse a sequence of nested well formed parentheses pairs a,d
--   compute the maximum nesting depth: @run ( max <$> pParens ((+1) <$> wfp) <*> wfp `opt` 0) \"((()))()(())\" @
--
-- @
-- Result: 3
-- @

test10 = run wfp "((()))()(())"
wfp =  max <$> pParens ((+1) <$> wfp) <*> wfp `opt` 0

-- | It is very easy to recognise infix expressions with any number of priorities and operators:
--
-- @ 
-- pOp (c, op) = op <$ pSym c
-- sepBy p op = pChainl op p
-- expr    = foldr sepBy factor [(pOp ('+', (+)) <|> pOp ('-', (-))),  pOp ('*' , (*))] 
-- factor  = pNatural <|> pParens expr
-- @
-- 
-- | which we can call:  @run expr \"15-3*5\"@
--
-- @
-- Result: 0
-- @
--
-- | Note that also here correction takes place: @run expr \"2 + + 3 5\"@
--
-- @
-- Result: 37
-- Correcting steps: 
--     Deleted  ' ' at position 1 expecting one of ['0'..'9', '*', '-', '+']
--     Inserted '0' at position 3 expecting one of ['(', '0'..'9']
--     Deleted  ' ' at position 4 expecting one of ['(', '0'..'9']
--     Deleted  ' ' at position 6 expecting one of ['0'..'9', '*', '-', '+']
-- @


test11 = run expr "15-3*5"

-- parsing expressions
pOp (c, op) = op <$ pSym c
expr    = foldr pChainl factor [(pOp ('+', (+)) <|> pOp ('-', (-))),  pOp ('*' , (*))] 
factor  = pNatural <|> pParens expr
-- parsing numbers
pDigit :: Parser Char
pDigit = pSym ('0', '9')
pDigitAsInt = digit2Int <$> pDigit 
pNatural = foldl (\a b -> a * 10 + b ) 0 <$> pList1 pDigitAsInt
digit2Int a =  ord a - ord '0'

-- | A common case where ambiguity arises is when we e.g. want to recognise identifiers, but only those which are not keywords. 
--   The combinator `micro` inserts steps with a specfied cost in the result of the parser which can be used to disambiguate:
--
-- @
-- ident ::  Parser String
-- ident = ((:) <$> pSym ('a','z') <*> pMunch (\x -> 'a' <= x && x <= 'z') `micro` 2) <* spaces
-- idents = pList1 ident
-- pKey keyw = pToken keyw `micro` 1 <* spaces
-- spaces :: Parser String
-- spaces = pMunch (==' ')
-- takes_second_alt =   pList ident 
--                \<|> (\ c t e -> [\"IfThenElse\"] ++  c   ++  t  ++  e) 
--                    \<$ pKey \"if\"   \<*> pList_ng ident 
--                    \<* pKey \"then\" \<*> pList_ng ident
--                    \<* pKey \"else\" \<*> pList_ng ident  
-- @
--

-- | A keyword is followed by a small cost @1@, which makes sure that identifiers which have a keyword as a prefix win over the keyword. Identifiers are however
--   followed by a cost @2@, with as result that in this case the keyword wins. 
--   Note that a limitation of this approach is that keywords are only recognised as such when expected!
-- 
-- @
-- test13 = run takes_second_alt \"if a then if else c\"
-- test14 = run takes_second_alt \"ifx a then if else c\"
-- @
-- 
-- with results:
--
-- @
-- Text.ParserCombinators.UU.Examples> test14
-- Result: [\"IfThenElse\",\"a\",\"if\",\"c\"]
-- Text.ParserCombinators.UU.Examples> test14
-- Result: [\"ifx\",\"a\",\"then\",\"if\",\"else\",\"c\"]
-- @


ident ::  Parser String
ident = ((:) <$> pSym ('a','z') <*> pMunch (\x -> 'a' <= x && x <= 'z') `micro` 2) <* spaces
idents = pList1 ident

pKey keyw = pToken keyw `micro` 1 <* spaces
spaces :: Parser String
spaces = pMunch (==' ')
 
takes_second_alt =   pList ident 
              <|> (\ c t e -> ["IfThenElse"] ++  c   ++  t  ++  e) 
                  <$ pKey "if"   <*> pList_ng ident 
                  <* pKey "then" <*> pList_ng ident
                  <* pKey "else" <*> pList_ng ident  
test13 = run takes_second_alt "if a then if else c"
test14 = run takes_second_alt "ifx a then if else c"



munch = (,,) <$> pa <*> pMunch ( `elem` "^=*") <*> pb

-- bracketing expressions
pParens :: Parser a -> Parser a
pParens p =  pSym '(' *> p <* pSym ')'
pBracks p =  pSym '[' *> p <* pSym ']'
pCurlys p =  pSym '{' *> p <* pSym '}'

-- parsing letters and identifiers
pLower  = pSym ('a','z')
pUpper  = pSym ('A','Z')
pLetter = pUpper <|> pLower
pVarId  = (:) <$> pLower <*> pList pIdChar
pConId  = (:) <$> pUpper <*> pList pIdChar
pIdChar = pLower <|> pUpper <|> pDigit <|> pAnySym "='"

pAnyToken :: [String] -> Parser String
pAnyToken = pAny pToken

-- parsing two alternatives and returning both rsults
pAscii = pSym ('\000', '\254')
pIntList       ::Parser [Int] 
pIntList       =  pParens ((pSym ';') `pListSep` (read <$> pList (pSym ('0', '9'))))
parseIntString :: Parser String
parseIntString = pList (pAscii)

parseBoth = pPair pIntList parseIntString

pPair p q =  amb (Left <$> p <|> Right <$> q)

main :: IO ()
main = do test1
          run pa "b"
          run pa2 "bbab"
          run pa "ba"
          run pa "aa"
          run (do  {l <- pCount pa; pExact l pb}) "aaacabbbb"
          run (amb ( (++) <$> pa2 <*> pa3 <|> (++) <$> pa3 <*> pa2))  "aaabaa"
          run paz "ab1z7"
          run paz' "m"
          run paz' ""
          run (pa <|> pb <?> "just a message") "c"
          run parseBoth "(123;456;789)"
          run munch "a^=^**^^b"