module Data.SExp.Parse
( SExpToken(..)
, stringTokenValueM, booleanTokenValueM, integerTokenValueM, floatingTokenValueM
, symbolTokenValueM, lParenTokenValueM, rParenTokenValueM, whitespaceTokenValueM
, commentTokenValueM, antiConsTokenValueM, antiValueTokenValueM, antiAtomTokenValueM
, antiListTokenValueM, antiStringTokenValueM, antiBooleanTokenValueM, antiIntegerTokenValueM
, antiFloatingTokenValueM, antiSymbolTokenValueM
, isDiscardSExpToken
, sexpTokens
, lexSExp
, parseSExp, parseDualSExp
, convertAtomE, convertAntiAtomToValueE
, convertOpenLinkE
, convertAtomP, convertAntiAtomToValueP
, convertOpenLinkP
, sexp, sexpl
, unparseAtom, unparseSExp
, lexOne, tokParse, qtokParse
) where
import Control.Monad
import Data.SExp.Data
import Language.Haskell.TH
import Language.Haskell.TH.Quote
import Text.Parsec
import Text.Parsec.Pos
import Text.Parsec.String
import Text.Printf
data SExpToken =
StringToken String
| BooleanToken Bool
| IntegerToken Integer
| FloatingToken Double
| SymbolToken String
| LParenToken
| RParenToken
| WhitespaceToken
| CommentToken String
| AntiConsToken
| AntiValueToken String
| AntiAtomToken String
| AntiListToken String
| AntiStringToken String
| AntiBooleanToken String
| AntiIntegerToken String
| AntiFloatingToken String
| AntiSymbolToken String
deriving (Eq, Ord, Show)
isDiscardSExpToken :: SExpToken -> Bool
isDiscardSExpToken WhitespaceToken = True
isDiscardSExpToken (CommentToken _) = True
isDiscardSExpToken _ = False
stringTokenValueM :: SExpToken -> Maybe String
stringTokenValueM t = case t of (StringToken s) -> Just s ; _ -> Nothing
booleanTokenValueM :: SExpToken -> Maybe Bool
booleanTokenValueM t = case t of (BooleanToken b) -> Just b ; _ -> Nothing
integerTokenValueM :: SExpToken -> Maybe Integer
integerTokenValueM t = case t of (IntegerToken i) -> Just i ; _ -> Nothing
floatingTokenValueM :: SExpToken -> Maybe Double
floatingTokenValueM t = case t of (FloatingToken f) -> Just f ; _ -> Nothing
symbolTokenValueM :: SExpToken -> Maybe String
symbolTokenValueM t = case t of (SymbolToken s) -> Just s ; _ -> Nothing
lParenTokenValueM :: SExpToken -> Maybe ()
lParenTokenValueM t = case t of LParenToken -> Just () ; _ -> Nothing
rParenTokenValueM :: SExpToken -> Maybe ()
rParenTokenValueM t = case t of RParenToken -> Just () ; _ -> Nothing
whitespaceTokenValueM :: SExpToken -> Maybe ()
whitespaceTokenValueM t = case t of WhitespaceToken -> Just () ; _ -> Nothing
commentTokenValueM :: SExpToken -> Maybe String
commentTokenValueM t = case t of (CommentToken s) -> Just s ; _ -> Nothing
antiConsTokenValueM :: SExpToken -> Maybe ()
antiConsTokenValueM t = case t of AntiConsToken -> Just () ; _ -> Nothing
antiValueTokenValueM :: SExpToken -> Maybe String
antiValueTokenValueM t = case t of (AntiValueToken n) -> Just n ; _ -> Nothing
antiAtomTokenValueM :: SExpToken -> Maybe String
antiAtomTokenValueM t = case t of (AntiAtomToken n) -> Just n ; _ -> Nothing
antiListTokenValueM :: SExpToken -> Maybe String
antiListTokenValueM t = case t of (AntiListToken n) -> Just n ; _ -> Nothing
antiStringTokenValueM :: SExpToken -> Maybe String
antiStringTokenValueM t = case t of (AntiStringToken n) -> Just n ; _ -> Nothing
antiBooleanTokenValueM :: SExpToken -> Maybe String
antiBooleanTokenValueM t = case t of (AntiBooleanToken n) -> Just n ; _ -> Nothing
antiIntegerTokenValueM :: SExpToken -> Maybe String
antiIntegerTokenValueM t = case t of (AntiIntegerToken n) -> Just n ; _ -> Nothing
antiFloatingTokenValueM :: SExpToken -> Maybe String
antiFloatingTokenValueM t = case t of (AntiFloatingToken n) -> Just n ; _ -> Nothing
antiSymbolTokenValueM :: SExpToken -> Maybe String
antiSymbolTokenValueM t = case t of (AntiSymbolToken n) -> Just n ; _ -> Nothing
sexpTokens :: [Parser SExpToken]
sexpTokens =
[
let inside = many $ noneOf "\"\\" <|> esc
esc = char '\\' >> msum [escQuote, escEsc, escNewline]
escQuote = char '"'
escEsc = char '\\'
escNewline = char 'n' >> return '\n'
in liftM StringToken $ between (char '"') (char '"') inside
, string "true" >> return (BooleanToken True)
, string "false" >> return (BooleanToken False)
, do
sign <- option "" $ string "-"
num <- many1 digit
return . IntegerToken . read $ sign ++ num
, do
sign <- option "" $ string "-"
num <- many1 digit
decimal <- option "" $ do
dot <- string "."
followDot <- many1 digit
return $ dot ++ followDot
exponential <- option "" $ do
leading <- string "e"
followExp <- many1 digit
return $ leading ++ followExp
return . FloatingToken . read $ sign ++ num ++ decimal ++ exponential
, liftM SymbolToken $ do { a <- noneOf " \t\n@#()\"\\" ; b <- many (noneOf " \t\n#()\"\\") ; return $ a:b }
, char '(' >> return LParenToken
, char ')' >> return RParenToken
, many1 (oneOf " \t\n") >> return WhitespaceToken
, do
a <- liftM (:[]) $ char '#'
b <- option "" $ do
j <- liftM (:[]) $ noneOf "|\n"
k <- many (noneOf "\n")
return $ j ++ k
c <- liftM (:[]) $ char '\n'
return . CommentToken $ concat [a, b, c]
, let inside :: Int -> Parser String
inside n = if n == 0 then return "" else msum
[ do { a <- noneOf "#|" ; b <- inside n ; return $ a:b }
, do { a <- char '#' ; b <- insidePostPound n ; return $ a:b }
, do { a <- char '|' ; b <- insidePostPipe n ; return $ a:b }
]
insidePostPound n = msum
[ do { a <- noneOf "#|" ; b <- inside n ; return $ a:b }
, do { a <- char '#' ; b <- insidePostPound n ; return $ a:b }
, do { a <- char '|' ; b <- inside (n + 1) ; return $ a:b }
]
insidePostPipe n = msum
[ do { a <- noneOf "#|" ; b <- inside n ; return $ a:b }
, do { a <- char '|' ; b <- insidePostPipe n ; return $ a:b }
, if n == 0 then mzero else do { a <- char '#' ; b <- inside (n 1) ; return $ a:b }
]
in do { a <- string "#|" ; b <- inside 1 ; return . CommentToken $ a ++ b }
, string "@~" >> return AntiConsToken
, string "@:" >> liftM AntiValueToken antiName
, string "@atom:" >> liftM AntiAtomToken antiName
, string "@list:" >> liftM AntiListToken antiName
, string "@str:" >> liftM AntiStringToken antiName
, string "@bool:" >> liftM AntiBooleanToken antiName
, string "@int:" >> liftM AntiIntegerToken antiName
, string "@float:" >> liftM AntiFloatingToken antiName
, string "@sym:" >> liftM AntiSymbolToken antiName
]
where
antiName = do
a <- letter <|> char '_'
as <- many (alphaNum <|> oneOf "_'")
return (a:as)
lexSExp :: String -> Either String [(SourcePos, SExpToken)]
lexSExp i = do
(result, o, i') <- lexSExp' (initialPos "") i
case i' of
[] -> return $ filter (not . isDiscardSExpToken . snd) result
_ -> fail $ printf "did not consume all input: %s" (show o)
lexSExp' :: SourcePos -> String -> Either String ([(SourcePos, SExpToken)], SourcePos, String)
lexSExp' o [] = return ([], o, [])
lexSExp' o i = do
(lexed, o', i') <- lexOne o sexpTokens i
(rest, o'', i'') <- lexSExp' o' i'
return (lexed : rest, o'', i'')
type SExpParser a = GenParser (SourcePos, SExpToken) () a
sexpTokenParser :: (SExpToken -> Maybe a) -> SExpParser a
sexpTokenParser f = token show fst (f . snd)
stringTok :: SExpParser String ; stringTok = sexpTokenParser stringTokenValueM
booleanTok :: SExpParser Bool ; booleanTok = sexpTokenParser booleanTokenValueM
integerTok :: SExpParser Integer ; integerTok = sexpTokenParser integerTokenValueM
floatingTok :: SExpParser Double ; floatingTok = sexpTokenParser floatingTokenValueM
symbolTok :: SExpParser String ; symbolTok = sexpTokenParser symbolTokenValueM
lParenTok :: SExpParser () ; lParenTok = sexpTokenParser lParenTokenValueM
rParenTok :: SExpParser () ; rParenTok = sexpTokenParser rParenTokenValueM
antiConsTok :: SExpParser () ; antiConsTok = sexpTokenParser antiConsTokenValueM
antiValueTok :: SExpParser String ; antiValueTok = sexpTokenParser antiValueTokenValueM
antiAtomTok :: SExpParser String ; antiAtomTok = sexpTokenParser antiAtomTokenValueM
antiListTok :: SExpParser String ; antiListTok = sexpTokenParser antiListTokenValueM
antiStringTok :: SExpParser String ; antiStringTok = sexpTokenParser antiStringTokenValueM
antiBooleanTok :: SExpParser String ; antiBooleanTok = sexpTokenParser antiBooleanTokenValueM
antiIntegerTok :: SExpParser String ; antiIntegerTok = sexpTokenParser antiIntegerTokenValueM
antiFloatingTok :: SExpParser String ; antiFloatingTok = sexpTokenParser antiFloatingTokenValueM
antiSymbolTok :: SExpParser String ; antiSymbolTok = sexpTokenParser antiSymbolTokenValueM
atomP :: SExpParser Atom
atomP = msum
[ liftM StringAtom stringTok
, liftM BooleanAtom booleanTok
, liftM IntegerAtom integerTok
, liftM FloatingAtom floatingTok
, liftM SymbolAtom symbolTok
]
antiAtomP :: SExpParser AntiAtom
antiAtomP = msum
[ liftM ValueAntiAtom antiValueTok
, liftM AtomAntiAtom antiAtomTok
, liftM ListAntiAtom antiListTok
, liftM StringAntiAtom antiStringTok
, liftM BooleanAntiAtom antiBooleanTok
, liftM IntegerAntiAtom antiIntegerTok
, liftM FloatingAntiAtom antiFloatingTok
, liftM SymbolAntiAtom antiSymbolTok
]
sexpP :: SExpParser SExp
sexpP = msum
[ liftM (SExp . AtomSExp) atomP
, liftM (SExp . LinkSExp) $ between lParenTok rParenTok sexpLinkP
]
sexpLinkP :: SExpParser SExpLink
sexpLinkP = msum
[ do
s <- sexpP
ss <- sexpLinkP
return . SExpLink $ ConsLink s ss
, return $ SExpLink NullLink
]
dualSExpP :: SExpParser DualSExp
dualSExpP = msum
[ liftM (PositiveDualSExp . AtomSExp) atomP
, liftM AntiAtomDualSExp antiAtomP
, liftM (PositiveDualSExp . LinkSExp) $ between lParenTok rParenTok dualSExpLinkP
]
dualSExpLinkP :: SExpParser DualSExpLink
dualSExpLinkP = msum
[ do
s <- dualSExpP
ss <- dualSExpLinkP
return . PositiveDualSExpLink $ ConsLink s ss
, do
antiConsTok
msum [ do
anti <- antiValueTok
return $ AntiConsValueDualSExpLink anti
, do
anti <- antiListTok
return $ AntiConsListDualSExpLink anti
]
, return $ PositiveDualSExpLink NullLink
]
parseSExp :: String -> Either String SExp
parseSExp = tokParse lexSExp sexpP
parseDualSExp :: String -> Either String DualSExp
parseDualSExp = tokParse lexSExp dualSExpP
convertAtomE :: Atom -> Q Exp
convertAtomE (StringAtom s) = conE 'StringAtom `appE` litE (stringL s)
convertAtomE (BooleanAtom True) = conE 'BooleanAtom `appE` conE 'True
convertAtomE (BooleanAtom False) = conE 'BooleanAtom `appE` conE 'False
convertAtomE (IntegerAtom i) = conE 'IntegerAtom `appE` litE (integerL i)
convertAtomE (FloatingAtom f) = conE 'FloatingAtom `appE` litE (rationalL (toRational f))
convertAtomE (SymbolAtom s) = conE 'SymbolAtom `appE` litE (stringL s)
convertAntiAtomToValueE :: AntiAtom -> (Q Exp -> Q Exp) -> (Q Exp -> Q Exp) -> Q Exp
convertAntiAtomToValueE (ValueAntiAtom name) _ _ = varE $ mkName name
convertAntiAtomToValueE (AtomAntiAtom name) qatom _ = qatom . varE $ mkName name
convertAntiAtomToValueE (ListAntiAtom name) _ qlistToValue = qlistToValue . varE $ mkName name
convertAntiAtomToValueE (StringAntiAtom name) qatom _ = qatom $ conE 'StringAtom `appE` varE (mkName name)
convertAntiAtomToValueE (BooleanAntiAtom name) qatom _ = qatom $ conE 'BooleanAtom `appE` varE (mkName name)
convertAntiAtomToValueE (IntegerAntiAtom name) qatom _ = qatom $ conE 'IntegerAtom `appE` varE (mkName name)
convertAntiAtomToValueE (FloatingAntiAtom name) qatom _ = qatom $ conE 'FloatingAtom `appE` varE (mkName name)
convertAntiAtomToValueE (SymbolAntiAtom name) qatom _ = qatom $ conE 'SymbolAtom `appE` varE (mkName name)
convertOpenSExpE :: (l -> Q Exp) -> OpenSExp l -> Q Exp
convertOpenSExpE _ (AtomSExp a) = conE 'AtomSExp `appE` convertAtomE a
convertOpenSExpE cl (LinkSExp l) = conE 'LinkSExp `appE` cl l
convertOpenLinkE :: (a -> Q Exp) -> (l -> Q Exp) -> OpenLink a l -> Q Exp
convertOpenLinkE _ _ NullLink = conE 'NullLink
convertOpenLinkE ch cl (ConsLink h l) = conE 'ConsLink `appE` ch h `appE` cl l
convertDualSExpToSExpE :: DualSExp -> Q Exp
convertDualSExpToSExpE (PositiveDualSExp p) =
conE 'SExp `appE` convertOpenSExpE convertDualSExpToSExpLinkE p
convertDualSExpToSExpE (AntiAtomDualSExp aa) =
convertAntiAtomToValueE aa (\ x -> conE 'SExp `appE` (conE 'AtomSExp `appE` x))
(\ x -> varE 'listToSExp `appE` x)
convertDualSExpToSExpLinkE :: DualSExpLink -> Q Exp
convertDualSExpToSExpLinkE (PositiveDualSExpLink p) =
conE 'SExpLink `appE` convertOpenLinkE convertDualSExpToSExpE convertDualSExpToSExpLinkE p
convertDualSExpToSExpLinkE (AntiConsValueDualSExpLink name) =
conE 'SExpLink `appE` varE (mkName name)
convertDualSExpToSExpLinkE (AntiConsListDualSExpLink name) =
varE 'listToSExpLink `appE` varE (mkName name)
convertAtomP :: Atom -> Q Pat
convertAtomP (StringAtom s) = conP 'StringAtom [litP (stringL s)]
convertAtomP (BooleanAtom True) = conP 'BooleanAtom [conP 'True []]
convertAtomP (BooleanAtom False) = conP 'BooleanAtom [conP 'False []]
convertAtomP (IntegerAtom i) = conP 'IntegerAtom [litP (integerL i)]
convertAtomP (FloatingAtom f) = conP 'FloatingAtom [litP (rationalL (toRational f))]
convertAtomP (SymbolAtom s) = conP 'SymbolAtom [litP (stringL s)]
convertAntiAtomToValueP :: AntiAtom -> (Q Pat -> Q Pat) -> (Q Pat -> Q Pat) -> Q Pat
convertAntiAtomToValueP (ValueAntiAtom "_") _ _ = wildP
convertAntiAtomToValueP (ValueAntiAtom name) _ _ = varP $ mkName name
convertAntiAtomToValueP (AtomAntiAtom "_") qatom _ = qatom wildP
convertAntiAtomToValueP (AtomAntiAtom name) qatom _ = qatom . varP $ mkName name
convertAntiAtomToValueP (ListAntiAtom "_") _ qvalueToList = qvalueToList wildP
convertAntiAtomToValueP (ListAntiAtom name) _ qvalueToList = qvalueToList . varP $ mkName name
convertAntiAtomToValueP (StringAntiAtom "_") qatom _ = qatom $ conP 'StringAtom [wildP]
convertAntiAtomToValueP (StringAntiAtom name) qatom _ = qatom $ conP 'StringAtom [varP $ mkName name]
convertAntiAtomToValueP (BooleanAntiAtom "_") qatom _ = qatom $ conP 'BooleanAtom [wildP]
convertAntiAtomToValueP (BooleanAntiAtom name) qatom _ = qatom $ conP 'BooleanAtom [varP $ mkName name]
convertAntiAtomToValueP (IntegerAntiAtom "_") qatom _ = qatom $ conP 'IntegerAtom [wildP]
convertAntiAtomToValueP (IntegerAntiAtom name) qatom _ = qatom $ conP 'IntegerAtom [varP $ mkName name]
convertAntiAtomToValueP (FloatingAntiAtom "_") qatom _ = qatom $ conP 'FloatingAtom [wildP]
convertAntiAtomToValueP (FloatingAntiAtom name) qatom _ = qatom $ conP 'FloatingAtom [varP $ mkName name]
convertAntiAtomToValueP (SymbolAntiAtom "_") qatom _ = qatom $ conP 'SymbolAtom [wildP]
convertAntiAtomToValueP (SymbolAntiAtom name) qatom _ = qatom $ conP 'SymbolAtom [varP $ mkName name]
convertOpenSExpP :: (l -> Q Pat) -> OpenSExp l -> Q Pat
convertOpenSExpP _ (AtomSExp a) = conP 'AtomSExp [convertAtomP a]
convertOpenSExpP cl (LinkSExp l) = conP 'LinkSExp [cl l]
convertOpenLinkP :: (a -> Q Pat) -> (l -> Q Pat) -> OpenLink a l -> Q Pat
convertOpenLinkP _ _ NullLink = conP 'NullLink []
convertOpenLinkP ch cl (ConsLink h l) = conP 'ConsLink [ch h, cl l]
convertDualSExpToSExpP :: DualSExp -> Q Pat
convertDualSExpToSExpP (PositiveDualSExp p) =
conP 'SExp [convertOpenSExpP convertDualSExpToSExpLinkP p]
convertDualSExpToSExpP (AntiAtomDualSExp aa) =
convertAntiAtomToValueP aa (\ x -> conP 'SExp [conP 'AtomSExp [x]])
(\ x -> viewP (varE 'sexpToList) (conP 'Just [x]))
convertDualSExpToSExpLinkP :: DualSExpLink -> Q Pat
convertDualSExpToSExpLinkP (PositiveDualSExpLink p) =
conP 'SExpLink [convertOpenLinkP convertDualSExpToSExpP convertDualSExpToSExpLinkP p]
convertDualSExpToSExpLinkP (AntiConsValueDualSExpLink name) =
conP 'SExpLink [varP (mkName name)]
convertDualSExpToSExpLinkP (AntiConsListDualSExpLink name) =
viewP (varE 'sexpLinkToList) (varP (mkName name))
sexp :: QuasiQuoter
sexp = QuasiQuoter
{ quoteExp = convertDualSExpToSExpE <=< qtokParse lexSExp dualSExpP
, quotePat = convertDualSExpToSExpP <=< qtokParse lexSExp dualSExpP
, quoteType = undefined
, quoteDec = undefined
}
sexpl :: QuasiQuoter
sexpl = QuasiQuoter
{ quoteExp = convertDualSExpToSExpLinkE <=< qtokParse lexSExp dualSExpLinkP
, quotePat = convertDualSExpToSExpLinkP <=< qtokParse lexSExp dualSExpLinkP
, quoteType = undefined
, quoteDec = undefined
}
unparseAtom :: Atom -> String
unparseAtom (StringAtom s) = printf "\"%s\"" (concatMap escape s)
where
escape '\n' = "\\n"
escape '"' = "\\\""
escape '\\' = "\\\\"
escape o = [o]
unparseAtom (BooleanAtom True) = "true"
unparseAtom (BooleanAtom False) = "false"
unparseAtom (IntegerAtom i) = show i
unparseAtom (FloatingAtom f) = show f
unparseAtom (SymbolAtom s) = s
unparseSExp :: SExp -> String
unparseSExp (SExp (AtomSExp a)) = unparseAtom a
unparseSExp (SExp (LinkSExp (SExpLink NullLink))) = "()"
unparseSExp (SExp (LinkSExp (SExpLink (ConsLink h t)))) = printf "(%s%s)" (unparseSExp h) (unparseInList t)
unparseInList :: SExpLink -> String
unparseInList (SExpLink NullLink) = ""
unparseInList (SExpLink (ConsLink h t)) = printf " %s%s" (unparseSExp h) (unparseInList t)
lexOne :: (Ord a) => SourcePos -> [Parser a] -> String -> Either String ((SourcePos, a), SourcePos, String)
lexOne pos ps input = do
(a, b, c) <- foldl findBest (Left $ printf "could not lex at %s: %s" (show pos) (show input)) $ map tryP ps
return ((pos, a), b, c)
where
tryP p = left show $ parse p' "" input
where
p' = do
setPosition pos
x <- p
i <- getInput
o <- getPosition
return (x, o, i)
findBest x@(Left _) (Left _) = x
findBest (Left _) x@(Right _) = x
findBest x@(Right _) (Left _) = x
findBest x1@(Right (_, p1, _)) x2@(Right (_, p2, _))
| p2 > p1 = x2
| otherwise = x1
left :: (a -> b) -> Either a c -> Either b c
left f (Left x) = Left (f x)
left _ (Right x) = Right x
tokParse :: (String -> Either String [tok]) -> GenParser tok () a -> String -> Either String a
tokParse lexify p input = do
lexed <- lexify input
left show $ parse p "" lexed
where
left f (Left x) = Left (f x)
left _ (Right x) = Right x
qtokParse :: (String -> Either String [tok]) -> GenParser tok () a -> String -> Q a
qtokParse lexify p input = case tokParse lexify p input of
(Left e) -> runIO $ fail e
(Right r) -> return r