{-# LANGUAGE TemplateHaskell #-} module Data.SExp.Parse ( SExpToken(..) , stringTokenValueM, booleanTokenValueM, integerTokenValueM, floatingTokenValueM , symbolTokenValueM, lParenTokenValueM, rParenTokenValueM, whitespaceTokenValueM , commentTokenValueM, antiConsTokenValueM, antiValueTokenValueM, antiAtomTokenValueM , antiListTokenValueM, antiStringTokenValueM, antiBooleanTokenValueM, antiIntegerTokenValueM , antiFloatingTokenValueM, antiSymbolTokenValueM , isDiscardSExpToken , sexpTokens , lexSExp , parseSExp, parseDualSExp , convertAtomE, convertAntiAtomToValueE , convertOpenLinkE , convertAtomP, convertAntiAtomToValueP , convertOpenLinkP , sexp, sexpl , unparseAtom, unparseSExp , lexOne, tokParse, qtokParse ) where import Control.Monad import Data.SExp.Data import Language.Haskell.TH import Language.Haskell.TH.Quote import Text.Parsec import Text.Parsec.Pos import Text.Parsec.String import Text.Printf -- Lexing data SExpToken = -- Atom Tokens StringToken String | BooleanToken Bool | IntegerToken Integer | FloatingToken Double | SymbolToken String -- Structure Tokens | LParenToken | RParenToken -- Filtered Tokens | WhitespaceToken | CommentToken String -- Anti Tokens | AntiConsToken | AntiValueToken String | AntiAtomToken String | AntiListToken String | AntiStringToken String | AntiBooleanToken String | AntiIntegerToken String | AntiFloatingToken String | AntiSymbolToken String deriving (Eq, Ord, Show) isDiscardSExpToken :: SExpToken -> Bool isDiscardSExpToken WhitespaceToken = True isDiscardSExpToken (CommentToken _) = True isDiscardSExpToken _ = False stringTokenValueM :: SExpToken -> Maybe String stringTokenValueM t = case t of (StringToken s) -> Just s ; _ -> Nothing booleanTokenValueM :: SExpToken -> Maybe Bool booleanTokenValueM t = case t of (BooleanToken b) -> Just b ; _ -> Nothing integerTokenValueM :: SExpToken -> Maybe Integer integerTokenValueM t = case t of (IntegerToken i) -> Just i ; _ -> Nothing floatingTokenValueM :: SExpToken -> Maybe Double floatingTokenValueM t = case t of (FloatingToken f) -> Just f ; _ -> Nothing symbolTokenValueM :: SExpToken -> Maybe String symbolTokenValueM t = case t of (SymbolToken s) -> Just s ; _ -> Nothing lParenTokenValueM :: SExpToken -> Maybe () lParenTokenValueM t = case t of LParenToken -> Just () ; _ -> Nothing rParenTokenValueM :: SExpToken -> Maybe () rParenTokenValueM t = case t of RParenToken -> Just () ; _ -> Nothing whitespaceTokenValueM :: SExpToken -> Maybe () whitespaceTokenValueM t = case t of WhitespaceToken -> Just () ; _ -> Nothing commentTokenValueM :: SExpToken -> Maybe String commentTokenValueM t = case t of (CommentToken s) -> Just s ; _ -> Nothing antiConsTokenValueM :: SExpToken -> Maybe () antiConsTokenValueM t = case t of AntiConsToken -> Just () ; _ -> Nothing antiValueTokenValueM :: SExpToken -> Maybe String antiValueTokenValueM t = case t of (AntiValueToken n) -> Just n ; _ -> Nothing antiAtomTokenValueM :: SExpToken -> Maybe String antiAtomTokenValueM t = case t of (AntiAtomToken n) -> Just n ; _ -> Nothing antiListTokenValueM :: SExpToken -> Maybe String antiListTokenValueM t = case t of (AntiListToken n) -> Just n ; _ -> Nothing antiStringTokenValueM :: SExpToken -> Maybe String antiStringTokenValueM t = case t of (AntiStringToken n) -> Just n ; _ -> Nothing antiBooleanTokenValueM :: SExpToken -> Maybe String antiBooleanTokenValueM t = case t of (AntiBooleanToken n) -> Just n ; _ -> Nothing antiIntegerTokenValueM :: SExpToken -> Maybe String antiIntegerTokenValueM t = case t of (AntiIntegerToken n) -> Just n ; _ -> Nothing antiFloatingTokenValueM :: SExpToken -> Maybe String antiFloatingTokenValueM t = case t of (AntiFloatingToken n) -> Just n ; _ -> Nothing antiSymbolTokenValueM :: SExpToken -> Maybe String antiSymbolTokenValueM t = case t of (AntiSymbolToken n) -> Just n ; _ -> Nothing sexpTokens :: [Parser SExpToken] sexpTokens = [ -- Atom Tokens let inside = many $ noneOf "\"\\" <|> esc esc = char '\\' >> msum [escQuote, escEsc, escNewline] escQuote = char '"' escEsc = char '\\' escNewline = char 'n' >> return '\n' in liftM StringToken $ between (char '"') (char '"') inside , string "true" >> return (BooleanToken True) , string "false" >> return (BooleanToken False) , do sign <- option "" $ string "-" num <- many1 digit return . IntegerToken . read $ sign ++ num , do sign <- option "" $ string "-" num <- many1 digit decimal <- option "" $ do dot <- string "." followDot <- many1 digit return $ dot ++ followDot exponential <- option "" $ do leading <- string "e" followExp <- many1 digit return $ leading ++ followExp return . FloatingToken . read $ sign ++ num ++ decimal ++ exponential , liftM SymbolToken $ do { a <- noneOf " \t\n@#()\"\\" ; b <- many (noneOf " \t\n#()\"\\") ; return $ a:b } -- Structure Tokens , char '(' >> return LParenToken , char ')' >> return RParenToken -- Filtered Tokens , many1 (oneOf " \t\n") >> return WhitespaceToken , do a <- liftM (:[]) $ char '#' b <- option "" $ do j <- liftM (:[]) $ noneOf "|\n" k <- many (noneOf "\n") return $ j ++ k c <- liftM (:[]) $ char '\n' return . CommentToken $ concat [a, b, c] , let inside :: Int -> Parser String inside n = if n == 0 then return "" else msum [ do { a <- noneOf "#|" ; b <- inside n ; return $ a:b } , do { a <- char '#' ; b <- insidePostPound n ; return $ a:b } , do { a <- char '|' ; b <- insidePostPipe n ; return $ a:b } ] insidePostPound n = msum [ do { a <- noneOf "#|" ; b <- inside n ; return $ a:b } , do { a <- char '#' ; b <- insidePostPound n ; return $ a:b } , do { a <- char '|' ; b <- inside (n + 1) ; return $ a:b } ] insidePostPipe n = msum [ do { a <- noneOf "#|" ; b <- inside n ; return $ a:b } , do { a <- char '|' ; b <- insidePostPipe n ; return $ a:b } , if n == 0 then mzero else do { a <- char '#' ; b <- inside (n - 1) ; return $ a:b } ] in do { a <- string "#|" ; b <- inside 1 ; return . CommentToken $ a ++ b } -- Anti Tokens , string "@~" >> return AntiConsToken , string "@:" >> liftM AntiValueToken antiName , string "@atom:" >> liftM AntiAtomToken antiName , string "@list:" >> liftM AntiListToken antiName , string "@str:" >> liftM AntiStringToken antiName , string "@bool:" >> liftM AntiBooleanToken antiName , string "@int:" >> liftM AntiIntegerToken antiName , string "@float:" >> liftM AntiFloatingToken antiName , string "@sym:" >> liftM AntiSymbolToken antiName ] where antiName = do a <- letter <|> char '_' as <- many (alphaNum <|> oneOf "_'") return (a:as) lexSExp :: String -> Either String [(SourcePos, SExpToken)] lexSExp i = do (result, o, i') <- lexSExp' (initialPos "") i case i' of [] -> return $ filter (not . isDiscardSExpToken . snd) result _ -> fail $ printf "did not consume all input: %s" (show o) lexSExp' :: SourcePos -> String -> Either String ([(SourcePos, SExpToken)], SourcePos, String) lexSExp' o [] = return ([], o, []) lexSExp' o i = do (lexed, o', i') <- lexOne o sexpTokens i (rest, o'', i'') <- lexSExp' o' i' return (lexed : rest, o'', i'') -- Parsing type SExpParser a = GenParser (SourcePos, SExpToken) () a sexpTokenParser :: (SExpToken -> Maybe a) -> SExpParser a sexpTokenParser f = token show fst (f . snd) stringTok :: SExpParser String ; stringTok = sexpTokenParser stringTokenValueM booleanTok :: SExpParser Bool ; booleanTok = sexpTokenParser booleanTokenValueM integerTok :: SExpParser Integer ; integerTok = sexpTokenParser integerTokenValueM floatingTok :: SExpParser Double ; floatingTok = sexpTokenParser floatingTokenValueM symbolTok :: SExpParser String ; symbolTok = sexpTokenParser symbolTokenValueM lParenTok :: SExpParser () ; lParenTok = sexpTokenParser lParenTokenValueM rParenTok :: SExpParser () ; rParenTok = sexpTokenParser rParenTokenValueM antiConsTok :: SExpParser () ; antiConsTok = sexpTokenParser antiConsTokenValueM antiValueTok :: SExpParser String ; antiValueTok = sexpTokenParser antiValueTokenValueM antiAtomTok :: SExpParser String ; antiAtomTok = sexpTokenParser antiAtomTokenValueM antiListTok :: SExpParser String ; antiListTok = sexpTokenParser antiListTokenValueM antiStringTok :: SExpParser String ; antiStringTok = sexpTokenParser antiStringTokenValueM antiBooleanTok :: SExpParser String ; antiBooleanTok = sexpTokenParser antiBooleanTokenValueM antiIntegerTok :: SExpParser String ; antiIntegerTok = sexpTokenParser antiIntegerTokenValueM antiFloatingTok :: SExpParser String ; antiFloatingTok = sexpTokenParser antiFloatingTokenValueM antiSymbolTok :: SExpParser String ; antiSymbolTok = sexpTokenParser antiSymbolTokenValueM atomP :: SExpParser Atom atomP = msum [ liftM StringAtom stringTok , liftM BooleanAtom booleanTok , liftM IntegerAtom integerTok , liftM FloatingAtom floatingTok , liftM SymbolAtom symbolTok ] antiAtomP :: SExpParser AntiAtom antiAtomP = msum [ liftM ValueAntiAtom antiValueTok , liftM AtomAntiAtom antiAtomTok , liftM ListAntiAtom antiListTok , liftM StringAntiAtom antiStringTok , liftM BooleanAntiAtom antiBooleanTok , liftM IntegerAntiAtom antiIntegerTok , liftM FloatingAntiAtom antiFloatingTok , liftM SymbolAntiAtom antiSymbolTok ] sexpP :: SExpParser SExp sexpP = msum [ liftM (SExp . AtomSExp) atomP , liftM (SExp . LinkSExp) $ between lParenTok rParenTok sexpLinkP ] sexpLinkP :: SExpParser SExpLink sexpLinkP = msum [ do s <- sexpP ss <- sexpLinkP return . SExpLink $ ConsLink s ss , return $ SExpLink NullLink ] dualSExpP :: SExpParser DualSExp dualSExpP = msum [ liftM (PositiveDualSExp . AtomSExp) atomP , liftM AntiAtomDualSExp antiAtomP , liftM (PositiveDualSExp . LinkSExp) $ between lParenTok rParenTok dualSExpLinkP ] dualSExpLinkP :: SExpParser DualSExpLink dualSExpLinkP = msum [ do s <- dualSExpP ss <- dualSExpLinkP return . PositiveDualSExpLink $ ConsLink s ss , do antiConsTok msum [ do anti <- antiValueTok return $ AntiConsValueDualSExpLink anti , do anti <- antiListTok return $ AntiConsListDualSExpLink anti ] , return $ PositiveDualSExpLink NullLink ] parseSExp :: String -> Either String SExp parseSExp = tokParse lexSExp sexpP parseDualSExp :: String -> Either String DualSExp parseDualSExp = tokParse lexSExp dualSExpP -- Quasi Quoting convertAtomE :: Atom -> Q Exp convertAtomE (StringAtom s) = conE 'StringAtom `appE` litE (stringL s) convertAtomE (BooleanAtom True) = conE 'BooleanAtom `appE` conE 'True convertAtomE (BooleanAtom False) = conE 'BooleanAtom `appE` conE 'False convertAtomE (IntegerAtom i) = conE 'IntegerAtom `appE` litE (integerL i) convertAtomE (FloatingAtom f) = conE 'FloatingAtom `appE` litE (rationalL (toRational f)) convertAtomE (SymbolAtom s) = conE 'SymbolAtom `appE` litE (stringL s) convertAntiAtomToValueE :: AntiAtom -> (Q Exp -> Q Exp) -> (Q Exp -> Q Exp) -> Q Exp convertAntiAtomToValueE (ValueAntiAtom name) _ _ = varE $ mkName name convertAntiAtomToValueE (AtomAntiAtom name) qatom _ = qatom . varE $ mkName name convertAntiAtomToValueE (ListAntiAtom name) _ qlistToValue = qlistToValue . varE $ mkName name convertAntiAtomToValueE (StringAntiAtom name) qatom _ = qatom $ conE 'StringAtom `appE` varE (mkName name) convertAntiAtomToValueE (BooleanAntiAtom name) qatom _ = qatom $ conE 'BooleanAtom `appE` varE (mkName name) convertAntiAtomToValueE (IntegerAntiAtom name) qatom _ = qatom $ conE 'IntegerAtom `appE` varE (mkName name) convertAntiAtomToValueE (FloatingAntiAtom name) qatom _ = qatom $ conE 'FloatingAtom `appE` varE (mkName name) convertAntiAtomToValueE (SymbolAntiAtom name) qatom _ = qatom $ conE 'SymbolAtom `appE` varE (mkName name) convertOpenSExpE :: (l -> Q Exp) -> OpenSExp l -> Q Exp convertOpenSExpE _ (AtomSExp a) = conE 'AtomSExp `appE` convertAtomE a convertOpenSExpE cl (LinkSExp l) = conE 'LinkSExp `appE` cl l convertOpenLinkE :: (a -> Q Exp) -> (l -> Q Exp) -> OpenLink a l -> Q Exp convertOpenLinkE _ _ NullLink = conE 'NullLink convertOpenLinkE ch cl (ConsLink h l) = conE 'ConsLink `appE` ch h `appE` cl l convertDualSExpToSExpE :: DualSExp -> Q Exp convertDualSExpToSExpE (PositiveDualSExp p) = conE 'SExp `appE` convertOpenSExpE convertDualSExpToSExpLinkE p convertDualSExpToSExpE (AntiAtomDualSExp aa) = convertAntiAtomToValueE aa (\ x -> conE 'SExp `appE` (conE 'AtomSExp `appE` x)) (\ x -> varE 'listToSExp `appE` x) convertDualSExpToSExpLinkE :: DualSExpLink -> Q Exp convertDualSExpToSExpLinkE (PositiveDualSExpLink p) = conE 'SExpLink `appE` convertOpenLinkE convertDualSExpToSExpE convertDualSExpToSExpLinkE p convertDualSExpToSExpLinkE (AntiConsValueDualSExpLink name) = conE 'SExpLink `appE` varE (mkName name) convertDualSExpToSExpLinkE (AntiConsListDualSExpLink name) = varE 'listToSExpLink `appE` varE (mkName name) convertAtomP :: Atom -> Q Pat convertAtomP (StringAtom s) = conP 'StringAtom [litP (stringL s)] convertAtomP (BooleanAtom True) = conP 'BooleanAtom [conP 'True []] convertAtomP (BooleanAtom False) = conP 'BooleanAtom [conP 'False []] convertAtomP (IntegerAtom i) = conP 'IntegerAtom [litP (integerL i)] convertAtomP (FloatingAtom f) = conP 'FloatingAtom [litP (rationalL (toRational f))] convertAtomP (SymbolAtom s) = conP 'SymbolAtom [litP (stringL s)] convertAntiAtomToValueP :: AntiAtom -> (Q Pat -> Q Pat) -> (Q Pat -> Q Pat) -> Q Pat convertAntiAtomToValueP (ValueAntiAtom "_") _ _ = wildP convertAntiAtomToValueP (ValueAntiAtom name) _ _ = varP $ mkName name convertAntiAtomToValueP (AtomAntiAtom "_") qatom _ = qatom wildP convertAntiAtomToValueP (AtomAntiAtom name) qatom _ = qatom . varP $ mkName name convertAntiAtomToValueP (ListAntiAtom "_") _ qvalueToList = qvalueToList wildP convertAntiAtomToValueP (ListAntiAtom name) _ qvalueToList = qvalueToList . varP $ mkName name convertAntiAtomToValueP (StringAntiAtom "_") qatom _ = qatom $ conP 'StringAtom [wildP] convertAntiAtomToValueP (StringAntiAtom name) qatom _ = qatom $ conP 'StringAtom [varP $ mkName name] convertAntiAtomToValueP (BooleanAntiAtom "_") qatom _ = qatom $ conP 'BooleanAtom [wildP] convertAntiAtomToValueP (BooleanAntiAtom name) qatom _ = qatom $ conP 'BooleanAtom [varP $ mkName name] convertAntiAtomToValueP (IntegerAntiAtom "_") qatom _ = qatom $ conP 'IntegerAtom [wildP] convertAntiAtomToValueP (IntegerAntiAtom name) qatom _ = qatom $ conP 'IntegerAtom [varP $ mkName name] convertAntiAtomToValueP (FloatingAntiAtom "_") qatom _ = qatom $ conP 'FloatingAtom [wildP] convertAntiAtomToValueP (FloatingAntiAtom name) qatom _ = qatom $ conP 'FloatingAtom [varP $ mkName name] convertAntiAtomToValueP (SymbolAntiAtom "_") qatom _ = qatom $ conP 'SymbolAtom [wildP] convertAntiAtomToValueP (SymbolAntiAtom name) qatom _ = qatom $ conP 'SymbolAtom [varP $ mkName name] convertOpenSExpP :: (l -> Q Pat) -> OpenSExp l -> Q Pat convertOpenSExpP _ (AtomSExp a) = conP 'AtomSExp [convertAtomP a] convertOpenSExpP cl (LinkSExp l) = conP 'LinkSExp [cl l] convertOpenLinkP :: (a -> Q Pat) -> (l -> Q Pat) -> OpenLink a l -> Q Pat convertOpenLinkP _ _ NullLink = conP 'NullLink [] convertOpenLinkP ch cl (ConsLink h l) = conP 'ConsLink [ch h, cl l] convertDualSExpToSExpP :: DualSExp -> Q Pat convertDualSExpToSExpP (PositiveDualSExp p) = conP 'SExp [convertOpenSExpP convertDualSExpToSExpLinkP p] convertDualSExpToSExpP (AntiAtomDualSExp aa) = convertAntiAtomToValueP aa (\ x -> conP 'SExp [conP 'AtomSExp [x]]) (\ x -> viewP (varE 'sexpToList) (conP 'Just [x])) convertDualSExpToSExpLinkP :: DualSExpLink -> Q Pat convertDualSExpToSExpLinkP (PositiveDualSExpLink p) = conP 'SExpLink [convertOpenLinkP convertDualSExpToSExpP convertDualSExpToSExpLinkP p] convertDualSExpToSExpLinkP (AntiConsValueDualSExpLink name) = conP 'SExpLink [varP (mkName name)] convertDualSExpToSExpLinkP (AntiConsListDualSExpLink name) = viewP (varE 'sexpLinkToList) (varP (mkName name)) sexp :: QuasiQuoter sexp = QuasiQuoter { quoteExp = convertDualSExpToSExpE <=< qtokParse lexSExp dualSExpP , quotePat = convertDualSExpToSExpP <=< qtokParse lexSExp dualSExpP , quoteType = undefined , quoteDec = undefined } sexpl :: QuasiQuoter sexpl = QuasiQuoter { quoteExp = convertDualSExpToSExpLinkE <=< qtokParse lexSExp dualSExpLinkP , quotePat = convertDualSExpToSExpLinkP <=< qtokParse lexSExp dualSExpLinkP , quoteType = undefined , quoteDec = undefined } -- Unparsing unparseAtom :: Atom -> String unparseAtom (StringAtom s) = printf "\"%s\"" (concatMap escape s) where escape '\n' = "\\n" escape '"' = "\\\"" escape '\\' = "\\\\" escape o = [o] unparseAtom (BooleanAtom True) = "true" unparseAtom (BooleanAtom False) = "false" unparseAtom (IntegerAtom i) = show i unparseAtom (FloatingAtom f) = show f unparseAtom (SymbolAtom s) = s unparseSExp :: SExp -> String unparseSExp (SExp (AtomSExp a)) = unparseAtom a unparseSExp (SExp (LinkSExp (SExpLink NullLink))) = "()" unparseSExp (SExp (LinkSExp (SExpLink (ConsLink h t)))) = printf "(%s%s)" (unparseSExp h) (unparseInList t) unparseInList :: SExpLink -> String unparseInList (SExpLink NullLink) = "" unparseInList (SExpLink (ConsLink h t)) = printf " %s%s" (unparseSExp h) (unparseInList t) -- Helpers lexOne :: (Ord a) => SourcePos -> [Parser a] -> String -> Either String ((SourcePos, a), SourcePos, String) lexOne pos ps input = do (a, b, c) <- foldl findBest (Left $ printf "could not lex at %s: %s" (show pos) (show input)) $ map tryP ps return ((pos, a), b, c) where tryP p = left show $ parse p' "" input where p' = do setPosition pos x <- p i <- getInput o <- getPosition return (x, o, i) findBest x@(Left _) (Left _) = x findBest (Left _) x@(Right _) = x findBest x@(Right _) (Left _) = x findBest x1@(Right (_, p1, _)) x2@(Right (_, p2, _)) | p2 > p1 = x2 | otherwise = x1 left :: (a -> b) -> Either a c -> Either b c left f (Left x) = Left (f x) left _ (Right x) = Right x tokParse :: (String -> Either String [tok]) -> GenParser tok () a -> String -> Either String a tokParse lexify p input = do lexed <- lexify input left show $ parse p "" lexed where left f (Left x) = Left (f x) left _ (Right x) = Right x qtokParse :: (String -> Either String [tok]) -> GenParser tok () a -> String -> Q a qtokParse lexify p input = case tokParse lexify p input of (Left e) -> runIO $ fail e (Right r) -> return r