{-# LANGUAGE TemplateHaskell #-}

module Data.SExp.Parse
  ( SExpToken(..)
  , stringTokenValueM, booleanTokenValueM, integerTokenValueM, floatingTokenValueM
  , symbolTokenValueM, lParenTokenValueM, rParenTokenValueM, whitespaceTokenValueM
  , commentTokenValueM, antiConsTokenValueM, antiValueTokenValueM, antiAtomTokenValueM
  , antiStringTokenValueM, antiBooleanTokenValueM, antiIntegerTokenValueM, antiFloatingTokenValueM
  , antiSymbolTokenValueM
  , isDiscardSExpToken
  , sexpTokens
  , lexSExp
  , parseSExp, parseDualSExp
  , convertAtomE, convertAntiAtomToAtomE
  , convertOpenLinkE
  , convertAtomP, convertAntiAtomToAtomP
  , convertOpenLinkP
  , sexp, sexpl
  , lexOne, tokParse, qtokParse
  ) where

import Control.Monad
import Data.SExp.Data
import Language.Haskell.TH
import Language.Haskell.TH.Quote
import Text.Parsec
import Text.Parsec.Pos
import Text.Parsec.String
import Text.Printf

-- Lexing

data SExpToken =
  -- Atom Tokens
    StringToken String
  | BooleanToken Bool
  | IntegerToken Integer
  | FloatingToken Double
  | SymbolToken String

  -- Structure Tokens
  | LParenToken
  | RParenToken

  -- Filtered Tokens
  | WhitespaceToken
  | CommentToken String

  -- Anti Tokens
  | AntiConsToken
  | AntiValueToken String
  | AntiAtomToken String
  | AntiStringToken String
  | AntiBooleanToken String
  | AntiIntegerToken String
  | AntiFloatingToken String
  | AntiSymbolToken String
  deriving (Eq, Ord, Show)

isDiscardSExpToken :: SExpToken -> Bool
isDiscardSExpToken WhitespaceToken = True
isDiscardSExpToken (CommentToken _) = True
isDiscardSExpToken _ = False

stringTokenValueM :: SExpToken -> Maybe String
stringTokenValueM t = case t of (StringToken s) -> Just s ; _ -> Nothing
booleanTokenValueM :: SExpToken -> Maybe Bool
booleanTokenValueM t = case t of (BooleanToken b) -> Just b ; _ -> Nothing
integerTokenValueM :: SExpToken -> Maybe Integer
integerTokenValueM t = case t of (IntegerToken i) -> Just i ; _ -> Nothing
floatingTokenValueM :: SExpToken -> Maybe Double
floatingTokenValueM t = case t of (FloatingToken f) -> Just f ; _ -> Nothing
symbolTokenValueM :: SExpToken -> Maybe String
symbolTokenValueM t = case t of (SymbolToken s) -> Just s ; _ -> Nothing
lParenTokenValueM :: SExpToken -> Maybe ()
lParenTokenValueM t = case t of LParenToken -> Just () ; _ -> Nothing
rParenTokenValueM :: SExpToken -> Maybe ()
rParenTokenValueM t = case t of RParenToken -> Just () ; _ -> Nothing
whitespaceTokenValueM :: SExpToken -> Maybe ()
whitespaceTokenValueM t = case t of WhitespaceToken -> Just () ; _ -> Nothing
commentTokenValueM :: SExpToken -> Maybe String
commentTokenValueM t = case t of (CommentToken s) -> Just s  ; _ -> Nothing
antiConsTokenValueM :: SExpToken -> Maybe ()
antiConsTokenValueM t = case t of AntiConsToken -> Just () ; _ -> Nothing
antiValueTokenValueM :: SExpToken -> Maybe String
antiValueTokenValueM t = case t of (AntiValueToken n) -> Just n ; _ -> Nothing
antiAtomTokenValueM :: SExpToken -> Maybe String
antiAtomTokenValueM t = case t of (AntiAtomToken n) -> Just n ; _ -> Nothing
antiStringTokenValueM :: SExpToken -> Maybe String
antiStringTokenValueM t = case t of (AntiStringToken n) -> Just n ; _ -> Nothing
antiBooleanTokenValueM :: SExpToken -> Maybe String
antiBooleanTokenValueM t = case t of (AntiBooleanToken n) -> Just n ; _ -> Nothing
antiIntegerTokenValueM :: SExpToken -> Maybe String
antiIntegerTokenValueM t = case t of (AntiIntegerToken n) -> Just n ; _ -> Nothing
antiFloatingTokenValueM :: SExpToken -> Maybe String
antiFloatingTokenValueM t = case t of (AntiFloatingToken n) -> Just n ; _ -> Nothing
antiSymbolTokenValueM :: SExpToken -> Maybe String
antiSymbolTokenValueM t = case t of (AntiSymbolToken n) -> Just n ; _ -> Nothing

sexpTokens :: [Parser SExpToken]
sexpTokens = 
  [
  -- Atom Tokens
    let inside = many $ noneOf "\"\\" <|> esc
        esc = char '\\' >> msum [escQuote, escEsc, escNewline]
        escQuote = char '"'
        escEsc = char '\\'
        escNewline = char 'n' >> return '\n'
    in liftM StringToken $ between (char '"') (char '"') inside
  , string "true" >> return (BooleanToken True)
  , string "false" >> return (BooleanToken False)
  , do
      sign <- option "" $ string "-"
      num <- many1 digit
      return . IntegerToken . read $ sign ++ num
  , do
      sign <- option "" $ string "-"
      num <- many1 digit
      decimal <- option "" $ do
        dot <- string "."
        followDot <- many1 digit
        return $ dot ++ followDot
      exponential <- option "" $ do
        leading <- string "e"
        followExp <- many1 digit
        return $ leading ++ followExp
      return . FloatingToken . read $ sign ++ num ++ decimal ++ exponential
  , liftM SymbolToken $ do { a <- noneOf " \t\n@#()\"\\" ; b <- many (noneOf " \t\n#()\"\\") ; return $ a:b }

  -- Structure Tokens
  , char '(' >> return LParenToken
  , char ')' >> return RParenToken

  -- Filtered Tokens
  , many1 (oneOf " \t\n") >> return WhitespaceToken
  , do  
      a <- liftM (:[]) $ char '#' 
      b <- option "" $ do
        j <- liftM (:[]) $ noneOf "|\n"
        k <- many (noneOf "\n")
        return $ j ++ k
      c <- liftM (:[]) $ char '\n' 
      return . CommentToken $ concat [a, b, c]
  , let inside :: Int -> Parser String
        inside n = if n == 0 then return "" else msum 
          [ do { a <- noneOf "#|" ; b <- inside n ; return $ a:b }
          , do { a <- char '#' ; b <- insidePostPound n ; return $ a:b }
          , do { a <- char '|' ; b <- insidePostPipe n ; return $ a:b }
          ]
        insidePostPound n = msum
          [ do { a <- noneOf "#|" ; b <- inside n ; return $ a:b }
          , do { a <- char '#' ; b <- insidePostPound n ; return $ a:b }
          , do { a <- char '|' ; b <- inside (n + 1) ; return $ a:b }
          ]
        insidePostPipe n = msum
          [ do { a <- noneOf "#|" ; b <- inside n ; return $ a:b }
          , do { a <- char '|' ; b <- insidePostPipe n ; return $ a:b }
          , if n == 0 then mzero else do { a <- char '#' ; b <- inside (n - 1) ; return $ a:b }
          ]
    in do { a <- string "#|" ; b <- inside 1 ; return . CommentToken $ a ++ b }

  -- Anti Tokens
  , string "@~" >> return AntiConsToken
  , string "@:" >> liftM AntiValueToken antiName
  , string "@atom:" >> liftM AntiAtomToken antiName
  , string "@string:" >> liftM AntiStringToken antiName
  , string "@boolean:" >> liftM AntiBooleanToken antiName
  , string "@integer:" >> liftM AntiIntegerToken antiName
  , string "@floating:" >> liftM AntiFloatingToken antiName
  , string "@symbol:" >> liftM AntiSymbolToken antiName
  ]
  where
    antiName = do
      a <- letter <|> char '_'
      as <- many (alphaNum <|> oneOf "_'")
      return (a:as)

lexSExp :: String -> Either String [(SourcePos, SExpToken)]
lexSExp i = do
  (result, o, i') <- lexSExp' (initialPos "") i
  case i' of
    [] -> return $ filter (not . isDiscardSExpToken . snd) result
    _ -> fail $ printf "did not consume all input: %s" (show o) 

lexSExp' :: SourcePos -> String -> Either String ([(SourcePos, SExpToken)], SourcePos, String)
lexSExp' o [] = return ([], o, [])
lexSExp' o i = do
  (lexed, o', i') <- lexOne o sexpTokens i
  (rest, o'', i'') <- lexSExp' o' i'
  return (lexed : rest, o'', i'')

-- Parsing

type SExpParser a = GenParser (SourcePos, SExpToken) () a

sexpTokenParser :: (SExpToken -> Maybe a) -> SExpParser a
sexpTokenParser f = token show fst (f . snd)

stringTok       :: SExpParser String  ; stringTok       = sexpTokenParser stringTokenValueM
booleanTok      :: SExpParser Bool    ; booleanTok      = sexpTokenParser booleanTokenValueM
integerTok      :: SExpParser Integer ; integerTok      = sexpTokenParser integerTokenValueM
floatingTok     :: SExpParser Double  ; floatingTok     = sexpTokenParser floatingTokenValueM
symbolTok       :: SExpParser String  ; symbolTok       = sexpTokenParser symbolTokenValueM
lParenTok       :: SExpParser ()      ; lParenTok       = sexpTokenParser lParenTokenValueM
rParenTok       :: SExpParser ()      ; rParenTok       = sexpTokenParser rParenTokenValueM
antiConsTok     :: SExpParser ()      ; antiConsTok     = sexpTokenParser antiConsTokenValueM
antiValueTok    :: SExpParser String  ; antiValueTok    = sexpTokenParser antiValueTokenValueM
antiAtomTok     :: SExpParser String  ; antiAtomTok     = sexpTokenParser antiAtomTokenValueM
antiStringTok   :: SExpParser String  ; antiStringTok   = sexpTokenParser antiStringTokenValueM
antiBooleanTok  :: SExpParser String  ; antiBooleanTok  = sexpTokenParser antiBooleanTokenValueM
antiIntegerTok  :: SExpParser String  ; antiIntegerTok  = sexpTokenParser antiIntegerTokenValueM
antiFloatingTok :: SExpParser String  ; antiFloatingTok = sexpTokenParser antiFloatingTokenValueM
antiSymbolTok   :: SExpParser String  ; antiSymbolTok   = sexpTokenParser antiSymbolTokenValueM

atomP :: SExpParser Atom
atomP = msum
  [ liftM StringAtom stringTok
  , liftM BooleanAtom booleanTok
  , liftM IntegerAtom integerTok
  , liftM FloatingAtom floatingTok
  , liftM SymbolAtom symbolTok
  ]

antiAtomP :: SExpParser AntiAtom
antiAtomP = msum
  [ liftM ValueAntiAtom antiValueTok
  , liftM AtomAntiAtom antiAtomTok
  , liftM StringAntiAtom antiStringTok
  , liftM BooleanAntiAtom antiBooleanTok
  , liftM IntegerAntiAtom antiIntegerTok
  , liftM FloatingAntiAtom antiFloatingTok
  , liftM SymbolAntiAtom antiSymbolTok
  ]

sexpP :: SExpParser SExp
sexpP = msum
  [ liftM (SExp . AtomSExp) atomP
  , liftM (SExp . LinkSExp) $ between lParenTok rParenTok sexpLinkP
  ]

sexpLinkP :: SExpParser SExpLink
sexpLinkP = msum
  [ do
      s <- sexpP
      ss <- sexpLinkP
      return . SExpLink $ ConsLink s ss
  , return $ SExpLink NullLink
  ]

dualSExpP :: SExpParser DualSExp
dualSExpP = msum
  [ liftM (PositiveDualSExp . AtomSExp) atomP
  , liftM AntiAtomDualSExp antiAtomP
  , liftM (PositiveDualSExp . LinkSExp) $ between lParenTok rParenTok dualSExpLinkP
  ]

dualSExpLinkP :: SExpParser DualSExpLink
dualSExpLinkP = msum
  [ do 
      s <- dualSExpP 
      ss <- dualSExpLinkP 
      return . PositiveDualSExpLink $ ConsLink s ss
  , do 
      antiConsTok 
      anti <- antiValueTok 
      return $ AntiConsDualSExpLink anti
  , return $ PositiveDualSExpLink NullLink
  ]

parseSExp :: String -> Either String SExp
parseSExp = tokParse lexSExp sexpP

parseDualSExp :: String -> Either String DualSExp
parseDualSExp = tokParse lexSExp dualSExpP

-- Quasi Quoting

convertAtomE :: Atom -> Q Exp
convertAtomE (StringAtom s) = conE 'StringAtom `appE` litE (stringL s)
convertAtomE (BooleanAtom True) = conE 'BooleanAtom `appE` conE 'True
convertAtomE (BooleanAtom False) = conE 'BooleanAtom `appE` conE 'False
convertAtomE (IntegerAtom i) = conE 'IntegerAtom `appE` litE (integerL i)
convertAtomE (FloatingAtom f) = conE 'FloatingAtom `appE` litE (rationalL (toRational f))
convertAtomE (SymbolAtom s) = conE 'SymbolAtom `appE` litE (stringL s)

convertAntiAtomToAtomE :: AntiAtom -> Q Exp
convertAntiAtomToAtomE (ValueAntiAtom name) = varE $ mkName name
convertAntiAtomToAtomE (AtomAntiAtom name) = conE 'SExp `appE` conE 'AtomSExp `appE` varE (mkName name)
convertAntiAtomToAtomE (StringAntiAtom name) = mkAntiAtomE 'StringAtom $ mkName name
convertAntiAtomToAtomE (BooleanAntiAtom name) = mkAntiAtomE 'BooleanAtom $ mkName name
convertAntiAtomToAtomE (IntegerAntiAtom name) = mkAntiAtomE 'IntegerAtom $ mkName name
convertAntiAtomToAtomE (FloatingAntiAtom name) = mkAntiAtomE 'FloatingAtom $ mkName name
convertAntiAtomToAtomE (SymbolAntiAtom name) = mkAntiAtomE 'SymbolAtom $ mkName name

mkAntiAtomE :: Name -> Name -> Q Exp
mkAntiAtomE conName varName = conE 'AtomSExp `appE` conE conName `appE` varE varName

convertOpenSExpE :: (l -> Q Exp) -> OpenSExp l -> Q Exp
convertOpenSExpE _ (AtomSExp a) = conE 'AtomSExp `appE` convertAtomE a
convertOpenSExpE cl (LinkSExp l) = conE 'LinkSExp `appE` cl l

convertOpenLinkE :: (a -> Q Exp) -> (l -> Q Exp) -> OpenLink a l -> Q Exp
convertOpenLinkE _ _ NullLink = conE 'NullLink
convertOpenLinkE ch cl (ConsLink h l) = conE 'ConsLink `appE` ch h `appE` cl l

convertDualSExpToSExpE :: DualSExp -> Q Exp
convertDualSExpToSExpE (PositiveDualSExp p) = 
  conE 'SExp `appE` convertOpenSExpE convertDualSExpToSExpLinkE p
convertDualSExpToSExpE (AntiAtomDualSExp aa) = 
  conE 'SExp `appE` conE 'AtomSExp `appE` convertAntiAtomToAtomE aa

convertDualSExpToSExpLinkE :: DualSExpLink -> Q Exp
convertDualSExpToSExpLinkE (PositiveDualSExpLink p) = 
  conE 'SExpLink `appE` convertOpenLinkE convertDualSExpToSExpE convertDualSExpToSExpLinkE p
convertDualSExpToSExpLinkE (AntiConsDualSExpLink name) =
  conE 'SExpLink `appE` varE (mkName name)

convertAtomP :: Atom -> Q Pat
convertAtomP (StringAtom s) = conP 'StringAtom [litP (stringL s)]
convertAtomP (BooleanAtom True) = conP 'BooleanAtom [conP 'True []]
convertAtomP (BooleanAtom False) = conP 'BooleanAtom [conP 'False []]
convertAtomP (IntegerAtom i) = conP 'IntegerAtom [litP (integerL i)]
convertAtomP (FloatingAtom f) = conP 'FloatingAtom [litP (rationalL (toRational f))]
convertAtomP (SymbolAtom s) = conP 'SymbolAtom [litP (stringL s)]

convertAntiAtomToAtomP :: AntiAtom -> Q Pat
convertAntiAtomToAtomP (ValueAntiAtom "_") = wildP
convertAntiAtomToAtomP (ValueAntiAtom name) = varP $ mkName name
convertAntiAtomToAtomP (AtomAntiAtom "_") = conP 'AtomSExp [wildP]
convertAntiAtomToAtomP (AtomAntiAtom name) = conP 'AtomSExp [varP (mkName name)]
convertAntiAtomToAtomP (StringAntiAtom "_") = mkWildAtomP 'StringAtom
convertAntiAtomToAtomP (StringAntiAtom name) = mkAntiAtomP 'StringAtom $ mkName name
convertAntiAtomToAtomP (BooleanAntiAtom "_") = mkWildAtomP 'BooleanAtom
convertAntiAtomToAtomP (BooleanAntiAtom name) = mkAntiAtomP 'BooleanAtom $ mkName name
convertAntiAtomToAtomP (IntegerAntiAtom "_") = mkWildAtomP 'IntegerAtom
convertAntiAtomToAtomP (IntegerAntiAtom name) = mkAntiAtomP 'IntegerAtom $ mkName name
convertAntiAtomToAtomP (FloatingAntiAtom "_") = mkWildAtomP 'FloatingAtom
convertAntiAtomToAtomP (FloatingAntiAtom name) = mkAntiAtomP 'FloatingAtom $ mkName name
convertAntiAtomToAtomP (SymbolAntiAtom "_") = mkWildAtomP 'SymbolAtom
convertAntiAtomToAtomP (SymbolAntiAtom name) = mkAntiAtomP 'SymbolAtom $ mkName name

mkAntiAtomP :: Name -> Name -> Q Pat
mkAntiAtomP conName varName = conP conName [varP varName]
mkWildAtomP :: Name -> Q Pat
mkWildAtomP conName = conP conName [wildP]

convertOpenSExpP :: (l -> Q Pat) -> OpenSExp l -> Q Pat
convertOpenSExpP _ (AtomSExp a) = conP 'AtomSExp [convertAtomP a]
convertOpenSExpP cl (LinkSExp l) = conP 'LinkSExp [cl l]

convertOpenLinkP :: (a -> Q Pat) -> (l -> Q Pat) -> OpenLink a l -> Q Pat
convertOpenLinkP _ _ NullLink = conP 'NullLink []
convertOpenLinkP ch cl (ConsLink h l) = conP 'ConsLink [ch h, cl l]

convertDualSExpToSExpP :: DualSExp -> Q Pat
convertDualSExpToSExpP (PositiveDualSExp p) =
  conP 'SExp [convertOpenSExpP convertDualSExpToSExpLinkP p]
convertDualSExpToSExpP (AntiAtomDualSExp aa) =
  conP 'SExp [conP 'AtomSExp [convertAntiAtomToAtomP aa]]

convertDualSExpToSExpLinkP :: DualSExpLink -> Q Pat
convertDualSExpToSExpLinkP (PositiveDualSExpLink p) = 
  conP 'SExpLink [convertOpenLinkP convertDualSExpToSExpP convertDualSExpToSExpLinkP p]
convertDualSExpToSExpLinkP (AntiConsDualSExpLink name) =
  conP 'SExpLink [varP (mkName name)]

sexp :: QuasiQuoter
sexp = QuasiQuoter 
  { quoteExp = convertDualSExpToSExpE <=< qtokParse lexSExp dualSExpP
  , quotePat = convertDualSExpToSExpP <=< qtokParse lexSExp dualSExpP
  , quoteType = undefined
  , quoteDec = undefined
  }

sexpl :: QuasiQuoter
sexpl = QuasiQuoter 
  { quoteExp = convertDualSExpToSExpLinkE <=< qtokParse lexSExp dualSExpLinkP
  , quotePat = convertDualSExpToSExpLinkP <=< qtokParse lexSExp dualSExpLinkP
  , quoteType = undefined
  , quoteDec = undefined
  }

-- Helpers

lexOne :: (Ord a) => SourcePos -> [Parser a] -> String -> Either String ((SourcePos, a), SourcePos, String)
lexOne pos ps input = do
  (a, b, c) <- foldl findBest (Left $ printf "could not lex at %s: %s" (show pos) (show input)) $ map tryP ps
  return ((pos, a), b, c)
  where
    tryP p = left show $ parse p' "" input
      where
        p' = do
          setPosition pos
          x <- p
          i <- getInput
          o <- getPosition
          return (x, o, i)
    findBest x@(Left _) (Left _) = x
    findBest (Left _) x@(Right _) = x
    findBest x@(Right _) (Left _) = x
    findBest x1@(Right (_, p1, _)) x2@(Right (_, p2, _))
      | p2 > p1 = x2
      | otherwise = x1

    left :: (a -> b) -> Either a c -> Either b c
    left f (Left x) = Left (f x)
    left _ (Right x) = Right x

tokParse :: (String -> Either String [tok]) -> GenParser tok () a -> String -> Either String a
tokParse lexify p input = do
  lexed <- lexify input
  left show $ parse p "" lexed
  where
    left f (Left x) = Left (f x)
    left _ (Right x) = Right x

qtokParse :: (String -> Either String [tok]) -> GenParser tok () a -> String -> Q a
qtokParse lexify p input = case tokParse lexify p input  of
  (Left e) -> runIO $ fail e
  (Right r) -> return r