{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}

module Data.SCargot.Parse
  ( -- * Parsing
    decode
  , decodeOne
    -- * Parsing Control
  , SExprParser
  , Reader
  , Comment
  , mkParser
  , setCarrier
  , addReader
  , setComment
    -- * Specific SExprParser Conversions
  , asRich
  , asWellFormed
  , withQuote
  ) where

#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (<*), pure)
#endif
import           Control.Monad ((>=>))
import           Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import           Data.Text (Text)
import           Data.String (IsString)
import           Text.Parsec ( (<|>)
                             , (<?>)
                             , char
                             , eof
                             , lookAhead
                             , many1
                             , runParser
                             , skipMany
                             )
import           Text.Parsec.Char (anyChar, space)
import           Text.Parsec.Text (Parser)

import           Data.SCargot.Repr ( SExpr(..)
                                   , RichSExpr
                                   , WellFormedSExpr
                                   , toRich
                                   , toWellFormed
                                   )

type ReaderMacroMap atom = Map Char (Reader atom)

-- | A 'Reader' represents a reader macro: it takes a parser for
--   the S-Expression type and performs as much or as little
--   parsing as it would like, and then returns an S-expression.
type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))

-- | A 'Comment' represents any kind of skippable comment. This
--   parser __must__ be able to fail if a comment is not being
--   recognized, and it __must__ not consume any input in case
--   of failure.
type Comment = Parser ()

-- | A 'SExprParser' describes a parser for a particular value
--   that has been serialized as an s-expression. The @atom@ parameter
--   corresponds to a Haskell type used to represent the atoms,
--   and the @carrier@ parameter corresponds to the parsed S-Expression
--   structure.
data SExprParser atom carrier = SExprParser
  { forall atom carrier. SExprParser atom carrier -> Parser atom
sesPAtom   :: Parser atom
  , forall atom carrier.
SExprParser atom carrier -> ReaderMacroMap atom
readerMap  :: ReaderMacroMap atom
  , forall atom carrier. SExprParser atom carrier -> Maybe Comment
comment    :: Maybe Comment
  , forall atom carrier.
SExprParser atom carrier -> SExpr atom -> Either String carrier
postparse  :: SExpr atom -> Either String carrier
  }

-- | Create a basic 'SExprParser' when given a parser
--   for an atom type.
--
--   >>> import Text.Parsec (alphaNum, many1)
--   >>> let parser = mkParser (many1 alphaNum)
--   >>> decode parser "(ele phant)"
--   Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
mkParser :: Parser atom -> SExprParser atom (SExpr atom)
mkParser :: forall atom. Parser atom -> SExprParser atom (SExpr atom)
mkParser Parser atom
parser = SExprParser
  { sesPAtom :: Parser atom
sesPAtom   = Parser atom
parser
  , readerMap :: ReaderMacroMap atom
readerMap  = forall k a. Map k a
M.empty
  , comment :: Maybe Comment
comment    = forall a. Maybe a
Nothing
  , postparse :: SExpr atom -> Either String (SExpr atom)
postparse  = forall (m :: * -> *) a. Monad m => a -> m a
return
  }

-- | Modify the carrier type for a 'SExprParser'. This is
--   used internally to convert between various 'SExpr' representations,
--   but could also be used externally to add an extra conversion layer
--   onto a 'SExprParser'.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> import Data.SCargot.Repr (toRich)
-- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c
setCarrier :: forall b c a.
(b -> Either String c) -> SExprParser a b -> SExprParser a c
setCarrier b -> Either String c
f SExprParser a b
spec = SExprParser a b
spec { postparse :: SExpr a -> Either String c
postparse = forall atom carrier.
SExprParser atom carrier -> SExpr atom -> Either String carrier
postparse SExprParser a b
spec forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> b -> Either String c
f }

-- | Convert the final output representation from the 'SExpr' type
--   to the 'RichSExpr' type.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = asRich (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
asRich :: forall a b. SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
asRich = forall b c a.
(b -> Either String c) -> SExprParser a b -> SExprParser a c
setCarrier (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall atom. SExpr atom -> RichSExpr atom
toRich)

-- | Convert the final output representation from the 'SExpr' type
--   to the 'WellFormedSExpr' type.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = asWellFormed (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]]
asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
asWellFormed :: forall a b.
SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
asWellFormed = forall b c a.
(b -> Either String c) -> SExprParser a b -> SExprParser a c
setCarrier forall atom. SExpr atom -> Either String (WellFormedSExpr atom)
toWellFormed

-- | Add the ability to execute some particular reader macro, as
--   defined by its initial character and the 'Parser' which returns
--   the parsed S-Expression. The 'Reader' is passed a 'Parser' which
--   can be recursively called to parse more S-Expressions, and begins
--   parsing after the reader character has been removed from the
--   stream.
--
-- >>> import Text.Parsec (alphaNum, char, many1)
-- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p)
-- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum))
-- >>> decode parser "(an [ele phant])"
-- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)]

addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c
addReader :: forall a c. Char -> Reader a -> SExprParser a c -> SExprParser a c
addReader Char
c Reader a
reader SExprParser a c
spec = SExprParser a c
spec
  { readerMap :: ReaderMacroMap a
readerMap = forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert Char
c Reader a
reader (forall atom carrier.
SExprParser atom carrier -> ReaderMacroMap atom
readerMap SExprParser a c
spec) }

-- | Add the ability to ignore some kind of comment. This gets
--   factored into whitespace parsing, and it's very important that
--   the parser supplied __be able to fail__ (as otherwise it will
--   cause an infinite loop), and also that it __not consume any input__
--   (which may require it to be wrapped in 'try'.)
--
-- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string)
-- >>> let comment = string "//" *> manyTill anyChar newline *> pure ()
-- >>> let parser = setComment comment (mkParser (many1 alphaNum))
-- >>> decode parser "(ele //a comment\n  phant)"
-- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]

setComment :: Comment -> SExprParser a c -> SExprParser a c
setComment :: forall a c. Comment -> SExprParser a c -> SExprParser a c
setComment Comment
c SExprParser a c
spec = SExprParser a c
spec { comment :: Maybe Comment
comment = forall a. a -> Maybe a
Just (Comment
c forall s u (m :: * -> *) a.
ParsecT s u m a -> String -> ParsecT s u m a
<?> String
"comment") }

-- | Add the ability to understand a quoted S-Expression.
--   Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This
--   assumes that the underlying atom type implements the "IsString"
--   class, and will create the @quote@ atom using @fromString "quote"@.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = withQuote (mkParser (many1 alphaNum))
-- >>> decode parser "'elephant"
-- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t)
withQuote :: forall t.
IsString t =>
SExprParser t (SExpr t) -> SExprParser t (SExpr t)
withQuote = forall a c. Char -> Reader a -> SExprParser a c -> SExprParser a c
addReader Char
'\'' (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall {atom}. IsString atom => SExpr atom -> SExpr atom
go)
  where go :: SExpr atom -> SExpr atom
go SExpr atom
s  = forall atom. SExpr atom -> SExpr atom -> SExpr atom
SCons SExpr atom
"quote" (forall atom. SExpr atom -> SExpr atom -> SExpr atom
SCons SExpr atom
s forall atom. SExpr atom
SNil)

peekChar :: Parser (Maybe Char)
peekChar :: Parser (Maybe Char)
peekChar = forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing

parseGenericSExpr ::
  Parser atom  -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
parseGenericSExpr :: forall atom.
Parser atom
-> ReaderMacroMap atom -> Comment -> Parser (SExpr atom)
parseGenericSExpr Parser atom
atom ReaderMacroMap atom
reader Comment
skip = do
  let sExpr :: Parser (SExpr atom)
sExpr = forall atom.
Parser atom
-> ReaderMacroMap atom -> Comment -> Parser (SExpr atom)
parseGenericSExpr Parser atom
atom ReaderMacroMap atom
reader Comment
skip forall s u (m :: * -> *) a.
ParsecT s u m a -> String -> ParsecT s u m a
<?> String
"s-expr"
  Comment
skip
  Maybe Char
c <- Parser (Maybe Char)
peekChar
  SExpr atom
r <- case Maybe Char
c of
    Maybe Char
Nothing -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Unexpected end of input"
    Just Char
'(' -> forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'(' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Comment
skip forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall atom. Parser (SExpr atom) -> Comment -> Parser (SExpr atom)
parseList Parser (SExpr atom)
sExpr Comment
skip
    Just (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup ReaderMacroMap atom
reader -> Just Reader atom
r) -> forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Reader atom
r Parser (SExpr atom)
sExpr
    Maybe Char
_ -> forall atom. atom -> SExpr atom
SAtom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Parser atom
atom
  Comment
skip
  forall (m :: * -> *) a. Monad m => a -> m a
return SExpr atom
r

parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
parseList :: forall atom. Parser (SExpr atom) -> Comment -> Parser (SExpr atom)
parseList Parser (SExpr atom)
sExpr Comment
skip = do
  Maybe Char
i <- Parser (Maybe Char)
peekChar
  case Maybe Char
i of
    Maybe Char
Nothing  -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Unexpected end of input"
    Just Char
')' -> forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
')' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return forall atom. SExpr atom
SNil
    Maybe Char
_        -> do
      SExpr atom
car <- Parser (SExpr atom)
sExpr
      Comment
skip
      Maybe Char
c <- Parser (Maybe Char)
peekChar
      case Maybe Char
c of
        Just Char
'.' -> do
          Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'.'
          SExpr atom
cdr <- Parser (SExpr atom)
sExpr
          Comment
skip
          Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
')'
          Comment
skip
          forall (m :: * -> *) a. Monad m => a -> m a
return (forall atom. SExpr atom -> SExpr atom -> SExpr atom
SCons SExpr atom
car SExpr atom
cdr)
        Just Char
')' -> do
          Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
')'
          Comment
skip
          forall (m :: * -> *) a. Monad m => a -> m a
return (forall atom. SExpr atom -> SExpr atom -> SExpr atom
SCons SExpr atom
car forall atom. SExpr atom
SNil)
        Maybe Char
_ -> do
          SExpr atom
cdr <- forall atom. Parser (SExpr atom) -> Comment -> Parser (SExpr atom)
parseList Parser (SExpr atom)
sExpr Comment
skip
          forall (m :: * -> *) a. Monad m => a -> m a
return (forall atom. SExpr atom -> SExpr atom -> SExpr atom
SCons SExpr atom
car SExpr atom
cdr)

-- | Given a CommentMap, create the corresponding parser to
--   skip those comments (if they exist).
buildSkip :: Maybe (Parser ()) -> Parser ()
buildSkip :: Maybe Comment -> Comment
buildSkip Maybe Comment
Nothing  = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
space
buildSkip (Just Comment
c) = Comment
alternate
  where alternate :: Comment
alternate = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
space forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ((Comment
c forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Comment
alternate) forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return ())

doParse :: Parser a -> Text -> Either String a
doParse :: forall a. Parser a -> Text -> Either String a
doParse Parser a
p Text
t = case forall s t u a.
Stream s Identity t =>
Parsec s u a -> u -> String -> s -> Either ParseError a
runParser Parser a
p () String
"" Text
t of
  Left ParseError
err -> forall a b. a -> Either a b
Left (forall a. Show a => a -> String
show ParseError
err)
  Right a
x  -> forall a b. b -> Either a b
Right a
x

-- | Decode a single S-expression. If any trailing input is left after
--   the S-expression (ignoring comments or whitespace) then this
--   will fail: for those cases, use 'decode', which returns a list of
--   all the S-expressions found at the top level.
decodeOne :: SExprParser atom carrier -> Text -> Either String carrier
decodeOne :: forall atom carrier.
SExprParser atom carrier -> Text -> Either String carrier
decodeOne SExprParser atom carrier
spec = forall a. Parser a -> Text -> Either String a
doParse (ParsecT Text () Identity (SExpr atom)
parser forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof) forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> (forall atom carrier.
SExprParser atom carrier -> SExpr atom -> Either String carrier
postparse SExprParser atom carrier
spec)
  where parser :: ParsecT Text () Identity (SExpr atom)
parser = forall atom.
Parser atom
-> ReaderMacroMap atom -> Comment -> Parser (SExpr atom)
parseGenericSExpr
                   (forall atom carrier. SExprParser atom carrier -> Parser atom
sesPAtom SExprParser atom carrier
spec)
                   (forall atom carrier.
SExprParser atom carrier -> ReaderMacroMap atom
readerMap SExprParser atom carrier
spec)
                   (Maybe Comment -> Comment
buildSkip (forall atom carrier. SExprParser atom carrier -> Maybe Comment
comment SExprParser atom carrier
spec))

-- | Decode several S-expressions according to a given 'SExprParser'. This
--   will return a list of every S-expression that appears at the top-level
--   of the document.
decode :: SExprParser atom carrier -> Text -> Either String [carrier]
decode :: forall atom carrier.
SExprParser atom carrier -> Text -> Either String [carrier]
decode SExprParser atom carrier
spec =
  forall a. Parser a -> Text -> Either String a
doParse (forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 Parser (SExpr atom)
parser forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof) forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (forall atom carrier.
SExprParser atom carrier -> SExpr atom -> Either String carrier
postparse SExprParser atom carrier
spec)
    where parser :: Parser (SExpr atom)
parser = forall atom.
Parser atom
-> ReaderMacroMap atom -> Comment -> Parser (SExpr atom)
parseGenericSExpr
                     (forall atom carrier. SExprParser atom carrier -> Parser atom
sesPAtom SExprParser atom carrier
spec)
                     (forall atom carrier.
SExprParser atom carrier -> ReaderMacroMap atom
readerMap SExprParser atom carrier
spec)
                     (Maybe Comment -> Comment
buildSkip (forall atom carrier. SExprParser atom carrier -> Maybe Comment
comment SExprParser atom carrier
spec))

{-
-- | Encode (without newlines) a single S-expression.
encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
encodeSExpr SNil _         = "()"
encodeSExpr (SAtom s) t    = t s
encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
  where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")"
        go SNil rs      = "(" <> rs <> ")"
        go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t)

-- | Emit an S-Expression in a machine-readable way. This does no
--   pretty-printing or indentation, and produces no comments.
encodeOne :: SExprParser atom carrier -> carrier -> Text
encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)

encode :: SExprParser atom carrier -> [carrier] -> Text
encode spec cs = T.concat (map (encodeOne spec) cs)
-}