module Data.EDN.Parser (
parseMaybe, parseEither, parseBSL, parseBS, parseT, parseTL, parseS,
parseValue, parseTagged
) where
import Control.Applicative (pure, (*>), (<|>))
import Data.Attoparsec.Char8 as A
import Data.Attoparsec.Combinator ()
import qualified Data.Attoparsec.Lazy as AL
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import Data.ByteString.Search (replace)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Prelude hiding (String, takeWhile)
import Data.EDN.Types (Tagged (..), TaggedValue,
Value (..), makeMap, makeSet,
makeVec)
import qualified Prelude as P
isSpaceOrComma :: Char -> Bool
isSpaceOrComma ' ' = True
isSpaceOrComma '\r' = True
isSpaceOrComma '\n' = True
isSpaceOrComma '\t' = True
isSpaceOrComma ',' = True
isSpaceOrComma _ = False
spaceOrComma :: Parser Char
spaceOrComma = satisfy isSpaceOrComma <?> "space/comma"
skipSoC :: Parser ()
skipSoC = skipWhile isSpaceOrComma
parseNil :: Parser Value
parseNil = do
skipSoC
string "nil"
return Nil
parseBool :: Parser Value
parseBool = do
skipSoC
choice [ string "true" *> pure (Boolean True)
, string "false" *> pure (Boolean False)
]
parseString :: Parser Value
parseString = do
skipSoC
char '"'
x <- A.scan False $ \s c -> if s then Just False
else if c == '"'
then Nothing
else Just (c == '\\')
char '"'
if '\\' `BS.elem` x
then return $! String
. TE.decodeUtf8
. rep "\\\"" "\""
. rep "\\\\" "\\"
. rep "\\n" "\n"
. rep "\\r" "\r"
. rep "\\t" "\t"
$ x
else return $! String . TE.decodeUtf8 $ x
where rep f t s = BS.concat . BSL.toChunks $! replace (BS.pack f) (BS.pack t) s
parseCharacter :: Parser Value
parseCharacter = do
skipSoC
char '\\'
simple <|> anyCharUtf8
where
simple :: Parser Value
simple = do
x <- string "newline"
<|> string "return"
<|> string "space"
<|> string "tab"
<|> string "\\"
return . Character $! case x of
"newline" -> '\n'
"return" -> '\r'
"space" -> ' '
"tab" -> '\t'
"\\" -> '\\'
_ -> error ("EDN.parseCharacter: impossible - simple" ++ show x)
anyCharUtf8 :: Parser Value
anyCharUtf8 = do
bs <- scan BS.empty go
case UTF8.decode bs of
Just (c, _) -> return $! Character c
Nothing -> error $ "EDN.parseCharacter: bad utf8 data? " ++ show bs
go :: BS.ByteString -> Char -> Maybe BS.ByteString
go s c
| BS.null s = Just (BS.singleton c)
| otherwise = case UTF8.decode s of
Nothing -> Just (s `BS.snoc` c)
Just (uc, _) -> if uc == UTF8.replacement_char
then Just (s `BS.snoc` c)
else Nothing
parseSymbol :: Parser Value
parseSymbol = do
skipSoC
c <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-")
(ns, val) <- withNS c <|> withoutNS c
return $! Symbol ns val
where
withNS c = do
ns <- takeWhile (inClass "a-zA-Z0-9#:.*!?$%&=+_-")
char '/'
vc <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-")
val <- takeWhile1 (inClass "a-zA-Z0-9#:.*!?$%&=+_-")
return (c `BS.cons` ns, vc `BS.cons` val)
withoutNS c = do
val <- takeWhile (inClass "a-zA-Z0-9#:.*!?$%&=+_-")
return ("", c `BS.cons` val)
parseKeyword :: Parser Value
parseKeyword = do
skipSoC
char ':'
c <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-")
x <- takeWhile (inClass "a-zA-Z0-9#:.*/!?$%&=+_-")
return $! Keyword (c `BS.cons` x)
parseNumber :: Parser Value
parseNumber = do
skipSoC
n <- number
case n of
I i -> return $! Integer i
D d -> return $! Floating d
parseList :: Parser Value
parseList = do
skipSoC
char '('
A.skipSpace
vs <- parseTagged `sepBy` spaceOrComma
A.skipSpace
char ')'
return $! List vs
parseVector :: Parser Value
parseVector = do
skipSoC
char '['
A.skipSpace
vs <- parseTagged `sepBy` spaceOrComma
A.skipSpace
char ']'
return $! makeVec vs
parseMap :: Parser Value
parseMap = do
skipSoC
char '{'
A.skipSpace
pairs <- parseAssoc `sepBy` spaceOrComma
A.skipSpace
char '}'
return $! makeMap pairs
where
parseAssoc = do
key <- parseValue
val <- parseTagged
return (key, val)
parseSet :: Parser Value
parseSet = do
skipSoC
char '#'
char '{'
A.skipSpace
vs <- parseTagged `sepBy` spaceOrComma
A.skipSpace
char '}'
return $! makeSet vs
skipComment :: Parser ()
skipComment = skipSoC >> char ';' >> skipWhile (/= '\n')
parseDiscard :: Parser ()
parseDiscard = do
skipSoC
string "#_"
parseValue
return ()
parseValue :: Parser Value
parseValue = do
skipSoC
skipMany skipComment
skipMany parseDiscard
parseSet <|> parseMap
<|> parseVector <|> parseList
<|> parseNil <|> parseBool
<|> parseNumber
<|> parseKeyword <|> parseSymbol
<|> parseCharacter
<|> parseString
parseTagged :: Parser TaggedValue
parseTagged = do
skipSoC
withNS <|> withoutNS <|> noTag
where
withNS = do
char '#'
ns <- takeWhile1 (inClass "a-zA-Z0-9-")
char '/'
tag <- takeWhile1 (inClass "a-zA-Z0-9-")
value <- parseValue
return $! Tagged value ns tag
withoutNS = do
char '#'
tag <- takeWhile1 (inClass "a-zA-Z0-9-")
value <- parseValue
return $! Tagged value "" tag
noTag = do
value <- parseValue
return $! NoTag value
parseMaybe :: BSL.ByteString -> Maybe TaggedValue
parseMaybe = AL.maybeResult . parseBSL
parseEither :: BSL.ByteString -> Either P.String TaggedValue
parseEither = AL.eitherResult . parseBSL
parseBSL :: BSL.ByteString -> AL.Result TaggedValue
parseBSL = AL.parse parseTagged
parseBS :: BS.ByteString -> AL.Result TaggedValue
parseBS s = parseBSL . BSL.fromChunks $ [s]
parseTL :: TL.Text -> AL.Result TaggedValue
parseTL = parseBSL . TLE.encodeUtf8
parseT :: T.Text -> AL.Result TaggedValue
parseT = parseBS . TE.encodeUtf8
parseS :: [Char] -> AL.Result TaggedValue
parseS = parseBSL . BSL.pack