module Data.EDN.Parser (
parseMaybe, parseBSL, parseBS, parseT, parseTL, parseS,
parseValue, parseTagged
) where
import Control.Applicative (pure, (*>), (<|>))
import Data.Attoparsec.Char8 as A
import Data.Attoparsec.Combinator ()
import qualified Data.Attoparsec.Lazy as AL
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import Data.ByteString.Search (replace)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Prelude hiding (String, takeWhile)
import Data.EDN.Types (Tagged (..), TaggedValue,
Value (..), makeMap, makeSet,
makeVec)
isSpaceOrComma :: Char -> Bool
isSpaceOrComma ' ' = True
isSpaceOrComma '\r' = True
isSpaceOrComma '\n' = True
isSpaceOrComma '\t' = True
isSpaceOrComma ',' = True
isSpaceOrComma _ = False
spaceOrComma :: Parser Char
spaceOrComma = satisfy isSpaceOrComma <?> "space/comma"
skipSoC :: Parser ()
skipSoC = skipWhile isSpaceOrComma
parseNil :: Parser Value
parseNil = do
skipSoC
string "nil"
return Nil
parseBool :: Parser Value
parseBool = do
skipSoC
choice [ string "true" *> pure (Boolean True)
, string "false" *> pure (Boolean False)
]
parseString :: Parser Value
parseString = do
skipSoC
char '"'
x <- A.scan False $ \s c -> if s then Just False
else if c == '"'
then Nothing
else Just (c == '\\')
char '"'
if '\\' `BS.elem` x
then return $! String
. TE.decodeUtf8
. rep "\\\"" "\""
. rep "\\\\" "\\"
. rep "\\n" "\n"
. rep "\\r" "\r"
. rep "\\t" "\t"
$ x
else return $! String . TE.decodeUtf8 $ x
where rep f t s = BS.concat . BSL.toChunks $! replace (BS.pack f) (BS.pack t) s
parseCharacter :: Parser Value
parseCharacter = do
skipSoC
char '\\'
x <- string "newline" <|> string "space" <|> string "tab" <|> A.take 1
return . Character $! case x of
"newline" -> '\n'
"return" -> '\r'
"space" -> ' '
"tab" -> '\t'
_ -> BS.head x
parseSymbol :: Parser Value
parseSymbol = do
skipSoC
c <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-")
(ns, val) <- withNS c <|> withoutNS c
return $! Symbol ns val
where
withNS c = do
ns <- takeWhile (inClass "a-zA-Z0-9#:.*!?$%&=+_-")
char '/'
vc <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-")
val <- takeWhile1 (inClass "a-zA-Z0-9#:.*!?$%&=+_-")
return (c `BS.cons` ns, vc `BS.cons` val)
withoutNS c = do
val <- takeWhile (inClass "a-zA-Z0-9#:.*!?$%&=+_-")
return ("", c `BS.cons` val)
parseKeyword :: Parser Value
parseKeyword = do
skipSoC
char ':'
c <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-")
x <- takeWhile (inClass "a-zA-Z0-9#:.*/!?$%&=+_-")
return $! Keyword (c `BS.cons` x)
parseNumber :: Parser Value
parseNumber = do
skipSoC
n <- number
case n of
I i -> return $! Integer i
D d -> return $! Floating d
parseList :: Parser Value
parseList = do
skipSoC
char '('
A.skipSpace
vs <- parseTagged `sepBy` spaceOrComma
A.skipSpace
char ')'
return $! List vs
parseVector :: Parser Value
parseVector = do
skipSoC
char '['
A.skipSpace
vs <- parseTagged `sepBy` spaceOrComma
A.skipSpace
char ']'
return $! makeVec vs
parseMap :: Parser Value
parseMap = do
skipSoC
char '{'
A.skipSpace
pairs <- parseAssoc `sepBy` spaceOrComma
A.skipSpace
char '}'
return $! makeMap pairs
where
parseAssoc = do
key <- parseValue
val <- parseTagged
return (key, val)
parseSet :: Parser Value
parseSet = do
skipSoC
char '#'
char '{'
A.skipSpace
vs <- parseTagged `sepBy` spaceOrComma
A.skipSpace
char '}'
return $! makeSet vs
skipComment :: Parser ()
skipComment = skipSoC >> char ';' >> skipWhile (/= '\n')
parseDiscard :: Parser ()
parseDiscard = do
skipSoC
string "#_"
parseValue
return ()
parseValue :: Parser Value
parseValue = do
skipSoC
skipMany skipComment
skipMany parseDiscard
parseSet <|> parseMap
<|> parseVector <|> parseList
<|> parseNil <|> parseBool
<|> parseNumber
<|> parseKeyword <|> parseSymbol
<|> parseCharacter
<|> parseString
parseTagged :: Parser TaggedValue
parseTagged = do
skipSoC
withNS <|> withoutNS <|> noTag
where
withNS = do
char '#'
ns <- takeWhile1 (inClass "a-zA-Z0-9-")
char '/'
tag <- takeWhile1 (inClass "a-zA-Z0-9-")
value <- parseValue
return $! Tagged value ns tag
withoutNS = do
char '#'
tag <- takeWhile1 (inClass "a-zA-Z0-9-")
value <- parseValue
return $! Tagged value "" tag
noTag = do
value <- parseValue
return $! NoTag value
parseMaybe :: BSL.ByteString -> Maybe TaggedValue
parseMaybe src = case parseBSL src of
AL.Done _ r -> Just r
_ -> Nothing
parseBSL :: BSL.ByteString -> AL.Result TaggedValue
parseBSL = AL.parse parseTagged
parseBS :: BS.ByteString -> AL.Result TaggedValue
parseBS s = parseBSL . BSL.fromChunks $ [s]
parseTL :: TL.Text -> AL.Result TaggedValue
parseTL = parseBSL . TLE.encodeUtf8
parseT :: T.Text -> AL.Result TaggedValue
parseT = parseBS . TE.encodeUtf8
parseS :: [Char] -> AL.Result TaggedValue
parseS = parseBSL . BSL.pack