module Data.EDN.Parser (
decode, parseBSL, parseBS, parseT, parseTL, parseS,
parseValue, parseTagged
) where
import Prelude hiding (String, takeWhile)
import Data.Attoparsec.Char8 as A
import qualified Data.Attoparsec.Lazy as AL
import Data.Attoparsec.Combinator()
import Control.Applicative (pure, (<|>), (*>))
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Data.ByteString.Search (replace)
import Data.EDN.Types (Value(..), Tagged(..), TaggedValue, makeVec, makeMap, makeSet)
isSpaceOrComma :: Char -> Bool
isSpaceOrComma ' ' = True
isSpaceOrComma '\r' = True
isSpaceOrComma '\n' = True
isSpaceOrComma '\t' = True
isSpaceOrComma ',' = True
isSpaceOrComma _ = False
spaceOrComma :: Parser Char
spaceOrComma = satisfy isSpaceOrComma <?> "space/comma"
skipSoC :: Parser ()
skipSoC = skipWhile isSpaceOrComma
parseNil :: Parser Value
parseNil = do
skipSoC
string "nil"
return Nil
parseBool :: Parser Value
parseBool = do
skipSoC
choice [ string "true" *> pure (Boolean True)
, string "false" *> pure (Boolean False)
]
parseString :: Parser Value
parseString = do
skipSoC
char '"'
x <- A.scan False $ \s c -> if s then Just False
else if c == '"'
then Nothing
else Just (c == '\\')
char '"'
if '\\' `BS.elem` x
then return $! String . TE.decodeUtf8 . rep "\\\"" "\"". rep "\\\\" "\\" . rep "\\n" "\n" . rep "\\r" "\r" . rep "\\t" "\t" $ x
else return $! String . TE.decodeUtf8 $ x
where rep f t s = BS.concat . BSL.toChunks $! replace (BS.pack f) (BS.pack t) s
parseCharacter :: Parser Value
parseCharacter = do
skipSoC
char '\\'
x <- string "newline" <|> string "space" <|> string "tab" <|> A.take 1
return . Character $! case x of
"newline" -> '\n'
"return" -> '\r'
"space" -> ' '
"tab" -> '\t'
_ -> BS.head x
parseSymbol :: Parser Value
parseSymbol = do
skipSoC
c <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-")
(ns, val) <- withNS c <|> withoutNS c
return $! Symbol ns val
where
withNS c = do
ns <- takeWhile1 (inClass "a-zA-Z0-9#:.*!?$%&=+_-")
char '/'
vc <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-")
val <- takeWhile1 (inClass "a-zA-Z0-9#:.*!?$%&=+_-")
return (c `BS.cons` ns, vc `BS.cons` val)
withoutNS c = do
val <- takeWhile (inClass "a-zA-Z0-9#:.*!?$%&=+_-")
return ("", c `BS.cons` val)
parseKeyword :: Parser Value
parseKeyword = do
skipSoC
char ':'
c <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-")
x <- takeWhile (inClass "a-zA-Z0-9#:.*/!?$%&=+_-")
return $! Keyword (c `BS.cons` x)
parseNumber :: Parser Value
parseNumber = do
skipSoC
n <- number
case n of
I i -> return $! Integer i
D d -> return $! Floating d
parseList :: Parser Value
parseList = do
skipSoC
char '('
vs <- parseTagged `sepBy` spaceOrComma
char ')'
return $! List vs
parseVector :: Parser Value
parseVector = do
skipSoC
char '['
vs <- parseTagged `sepBy` spaceOrComma
char ']'
return $! makeVec vs
parseMap :: Parser Value
parseMap = do
skipSoC
char '{'
pairs <- parseAssoc `sepBy` spaceOrComma
char '}'
return $! makeMap pairs
where
parseAssoc = do
key <- parseValue
val <- parseTagged
return (key, val)
parseSet :: Parser Value
parseSet = do
skipSoC
char '#'
char '{'
vs <- parseTagged `sepBy` spaceOrComma
char '}'
return $! makeSet vs
skipComment :: Parser ()
skipComment = skipSoC >> char ';' >> skipWhile (/= '\n')
parseDiscard :: Parser ()
parseDiscard = do
skipSoC
string "#_"
parseValue
return ()
parseValue :: Parser Value
parseValue = do
skipSoC
skipMany skipComment
skipMany parseDiscard
parseSet <|> parseMap
<|> parseVector <|> parseList
<|> parseNil <|> parseBool
<|> parseNumber
<|> parseKeyword <|> parseSymbol
<|> parseCharacter
<|> parseString
parseTagged :: Parser TaggedValue
parseTagged = do
skipSoC
withNS <|> withoutNS <|> noTag
where
withNS = do
char '#'
ns <- takeWhile1 (inClass "a-zA-Z0-9-")
char '/'
tag <- takeWhile1 (inClass "a-zA-Z0-9-")
value <- parseValue
return $! Tagged value ns tag
withoutNS = do
char '#'
tag <- takeWhile1 (inClass "a-zA-Z0-9-")
value <- parseValue
return $! Tagged value "" tag
noTag = do
value <- parseValue
return $! NoTag value
decode :: BSL.ByteString -> Maybe TaggedValue
decode src = case parseBSL src of
AL.Done _ r -> Just r
_ -> Nothing
parseBSL :: BSL.ByteString -> AL.Result TaggedValue
parseBSL = AL.parse parseTagged
parseBS :: BS.ByteString -> AL.Result TaggedValue
parseBS s = parseBSL . BSL.fromChunks $ [s]
parseTL :: TL.Text -> AL.Result TaggedValue
parseTL = parseBSL . TLE.encodeUtf8
parseT :: T.Text -> AL.Result TaggedValue
parseT = parseBS . TE.encodeUtf8
parseS :: [Char] -> AL.Result TaggedValue
parseS = parseBSL . BSL.pack