{-# LANGUAGE OverloadedStrings #-} -- | Parse an UTF-8 encoded EDN string into a haskell representation of EDN objects. -- Use 'Data.EDN.decode' to get actual types. module Data.EDN.Parser ( -- * Data parsers parseMaybe, parseBSL, parseBS, parseT, parseTL, parseS, -- * Attoparsec implementation parseValue, parseTagged ) where import Control.Applicative (pure, (*>), (<|>)) import Data.Attoparsec.Char8 as A import Data.Attoparsec.Combinator () import qualified Data.Attoparsec.Lazy as AL import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as BSL import Data.ByteString.Search (replace) import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TLE import Prelude hiding (String, takeWhile) import Data.EDN.Types (Tagged (..), TaggedValue, Value (..), makeMap, makeSet, makeVec) isSpaceOrComma :: Char -> Bool isSpaceOrComma ' ' = True isSpaceOrComma '\r' = True isSpaceOrComma '\n' = True isSpaceOrComma '\t' = True isSpaceOrComma ',' = True isSpaceOrComma _ = False spaceOrComma :: Parser Char spaceOrComma = satisfy isSpaceOrComma "space/comma" skipSoC :: Parser () skipSoC = skipWhile isSpaceOrComma parseNil :: Parser Value parseNil = do skipSoC string "nil" return Nil parseBool :: Parser Value parseBool = do skipSoC choice [ string "true" *> pure (Boolean True) , string "false" *> pure (Boolean False) ] parseString :: Parser Value parseString = do skipSoC char '"' x <- A.scan False $ \s c -> if s then Just False else if c == '"' then Nothing else Just (c == '\\') char '"' if '\\' `BS.elem` x then return $! String . TE.decodeUtf8 . rep "\\\"" "\"" . rep "\\\\" "\\" . rep "\\n" "\n" . rep "\\r" "\r" . rep "\\t" "\t" $ x else return $! String . TE.decodeUtf8 $ x where rep f t s = BS.concat . BSL.toChunks $! replace (BS.pack f) (BS.pack t) s parseCharacter :: Parser Value parseCharacter = do skipSoC char '\\' x <- string "newline" <|> string "space" <|> string "tab" <|> A.take 1 return . Character $! case x of "newline" -> '\n' "return" -> '\r' "space" -> ' ' "tab" -> '\t' _ -> BS.head x parseSymbol :: Parser Value parseSymbol = do skipSoC c <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-") (ns, val) <- withNS c <|> withoutNS c return $! Symbol ns val where withNS c = do ns <- takeWhile (inClass "a-zA-Z0-9#:.*!?$%&=+_-") char '/' vc <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-") val <- takeWhile1 (inClass "a-zA-Z0-9#:.*!?$%&=+_-") return (c `BS.cons` ns, vc `BS.cons` val) withoutNS c = do val <- takeWhile (inClass "a-zA-Z0-9#:.*!?$%&=+_-") return ("", c `BS.cons` val) parseKeyword :: Parser Value parseKeyword = do skipSoC char ':' c <- satisfy (inClass "a-zA-Z.*/!?$%&=+_-") x <- takeWhile (inClass "a-zA-Z0-9#:.*/!?$%&=+_-") return $! Keyword (c `BS.cons` x) parseNumber :: Parser Value parseNumber = do skipSoC n <- number case n of I i -> return $! Integer i D d -> return $! Floating d parseList :: Parser Value parseList = do skipSoC char '(' A.skipSpace vs <- parseTagged `sepBy` spaceOrComma A.skipSpace char ')' return $! List vs parseVector :: Parser Value parseVector = do skipSoC char '[' A.skipSpace vs <- parseTagged `sepBy` spaceOrComma A.skipSpace char ']' return $! makeVec vs parseMap :: Parser Value parseMap = do skipSoC char '{' A.skipSpace pairs <- parseAssoc `sepBy` spaceOrComma A.skipSpace char '}' return $! makeMap pairs where parseAssoc = do key <- parseValue val <- parseTagged return (key, val) parseSet :: Parser Value parseSet = do skipSoC char '#' char '{' A.skipSpace vs <- parseTagged `sepBy` spaceOrComma A.skipSpace char '}' return $! makeSet vs skipComment :: Parser () skipComment = skipSoC >> char ';' >> skipWhile (/= '\n') parseDiscard :: Parser () parseDiscard = do skipSoC string "#_" parseValue return () -- | Parse a \"raw\" EDN value into a 'Value'. parseValue :: Parser Value parseValue = do skipSoC skipMany skipComment skipMany parseDiscard parseSet <|> parseMap <|> parseVector <|> parseList <|> parseNil <|> parseBool <|> parseNumber <|> parseKeyword <|> parseSymbol <|> parseCharacter <|> parseString -- | Parse a probably tagged EDN value into a 'TaggedValue'. parseTagged :: Parser TaggedValue parseTagged = do skipSoC withNS <|> withoutNS <|> noTag where withNS = do char '#' ns <- takeWhile1 (inClass "a-zA-Z0-9-") char '/' tag <- takeWhile1 (inClass "a-zA-Z0-9-") value <- parseValue return $! Tagged value ns tag withoutNS = do char '#' tag <- takeWhile1 (inClass "a-zA-Z0-9-") value <- parseValue return $! Tagged value "" tag noTag = do value <- parseValue return $! NoTag value -- | Parse a lazy 'BSL.ByteString' into a 'TaggedValue'. If fails due to incomplete or invalid input, 'Nothing' is returned. parseMaybe :: BSL.ByteString -> Maybe TaggedValue parseMaybe src = case parseBSL src of AL.Done _ r -> Just r _ -> Nothing -- | Parse a lazy 'BSL.ByteString'. parseBSL :: BSL.ByteString -> AL.Result TaggedValue parseBSL = AL.parse parseTagged {-# INLINE parseBSL #-} -- | Parse a strict 'BS.ByteString', but without continutations. parseBS :: BS.ByteString -> AL.Result TaggedValue parseBS s = parseBSL . BSL.fromChunks $ [s] {-# INLINE parseBS #-} -- | Parse a lazy 'TL.Text'. parseTL :: TL.Text -> AL.Result TaggedValue parseTL = parseBSL . TLE.encodeUtf8 {-# INLINE parseTL #-} -- | Parse a strict 'T.Text'. parseT :: T.Text -> AL.Result TaggedValue parseT = parseBS . TE.encodeUtf8 {-# INLINE parseT #-} -- | Parse a string AKA '[Char]'. Not really useful other than for debugging purposes. parseS :: [Char] -> AL.Result TaggedValue parseS = parseBSL . BSL.pack {-# INLINE parseS #-}