{-# LANGUAGE OverloadedStrings #-} -- | Parse an UTF-8 encoded EDN string into a haskell representation of EDN objects. -- Use 'Data.EDN.decode' to get actual types. module Data.EDN.Parser ( -- * Data parsers parseMaybe, parseEither, parseBSL, parseBS, parseT, parseTL, parseS, -- * Attoparsec implementation parseValue, parseTagged ) where import Prelude () import Prelude.Compat hiding (String, takeWhile) import Control.Applicative ((<|>)) import Data.Attoparsec.ByteString.Char8 as A import Data.Attoparsec.Combinator () import qualified Data.Attoparsec.Lazy as AL import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as BSL import Data.ByteString.Search (replace) import qualified Data.ByteString.UTF8 as UTF8 import Data.Maybe (fromJust) import Data.Scientific as Sci import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TLE import Data.EDN.Types (Tagged (..), TaggedValue, Value (..), makeMap, makeSet, makeVec) import qualified Prelude as P isSpaceOrComma :: Char -> Bool isSpaceOrComma ' ' = True isSpaceOrComma '\r' = True isSpaceOrComma '\n' = True isSpaceOrComma '\t' = True isSpaceOrComma ',' = True isSpaceOrComma _ = False spaceOrComma :: Parser Char spaceOrComma = satisfy isSpaceOrComma "space/comma" skipSoC :: Parser () skipSoC = skipWhile isSpaceOrComma parseNil :: Parser Value parseNil = do skipSoC string "nil" return Nil parseBool :: Parser Value parseBool = do skipSoC choice [ string "true" *> pure (Boolean True) , string "false" *> pure (Boolean False) ] parseString :: Parser Value parseString = do skipSoC char '"' x <- A.scan False $ \s c -> if s then Just False else if c == '"' then Nothing else Just (c == '\\') char '"' let prepare = if '\\' `BS.elem` x then rep "\\\"" "\"" . rep "\\\\" "\\" . rep "\\n" "\n" . rep "\\r" "\r" . rep "\\t" "\t" else id return $! String $ TE.decodeUtf8 $ prepare x where rep f t s = BS.concat . BSL.toChunks $! replace (BS.pack f) (BS.pack t) s parseCharacter :: Parser Value parseCharacter = do skipSoC char '\\' simple <|> anyCharUtf8 where simple :: Parser Value simple = do x <- string "newline" <|> string "return" <|> string "space" <|> string "tab" <|> string "\\" return . Character $! case x of "newline" -> '\n' "return" -> '\r' "space" -> ' ' "tab" -> '\t' "\\" -> '\\' _ -> error ("EDN.parseCharacter: impossible - simple" ++ show x) anyCharUtf8 :: Parser Value anyCharUtf8 = do bs <- scan BS.empty go case UTF8.decode bs of Just (c, _) -> return $! Character c Nothing -> error $ "EDN.parseCharacter: bad utf8 data? " ++ show bs go :: BS.ByteString -> Char -> Maybe BS.ByteString go s c | BS.null s = Just (BS.singleton c) | otherwise = case UTF8.decode s of Nothing -> Just (s `BS.snoc` c) Just (uc, _) -> if uc == UTF8.replacement_char then Just (s `BS.snoc` c) else Nothing parseSymbol :: Parser Value parseSymbol = do skipSoC c <- satisfy (inClass "a-zA-Z.*/<>!?$%&=+_-") (ns, val) <- withNS c <|> withoutNS c return $! Symbol ns val where withNS c = do ns <- takeWhile (inClass "a-zA-Z0-9#:.*<>!?$%&=+_-") char '/' vc <- satisfy (inClass "a-zA-Z.*/<>!?$%&=+_-") val <- takeWhile1 (inClass "a-zA-Z0-9#:.*<>!?$%&=+_-") return (c `BS.cons` ns, vc `BS.cons` val) withoutNS c = do val <- takeWhile (inClass "a-zA-Z0-9#:.*<>!?$%&=+_-") return ("", c `BS.cons` val) parseKeyword :: Parser Value parseKeyword = do skipSoC char ':' c <- satisfy (inClass "a-zA-Z.*/<>!?$%&=+_-") x <- takeWhile (inClass "a-zA-Z0-9#:.*/<>!?$%&=+_-") return $! Keyword (c `BS.cons` x) parseNumber :: Parser Value parseNumber = do skipSoC n <- A.scientific return $! if Sci.isInteger n then Integer (fromIntegral (fromJust (Sci.toBoundedInteger n) :: P.Int)) else Floating (Sci.toRealFloat n) parseColl :: Parser t1 -- opening bracket -> Parser t2 -- closing bracket -> Parser a -- item parser -> ([a] -> Value) -- Value constructor -> Parser Value parseColl openingBr closingBr item construct = do skipSoC _ <- openingBr A.skipSpace vs <- item `sepBy` spaceOrComma A.skipSpace _<- closingBr return $! construct vs parseList :: Parser Value parseList = parseColl (char '(') (char ')') parseTagged List parseVector :: Parser Value parseVector = parseColl (char '[') (char ']') parseTagged makeVec parseSet :: Parser Value parseSet = parseColl (char '#' *> char '{') (char '}') parseTagged makeSet parseMap :: Parser Value parseMap = parseColl (char '{') (char '}') parseAssoc makeMap where parseAssoc = do key <- parseValue val <- parseTagged return (key, val) skipComment :: Parser () skipComment = skipSoC *> char ';' *> skipWhile (/= '\n') parseDiscard :: Parser () parseDiscard = do skipSoC string "#_" parseValue return () -- | Parse a \"raw\" EDN value into a 'Value'. parseValue :: Parser Value parseValue = do skipSoC skipMany skipComment skipMany parseDiscard parseSet <|> parseMap <|> parseVector <|> parseList <|> parseNil <|> parseBool <|> parseNumber <|> parseKeyword <|> parseSymbol <|> parseCharacter <|> parseString -- | Parse a probably tagged EDN value into a 'TaggedValue'. parseTagged :: Parser TaggedValue parseTagged = do skipSoC withNS <|> withoutNS <|> noTag where withNS = do char '#' ns <- parseIdent char '/' tag <- parseIdent value <- parseValue return $! Tagged value ns tag withoutNS = do char '#' tag <- parseIdent value <- parseValue return $! Tagged value "" tag parseIdent = takeWhile1 (inClass "a-zA-Z0-9-") noTag = do value <- parseValue return $! NoTag value {- | Parse a lazy 'BSL.ByteString' into a 'TaggedValue'. If fails due to incomplete or invalid input, 'Nothing' is returned. -} parseMaybe :: BSL.ByteString -> Maybe TaggedValue parseMaybe = AL.maybeResult . parseBSL {- | Parse a lazy 'BSL.ByteString' into a 'TaggedValue'. If fails due to incomplete or invalid input, 'Left' is returned with the error message. -} parseEither :: BSL.ByteString -> Either P.String TaggedValue parseEither = AL.eitherResult . parseBSL -- | Parse a lazy 'BSL.ByteString'. parseBSL :: BSL.ByteString -> AL.Result TaggedValue parseBSL = AL.parse parseTagged {-# INLINE parseBSL #-} -- | Parse a strict 'BS.ByteString', but without continutations. parseBS :: BS.ByteString -> AL.Result TaggedValue parseBS s = parseBSL . BSL.fromChunks $ [s] {-# INLINE parseBS #-} -- | Parse a lazy 'TL.Text'. parseTL :: TL.Text -> AL.Result TaggedValue parseTL = parseBSL . TLE.encodeUtf8 {-# INLINE parseTL #-} -- | Parse a strict 'T.Text'. parseT :: T.Text -> AL.Result TaggedValue parseT = parseBS . TE.encodeUtf8 {-# INLINE parseT #-} -- | Parse a string AKA '[Char]'. Not really useful other than for debugging purposes. parseS :: P.String -> AL.Result TaggedValue parseS = parseBSL . BSL.pack {-# INLINE parseS #-}