{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE MonoLocalBinds #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} module Text.Toml.Parser ( module Text.Toml.Parser , module Text.Toml.Types ) where import Control.Applicative hiding (many, optional, some, (<|>)) import Control.Monad import Control.Monad.State (evalState) import Data.Functor import qualified Data.HashMap.Lazy as M import Data.Monoid import qualified Data.Set as S import Data.Text (Text, pack, unpack) import qualified Data.Text as T import qualified Data.Vector as V import Data.Void import Text.Megaparsec.CharRW import Data.Time.Format (defaultTimeLocale, iso8601DateFormat, parseTimeM) import Numeric (readHex) import Text.Megaparsec import Text.Toml.Types import Prelude type TomlError = ParseError (Token Text) Void -- | Convenience function for the test suite and GHCI. parseOnly :: Parser Toml a -> Text -> Either TomlError a parseOnly parser = flip evalState mempty . runParserT parser "noneSrc" -- | Parses a complete document formatted according to the TOML spec. tomlDoc :: (TomlM m) => Parser m Table tomlDoc = do skipBlanks topTable <- table namedSections <- many namedSection -- Ensure the input is completely consumed eof -- Load each named section into the top table foldM (flip (insert Explicit)) topTable namedSections -- | Parses a table of key-value pairs. table :: (TomlM m) => Parser m Table table = do pairs <- many (assignment <* skipBlanks) <|> (skipBlanks >> pure []) case maybeDupe (map fst pairs) of Just k -> throwParser $ "Cannot redefine key '" ++ unpack k ++ "'" Nothing -> return $ M.fromList pairs -- | Parses an inline table of key-value pairs. inlineTable :: (TomlM m) => Parser m (Either (S.Set (ErrorFancy Void)) Node) inlineTable = do pairs <- between (char '{') (char '}') (skipSpaces *> separatedValues <* skipSpaces) case maybeDupe (map fst pairs) of Just k -> pure $ Left (S.fromList [ErrorFail $ "Cannot redefine key " ++ unpack k ]) Nothing -> pure $ Right $ VTable $ M.fromList pairs where skipSpaces = many (satisfy isSpc) separatedValues = sepBy (skipSpaces *> assignment <* skipSpaces) comma comma = skipSpaces >> char ',' >> skipSpaces -- | Find dupes, if any. maybeDupe :: Ord a => [a] -> Maybe a maybeDupe xx = dup xx S.empty where dup [] _ = Nothing dup (x:xs) s = if S.member x s then Just x else dup xs (S.insert x s) -- | Parses a 'Table' or 'TableArray' with its header. -- The resulting tuple has the header's value in the first position, and the -- 'NTable' or 'NTArray' in the second. namedSection :: (TomlM m) => Parser m ([Text], Node) namedSection = do eitherHdr <- try (Left <$> tableHeader) <|> (Right <$> tableArrayHeader) skipBlanks tbl <- table skipBlanks return $ case eitherHdr of Left ns -> (ns, VTable tbl ) Right ns -> (ns, VTArray $ V.singleton tbl) -- | Parses a table header. tableHeader :: (TomlM m) => Parser m [Text] tableHeader = between (char '[') (char ']') headerValue -- | Parses a table array header. tableArrayHeader :: (TomlM m) => Parser m [Text] tableArrayHeader = between (string "[[") (string "]]") headerValue -- | Parses the value of any header (names separated by dots), into a list of 'Text'. headerValue :: (TomlM m) => Parser m [Text] headerValue = ((pack <$> some keyChar) <|> anyStr') `sepBy1` char '.' where keyChar = alphaNumChar <|> oneOf ("-_" :: String) -- | Parses a key-value assignment. assignment :: (TomlM m) => Parser m (Text, Node) assignment = do k <- (pack <$> some keyChar) <|> anyStr' many (satisfy isSpc) >> char '=' >> skipBlanks v' <- value v <- case v' of Right x -> pure x Left y -> fancyFailure y return (k, v) where -- TODO: Follow the spec, e.g.: only first char cannot be '['. keyChar = alphaNumChar <|> oneOf ("-_" :: String) -- | Parses a value. value :: (TomlM m) => Parser m (Either (S.Set (ErrorFancy Void)) Node) value = (pure <$> try array "array") <|> (pure <$> try boolean "boolean") <|> (pure <$> try anyStr "string") <|> (pure <$> try datetime "datetime") <|> (pure <$> try float "float") <|> (pure <$> try integer "integer") <|> (inlineTable "inline table") -- -- | * Toml value parsers -- array :: (TomlM m) => Parser m Node array = (try (arrayOf array) "array of arrays") <|> (try (arrayOf boolean) "array of booleans") <|> (try (arrayOf anyStr) "array of strings") <|> (try (arrayOf datetime) "array of datetimes") <|> (try (arrayOf float) "array of floats") <|> (arrayOf integer "array of integers") boolean :: (TomlM m) => Parser m Node boolean = VBoolean <$> ( string "true" $> True <|> string "false" $> False ) anyStr :: (TomlM m) => Parser m Node anyStr = VString <$> anyStr' anyStr' :: (TomlM m) => Parser m Text anyStr' = try multiBasicStr <|> try basicStr <|> try multiLiteralStr <|> literalStr basicStr :: (TomlM m) => Parser m Text basicStr = between dQuote dQuote (pack <$> many strChar) where strChar = escSeq <|> noneOf ("\"\\" :: String) dQuote = char '\"' multiBasicStr :: (TomlM m) => Parser m Text multiBasicStr = openDQuote3 *> escWhiteSpc *> (pack <$> manyTill strChar (try dQuote3)) where -- | Parse the a tripple-double quote, with possibly a newline attached openDQuote3 = try (dQuote3 <* char '\n') <|> dQuote3 -- | Parse tripple-double quotes dQuote3 = count 3 $ char '"' -- | Parse a string char, accepting escaped codes, ignoring escaped white space strChar = escSeq <|> noneOf ("\\" :: String) <* escWhiteSpc -- | Parse escaped white space, if any escWhiteSpc = many $ char '\\' >> char '\n' >> many (oneOf ("\n\t " :: String)) literalStr :: (TomlM m) => Parser m Text literalStr = between sQuote sQuote (pack <$> many (noneOf ("'" :: String))) where sQuote = char '\'' multiLiteralStr :: (TomlM m) => Parser m Text multiLiteralStr = openSQuote3 *> (pack <$> manyTill anyChar sQuote3) where -- | Parse the a tripple-single quote, with possibly a newline attached openSQuote3 = try (sQuote3 <* char '\n') <|> sQuote3 -- | Parse tripple-single quotes sQuote3 = try . count 3 . char $ '\'' datetime :: (TomlM m) => Parser m Node datetime = do d <- manyTill anyChar (char 'Z') let mt = parseTimeM True defaultTimeLocale (iso8601DateFormat $ Just "%X") d case mt of Just t -> return $ VDatetime t Nothing -> throwParser "parsing datetime failed" -- | Attoparsec 'double' parses scientific "e" notation; reimplement according to Toml spec. float :: (TomlM m) => Parser m Node float = VFloat <$> do n <- intStr <* lookAhead (oneOf (".eE" :: String)) d <- (char '.' *> uintStr) <|> return "0" e <- (oneOf ("eE" :: String) *> intStr) <|> return "0" return . read . join $ [n, ".", d, "e", e] where sign = (T.singleton <$> char '-') <|> (char '+' >> return "") <|> return "" uintStr = (:) <$> digitChar <*> many (optional (char '_') *> digitChar) intStr = do s <- T.unpack <$> sign u <- uintStr return . join $ s : [u] integer :: (TomlM m) => Parser m Node integer = VInteger <$> signed (read <$> uintStr) where uintStr :: (TomlM m) => Parser m String uintStr = (:) <$> digitChar <*> many (optional (char '_') *> digitChar) -- -- * Utility functions -- -- | Parses the elements of an array, while restricting them to a certain type. arrayOf :: (TomlM m) => Parser m Node -> Parser m Node arrayOf p = VArray . V.fromList <$> between (char '[') (char ']') (skipBlanks *> separatedValues) where separatedValues = sepEndBy (skipBlanks *> try p <* skipBlanks) comma <* skipBlanks comma = skipBlanks >> char ',' >> skipBlanks -- | Parser for escape sequences. escSeq :: (TomlM m) => Parser m Char escSeq = char '\\' *> escSeqChar where escSeqChar = char '"' <|> char '\\' <|> char '/' <|> char 'n' $> '\n' <|> char 't' $> '\t' <|> char 'r' $> '\r' <|> char 'b' $> '\b' <|> char 'f' $> '\f' <|> char 'u' *> unicodeHex 4 <|> char 'U' *> unicodeHex 8 "escaped character" -- | Parser for unicode hexadecimal values of representation length 'n'. unicodeHex :: (TomlM m) => Int -> Parser m Char unicodeHex n = do h <- count n hexDigitChar -- (satisfy isHex) let v = fst . head . readHex $ h return $ if v <= maxChar then toEnum v else '�' where -- isHex x = or . sequence [isDigit, isAsciiUpper, isAsciiLower] maxChar = fromEnum (maxBound :: Char) -- | Parser for signs (a plus or a minus). signed :: (Num a, TomlM m) => Parser m a -> Parser m a signed p = p <|> (negate <$> (char '-' *> p)) <|> (char '+' *> p) -- | Parses the (rest of the) line including an EOF, whitespace and comments. skipBlanks :: (TomlM m) => Parser m () skipBlanks = skipMany blank where blank = void (some (satisfy isSpc)) <|> comment <|> void eol comment = char '#' >> void (many $ noneOf ("\n" :: String)) -- | Results in 'True' for whitespace chars, tab or space, according to spec. isSpc :: Char -> Bool isSpc ' ' = True isSpc '\t' = True isSpc _ = False