{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeFamilies #-} -- | -- Module: Data.Configurator.Syntax -- Copyright: (c) 2011 MailRank, Inc. -- (c) 2015-2016 Leon P Smith -- License: BSD3 -- Maintainer: Leon P Smith -- Stability: experimental -- Portability: portable -- -- A parser for configuration files. module Data.Configurator.Syntax ( topLevel , interp ) where import Protolude hiding (First, try) import Control.Monad (fail) import Text.Megaparsec import Text.Megaparsec.Char import qualified Text.Megaparsec.Char.Lexer as Lexer import qualified Data.Char as Char import Data.Configurator.Types import qualified Data.Text as T type Parser = Parsec Void Text topLevel :: Parser [Directive] topLevel = skipLWS *> directives <* skipLWS <* eof directive :: Parser Directive directive = choice [ do try (keyword "import") <* skipLWS Import <$> string_ , do ident <- identifier <* skipLWS choice [ Bind ident <$> (char '=' *> skipLWS *> value) , Group ident <$> brackets '{' '}' directives ] , do string "#;" *> skipHWS DirectiveComment <$> directive ] directives :: Parser [Directive] directives = (directive <* skipHWS) `sepEndBy` (eol *> skipLWS) <* skipLWS -- | Skip lines, comments, or horizontal white space. skipLWS :: Parser () skipLWS = Lexer.space space1 comment empty where beginComment = char '#' *> notFollowedBy (char ';') comment = try beginComment <* takeWhileP Nothing (\c -> c /= '\r' && c /= '\n') -- | Skip comments or horizontal white space. skipHWS :: Parser () skipHWS = Lexer.space (satisfy (\c -> c == ' ' || c == '\t') >> return ()) (Lexer.skipLineComment "#") empty isIdentifier :: Char -> Bool isIdentifier c = Char.isAlphaNum c || c == '_' || c == '-' keyword :: Text -> Parser () keyword kw = string kw *> notFollowedBy (satisfy isAnyIdentifier) where isAnyIdentifier c = c == '.' || isIdentifier c identifier :: Parser Key identifier = fst <$> match (word `sepBy1` char '.') where word = T.cons <$> letterChar <*> takeWhileP (Just "alphanumeric character") isIdentifier value :: Parser Value value = choice [ Bool <$> boolean , String <$> string_ , Number <$> Lexer.scientific , List <$> brackets '[' ']' ((value <* skipLWS) `sepBy` (char ',' <* skipLWS)) ] where boolean = choice [ string "on" *> pure True , string "off" *> pure False , string "true" *> pure True , string "false" *> pure False ] string_ :: Parser Text string_ = T.pack <$> str where str = char '"' *> manyTill charLiteral (char '"') brackets :: Char -> Char -> Parser a -> Parser a brackets open close p = between (char open *> skipLWS) (char close) p charLiteral :: Parser Char charLiteral = choice [ char '\\' *> parseEscape , anySingle ] where parseEscape = do c <- oneOf ("ntru\"\\" :: [Char]) case c of 'n' -> pure '\n' 't' -> pure '\t' 'r' -> pure '\r' '"' -> pure '"' '\\' -> pure '\\' _ -> hexQuad hexQuad :: Parser Char hexQuad = do a <- quad if a < 0xd800 || a > 0xdfff then return (chr a) else do b <- string "\\u" *> quad if a <= 0xdbff && b >= 0xdc00 && b <= 0xdfff then return $! chr (((a - 0xd800) `shiftL` 10) + (b - 0xdc00) + 0x10000) else fail "invalid UTF-16 surrogates" where quad = mkNum <$> count 4 (satisfy Char.isHexDigit "hexadecimal digit") mkNum = foldl' step 0 step a c = a * 16 + fromIntegral (Char.digitToInt c) -- | Parse a string interpolation spec. -- -- The sequence @$$@ is treated as a single @$@ character. The -- sequence @$(@ begins a section to be interpolated, and @)@ ends it. interp :: Parser [Interpolate] interp = reverse <$> p [] where p acc = do h <- Literal <$> takeWhileP Nothing (/='$') let rest = do let cont x = p (x : h : acc) c <- char '$' *> satisfy (\c -> c == '$' || c == '(') case c of '$' -> cont (Literal (T.singleton '$')) _ -> (cont . Interpolate) =<< takeWhile1P Nothing (/=')') <* char ')' done <- atEnd if done then return (h : acc) else rest