-- | Combinators for parsing YAML into Haskell types. -- -- Based on the article . {-# LANGUAGE PolyKinds, DataKinds, KindSignatures, ExplicitForAll, TemplateHaskell, ViewPatterns, ScopedTypeVariables, TypeOperators, TypeFamilies, GeneralizedNewtypeDeriving #-} module Data.Yaml.Combinators ( Parser , parse , runParser -- * Scalars , string , theString , number , integer , bool , null_ -- * Arrays , array , theArray , ElementParser , element -- * Objects , object , FieldParser , field , optField , defaultField , theField , anyValue -- * Errors , ParseError(..) , ppParseError , Reason(..) , validate ) where import Data.Aeson (Value(..), Object, Array) import Data.Scientific import Data.Yaml (decodeEither, encode) import Data.Text (Text) import Data.List import Data.Maybe import Data.ByteString (ByteString) import Data.Semigroup (Semigroup((<>))) import qualified Data.ByteString.Char8 as BS8 import Data.Bifunctor (first) import Control.Monad.Trans.Reader import Control.Monad.Trans.State as State import Data.Vector (Vector) import qualified Data.Vector as V import Data.Functor.Product import Data.Functor.Constant import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HM import Data.HashSet (HashSet) import qualified Data.HashSet as HS import Data.Ord import Generics.SOP import Generics.SOP.TH -- $setup -- >>> :set -XOverloadedStrings -XTypeApplications -- >>> import Data.Semigroup -- orphan Value instances deriveGeneric ''Value ---------------------------------------------------------------------- -- Parsing function ---------------------------------------------------------------------- -- | Run a 'Parser' on a 'ByteString' containing the YAML content. -- -- This is a high-level function implemented on top of 'runParser'. parse :: Parser a -> ByteString -> Either String a parse p bs = do aesonValue <- decodeEither bs first ppParseError $ runParser p aesonValue ---------------------------------------------------------------------- -- Errors and Pretty-printing ---------------------------------------------------------------------- -- | A parse error. 'Reason' describes the error. -- The 'Int' field denotes at which level the error occurred and -- is used to select the deepest (most relevant) error -- when merging multiple parsers. data ParseError = ParseError !Int -- level Reason deriving (Eq, Show) -- | Describes what exactly went wrong during parsing. data Reason -- NB: the order of constructors is important for the Ord instance = UnexpectedAsPartOf Value Value | ExpectedAsPartOf (HashSet String) Value | ExpectedInsteadOf (HashSet String) Value deriving (Eq, Show) -- | Find out which error is more severe compareSeverity :: ParseError -> ParseError -> Ordering compareSeverity (ParseError l1 r1) (ParseError l2 r2) = -- extra stuff is always less severe than mismatching/missing stuff comparing (not . isUnexpected) r1 r2 <> -- otherwise, compare the depths compare l1 l2 <> -- if the depths are equal, mismatches are more severe that misses, comparing isMismatch r1 r2 where isUnexpected e = case e of UnexpectedAsPartOf {} -> True _ -> False isMismatch e = case e of ExpectedInsteadOf {} -> True _ -> False -- | Choose the more severe of two errors. -- -- If they are equally severe, pick the earlier one. moreSevere :: ParseError -> ParseError -> ParseError moreSevere e1 e2 = case compareSeverity e1 e2 of LT -> e2 _ -> e1 -- | Choose the less severe of two errors. -- -- If they are equally severe, pick the earlier one. lessSevere :: ParseError -> ParseError -> ParseError lessSevere e1 e2 = case compareSeverity e1 e2 of GT -> e2 _ -> e1 newtype Validation a = Validation { getValidation :: Either ParseError a } deriving Functor instance Applicative Validation where pure = Validation . Right Validation a <*> Validation b = Validation $ case a of Right va -> fmap va b Left ea -> either (Left . moreSevere ea) (const $ Left ea) b bindV :: Validation a -> (a -> Validation b) -> Validation b bindV a b = Validation $ getValidation a >>= getValidation . b mergeParseError :: ParseError -> ParseError -> ParseError mergeParseError e1@(ParseError l1 r1) e2@(ParseError l2 r2) -- first, see if we can merge the two errors | l1 == l2 , ExpectedAsPartOf exp1 w1 <- r1 , ExpectedAsPartOf exp2 w2 <- r2 , w1 == w2 = ParseError l1 (ExpectedAsPartOf (exp1 <> exp2) w1) | l1 == l2 , ExpectedInsteadOf exp1 w1 <- r1 , ExpectedInsteadOf exp2 w2 <- r2 , w1 == w2 = ParseError l1 (ExpectedInsteadOf (exp1 <> exp2) w1) -- otherwise, just choose the least severe one, -- since its branch is more likely to be the right one | otherwise = lessSevere e1 e2 -- | Pretty-print a 'ParseError' -- -- @since 1.1 ppParseError :: ParseError -> String ppParseError (ParseError _lvl reason) = case reason of UnexpectedAsPartOf part whole -> "Unexpected \n\n" ++ showYaml part ++ "\nas part of\n\n" ++ showYaml whole ExpectedInsteadOf exp1 got -> "Expected " ++ fmt_list exp1 ++ " instead of:\n\n" ++ showYaml got ExpectedAsPartOf exp1 got -> "Expected " ++ fmt_list exp1 ++ " as part of:\n\n" ++ showYaml got where showYaml :: Value -> String showYaml = BS8.unpack . encode fmt_list :: HashSet String -> String fmt_list = intercalate ", " . sort . HS.toList ---------------------------------------------------------------------- -- Core definitions ---------------------------------------------------------------------- newtype ParserComponent a fs = ParserComponent (Maybe (Value -> NP I fs -> Validation a)) -- | A top-level YAML parser. -- -- * Construct a 'Parser' with 'string', 'number', 'integer', 'bool', 'array', or 'object'. -- -- * Combine two or more 'Parser's with 'Monoid' or 'Semigroup' operators -- such as 'mappend', '<>', or `mconcat` — -- e.g. if you expect either an object or a string. -- -- * Run with 'parse' or 'runParser'. newtype Parser a = Parser (NP (ParserComponent a) (Code Value)) -- fmap for ParserComponent (in its first type argument) pcFmap :: (a -> b) -> ParserComponent a fs -> ParserComponent b fs pcFmap f (ParserComponent mbP) = ParserComponent $ (fmap . fmap . fmap . fmap $ f) mbP instance Functor Parser where fmap f (Parser comps) = Parser $ hliftA (pcFmap f) comps instance Semigroup (ParserComponent a fs) where ParserComponent mbP1 <> ParserComponent mbP2 = ParserComponent $ case (mbP1, mbP2) of (Nothing, Nothing) -> Nothing (Just p1, Nothing) -> Just p1 (Nothing, Just p2) -> Just p2 (Just p1, Just p2) -> Just $ \o v -> Validation $ case (getValidation $ p1 o v, getValidation $ p2 o v) of (Right r1, _) -> Right r1 (_, Right r2) -> Right r2 (Left l1, Left l2) -> Left $ mergeParseError l1 l2 instance Monoid (ParserComponent a fs) where mempty = ParserComponent Nothing mappend = (<>) instance Semigroup (Parser a) where Parser rec1 <> Parser rec2 = Parser $ hliftA2 mappend rec1 rec2 instance Monoid (Parser a) where mempty = Parser $ hpure mempty mappend = (<>) -- | A low-level function to run a 'Parser'. runParser :: Parser a -> Value -> Either ParseError a runParser p = getValidation . runParserV p runParserV :: Parser a -> Value -> Validation a runParserV (Parser comps) orig@(from -> SOP v) = hcollapse $ hliftA2 match comps v where match :: ParserComponent a fs -> NP I fs -> K (Validation a) fs match (ParserComponent mbP) v1 = K $ case mbP of Nothing -> Validation . Left $ ParseError 0 $ ExpectedInsteadOf (HS.singleton expected) orig Just p -> p orig v1 expected = let f (ParserComponent pc) (K name) = K (name <$ pc) in intercalate ", " . catMaybes . hcollapse $ hliftA2 f comps valueConNames valueConNames :: NP (K String) (Code Value) valueConNames = let ADT _ _ cons = datatypeInfo (Proxy :: Proxy Value) in hliftA (\(Constructor name) -> K name) cons fromComponent :: forall a . NS (ParserComponent a) (Code Value) -> Parser a fromComponent parser = Parser $ hexpand mempty parser -- Wrap a parser with a decorator. The decorator has access to the parsed value as well -- as the original and can inject its own processing logic. decorate :: forall a b. Parser a -> (a -> Value -> Either ParseError b) -> Parser b decorate (Parser components) decorator = Parser $ hmap wrap components where wrap :: ParserComponent a fs -> ParserComponent b fs wrap (ParserComponent maybeP) = ParserComponent $ case maybeP of Nothing -> Nothing Just p -> Just $ \orig val -> p orig val `bindV` \parsed -> Validation $ decorator parsed orig ---------------------------------------------------------------------- -- Combinators ---------------------------------------------------------------------- incErrLevel :: Validation a -> Validation a incErrLevel = Validation . first (\(ParseError l r) -> ParseError (l+1) r) . getValidation -- | Match a single YAML string. -- -- >>> parse string "howdy" -- Right "howdy" string :: Parser Text string = fromComponent $ S . S . Z $ ParserComponent $ Just $ const $ \(I s :* Nil) -> pure s -- | Match a specific YAML string, usually a «tag» identifying a particular -- form of an array or object. -- -- >>> parse (theString "hello") "hello" -- Right () -- >>> either putStr print $ parse (theString "hello") "bye" -- Expected "hello" instead of: -- -- bye theString :: Text -> Parser () theString t = fromComponent $ S . S . Z $ ParserComponent $ Just $ const $ \(I s :* Nil) -> Validation $ if s == t then Right () else Left $ ParseError 0 (ExpectedInsteadOf (HS.singleton $ show t) (String s)) -- | Match an array of elements, where each of elements are matched by -- the same parser. This is the function you'll use most of the time when -- parsing arrays, as they are usually homogeneous. -- -- >>> parse (array string) "[a,b,c]" -- Right ["a","b","c"] array :: Parser a -> Parser (Vector a) array p = fromComponent $ S . Z $ ParserComponent $ Just $ const $ \(I a :* Nil) -> incErrLevel $ traverse (runParserV p) a -- | An 'ElementParser' describes how to parse a fixed-size array -- where each positional element has its own parser. -- -- This can be used to parse heterogeneous tuples represented as YAML -- arrays. -- -- * Construct an 'ElementParser' with 'element' and the 'Applicative' combinators. -- -- * Turn a 'FieldParser' into a 'Parser' with 'theArray'. newtype ElementParser a = ElementParser (((State [Value]) :.: (ReaderT Array Validation)) a) deriving (Functor, Applicative) -- | Construct an 'ElementParser' that parses the current array element -- with the given 'Parser'. element :: Parser a -> ElementParser a element p = ElementParser $ Comp $ do vs <- State.get case vs of [] -> return $ ReaderT $ \arr -> Validation . Left $ let n = V.length arr + 1 in ParseError 0 $ ExpectedAsPartOf (HS.singleton $ "at least " ++ show n ++ " elements") $ Array arr (v:vs') -> do State.put vs' return . liftR $ runParserV p v -- | Match an array consisting of a fixed number of elements. The way each -- element is parsed depends on its position within the array and -- is determined by the 'ElementParser'. -- -- >>> parse (theArray $ (,) <$> element string <*> element bool) "[f, true]" -- Right ("f",True) theArray :: ElementParser a -> Parser a theArray (ElementParser (Comp ep)) = fromComponent $ S . Z $ ParserComponent $ Just $ const $ \(I a :* Nil) -> incErrLevel $ case first (flip runReaderT a) $ runState ep (V.toList a) of (result, leftover) -> result <* (case leftover of [] -> pure () v : _ -> Validation . Left $ ParseError 0 $ UnexpectedAsPartOf v $ Array a ) -- | Match a real number. -- -- >>> parse number "3.14159" -- Right 3.14159 number :: Parser Scientific number = fromComponent $ S . S . S . Z $ ParserComponent $ Just $ const $ \(I n :* Nil) -> pure n -- | Match an integer. -- -- >>> parse (integer @Int) "2017" -- Right 2017 integer :: (Integral i, Bounded i) => Parser i integer = fromComponent $ S . S . S . Z $ ParserComponent $ Just $ const $ \(I n :* Nil) -> case toBoundedInteger n of Just i -> pure i Nothing -> Validation . Left $ ParseError 0 $ ExpectedInsteadOf (HS.singleton "integer") (Number n) -- | Match a boolean. -- -- >>> parse bool "yes" -- Right True bool :: Parser Bool bool = fromComponent $ S . S . S . S . Z $ ParserComponent $ Just $ const $ \(I b :* Nil) -> pure b -- | Match the @null@ value. -- -- >>> parse null_ "null" -- Right () -- -- @since 1.1 null_ :: Parser () null_ = fromComponent $ S . S . S . S . S . Z $ ParserComponent $ Just $ const $ \Nil -> pure () -- | Make a parser match only valid values. -- -- If the validator does not accept the value, it should return a -- 'Left' 'String' with a noun phrase that characterizes the expected -- value, as in the example: -- -- >>> let acceptEven n = if even n then Right n else Left "an even number" -- >>> either putStr print $ parse (integer @Int `validate` acceptEven) "2017" -- Expected an even number instead of: -- -- 2017 -- -- @since 1.0.1 validate :: Parser a -- ^ parser to wrap -> (a -> Either String b) -- ^ validator -> Parser b validate parser validator = decorate parser (validity . validator) where validity (Right result) _ = Right result validity (Left problem) orig = Left $ ParseError 1 $ ExpectedInsteadOf (HS.singleton problem) orig -- | A 'FieldParser' describes how to parse an object. -- -- * Construct a 'FieldParser' with 'field', 'optField', or 'theField', and the 'Applicative' combinators. -- -- * Turn a 'FieldParser' into a 'Parser' with 'object'. newtype FieldParser a = FieldParser (Product (ReaderT Object Validation) (Constant (HashMap Text ())) a) deriving (Functor, Applicative) -- | Require an object field with the given name and with a value matched by -- the given 'Parser'. field :: Text -- ^ field name -> Parser a -- ^ value parser -> FieldParser a field name p = FieldParser $ Pair (ReaderT $ \o -> case HM.lookup name o of Nothing -> Validation . Left $ ParseError 0 $ ExpectedAsPartOf (HS.singleton $ "field " ++ show name) $ Object o Just v -> runParserV p v ) (Constant $ HM.singleton name ()) -- | Declare an optional object field with the given name and with a value -- matched by the given 'Parser'. optField :: Text -- ^ field name -> Parser a -- ^ value parser -> FieldParser (Maybe a) optField name p = FieldParser $ Pair (ReaderT $ \o -> traverse (runParserV p) $ HM.lookup name o) (Constant $ HM.singleton name ()) -- | Declare an optional object field with the given name and with a default -- to use if the field is absent. defaultField :: Text -- ^ field name -> a -- ^ default value -> Parser a -- ^ value parser -> FieldParser a defaultField name defaultVal p = fromMaybe defaultVal <$> optField name p -- | Require an object field with the given name and the given string value. -- -- This is a convenient wrapper around 'theString' intended for «tagging» -- objects. -- -- >>> :{ -- let p = object (Right <$ theField "type" "number" <*> field "value" number) -- <> object (Left <$ theField "type" "string" <*> field "value" string) -- >>> :} -- -- >>> parse p "{type: string, value: abc}" -- Right (Left "abc") -- >>> parse p "{type: number, value: 123}" -- Right (Right 123.0) theField :: Text -- ^ key name -> Text -- ^ expected value -> FieldParser () theField key value = field key (theString value) -- | Match an object. Which set of keys to expect and how their values -- should be parsed is determined by the 'FieldParser'. -- -- >>> let p = object $ (,) <$> field "name" string <*> optField "age" (integer @Int) -- >>> parse p "{ name: Anton, age: 2 }" -- Right ("Anton",Just 2) -- >>> parse p "name: Roma" -- Right ("Roma",Nothing) object :: FieldParser a -> Parser a object (FieldParser (Pair (ReaderT parseFn) (Constant names))) = fromComponent $ Z $ ParserComponent $ Just $ const $ \(I o :* Nil) -> incErrLevel $ parseFn o <* (case HM.keys (HM.difference o names) of [] -> pure () name : _ -> let v = o HM.! name in Validation . Left $ ParseError 0 $ UnexpectedAsPartOf (Object (HM.singleton name v)) (Object o) ) -- | Match any JSON value and return it as Aeson's 'Value'. -- -- >>> parse anyValue "[one, two, {three: four}]" -- Right (Array [String "one",String "two",Object (fromList [("three",String "four")])]) -- -- @since 1.1.1 anyValue :: Parser Value anyValue = Parser $ hpure $ ParserComponent . Just $ \val _np -> pure val -- | Like 'lift' for 'ReaderT', but doesn't require a 'Monad' instance liftR :: f a -> ReaderT r f a liftR = ReaderT . const