-- | Combinators for parsing YAML into Haskell types. -- -- Based on the article . {-# LANGUAGE PolyKinds, DataKinds, KindSignatures, ExplicitForAll, TemplateHaskell, ViewPatterns, TypeOperators, TypeFamilies, GeneralizedNewtypeDeriving #-} module Data.Yaml.Combinators ( Parser , parse , runParser -- * Scalars , string , theString , number , integer , bool -- * Arrays , array , theArray , ElementParser , element -- * Objects , object , FieldParser , field , optField , theField -- * Errors , ParseError(..) , Reason(..) ) where import Data.Aeson (Value(..), Object, Array) import Data.Scientific import Data.Yaml (decodeEither, encode) import Data.Text (Text) import Data.List import Data.Maybe import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as BS8 import Data.Bifunctor (first) import Control.Monad.Trans.Reader import Control.Monad.Trans.State as State import Control.Monad.Trans.Class import Data.Vector (Vector) import qualified Data.Vector as V import Data.Functor.Product import Data.Functor.Constant import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HM import Generics.SOP import Generics.SOP.TH -- $setup -- >>> :set -XOverloadedStrings -XTypeApplications -- >>> import Data.Monoid deriveGeneric ''Value ---------------------------------------------------------------------- -- Parsing function ---------------------------------------------------------------------- -- | Run a 'Parser' on a 'ByteString' containing the YAML content. -- -- This is a high-level function implemented on top of 'runParser'. parse :: Parser a -> ByteString -> Either String a parse p bs = do aesonValue <- decodeEither bs first ppParseError $ runParser p aesonValue ---------------------------------------------------------------------- -- Errors and Pretty-printing ---------------------------------------------------------------------- -- | A parse error. 'Reason' describes the error. -- The 'Int' field denotes at which level the error occurred and -- is used to select the deepest (most relevant) error -- when merging multiple parsers. data ParseError = ParseError !Int -- level Reason deriving (Eq, Show) -- | Describes what exactly went wrong during parsing. data Reason = UnexpectedAsPartOf Value Value | ExpectedAsPartOf String Value | ExpectedInsteadOf String Value deriving (Eq, Show) mergeParseError :: ParseError -> ParseError -> ParseError mergeParseError e1@(ParseError l1 r1) e2@(ParseError l2 r2) = case compare l1 l2 of GT -> e1 EQ | ExpectedAsPartOf exp1 w1 <- r1 , ExpectedAsPartOf exp2 w2 <- r2 , w1 == w2 -> ParseError l1 (ExpectedAsPartOf (exp1 ++ ", " ++ exp2) w1) | ExpectedInsteadOf exp1 w1 <- r1 , ExpectedInsteadOf exp2 w2 <- r2 , w1 == w2 -> ParseError l1 (ExpectedInsteadOf (exp1 ++ ", " ++ exp2) w1) _ -> e2 ppParseError :: ParseError -> String ppParseError (ParseError _lvl reason) = case reason of UnexpectedAsPartOf part whole -> "Unexpected \n\n" ++ showYaml part ++ "\nas part of\n\n" ++ showYaml whole ExpectedInsteadOf exp1 got -> "Expected " ++ exp1 ++ " instead of:\n\n" ++ showYaml got ExpectedAsPartOf exp1 got -> "Expected " ++ exp1 ++ " as part of:\n\n" ++ showYaml got where showYaml :: Value -> String showYaml = BS8.unpack . encode ---------------------------------------------------------------------- -- Core definitions ---------------------------------------------------------------------- newtype ParserComponent a fs = ParserComponent (Maybe (NP I fs -> Either ParseError a)) -- | A top-level YAML parser. -- -- * Construct a 'Parser' with 'string', 'number', 'integer', 'bool', 'array', or 'object'. -- -- * Combine two or more 'Parser's with 'Monoid' operators -- such as 'mappend', 'Data.Monoid.<>', or `mconcat` — -- e.g. if you expect either an object or a string. -- -- * Run with 'parse' or 'runParser'. newtype Parser a = Parser (NP (ParserComponent a) (Code Value)) -- fmap for ParserComponent (in its first type argument) pcFmap :: (a -> b) -> ParserComponent a fs -> ParserComponent b fs pcFmap f (ParserComponent mbP) = ParserComponent $ (fmap . fmap . fmap $ f) mbP instance Functor Parser where fmap f (Parser comps) = Parser $ hliftA (pcFmap f) comps instance Monoid (ParserComponent a fs) where mempty = ParserComponent Nothing ParserComponent mbP1 `mappend` ParserComponent mbP2 = ParserComponent $ case (mbP1, mbP2) of (Nothing, Nothing) -> Nothing (Just p1, Nothing) -> Just p1 (Nothing, Just p2) -> Just p2 (Just p1, Just p2) -> Just $ \v -> case (p1 v, p2 v) of (Right r1, _) -> Right r1 (_, Right r2) -> Right r2 (Left l1, Left l2) -> Left $ mergeParseError l1 l2 instance Monoid (Parser a) where mempty = Parser $ hpure mempty Parser rec1 `mappend` Parser rec2 = Parser $ hliftA2 mappend rec1 rec2 -- | A low-level function to run a 'Parser'. runParser :: Parser a -> Value -> Either ParseError a runParser (Parser comps) orig@(from -> SOP v) = hcollapse $ hliftA2 match comps v where match :: ParserComponent a fs -> NP I fs -> K (Either ParseError a) fs match (ParserComponent mbP) v1 = K $ case mbP of Nothing -> Left $ ParseError 0 $ ExpectedInsteadOf expected orig Just p -> p v1 expected = let f (ParserComponent pc) (K name) = K (name <$ pc) in intercalate ", " . catMaybes . hcollapse $ hliftA2 f comps valueConNames valueConNames :: NP (K String) (Code Value) valueConNames = let ADT _ _ cons = datatypeInfo (Proxy :: Proxy Value) in hliftA (\(Constructor name) -> K name) cons fromComponent :: forall a . NS (ParserComponent a) (Code Value) -> Parser a fromComponent parser = Parser $ hap' (hliftA (Fn . const) parser) (hpure mempty :: NP (ParserComponent a) (Code Value)) hap' :: forall f (xs :: [k]) . NS (f -.-> f) xs -> NP f xs -> NP f xs hap' (Z (Fn f)) (h :* t) = f h :* t hap' (S f) (h :* t) = h :* hap' f t ---------------------------------------------------------------------- -- Combinators ---------------------------------------------------------------------- incErrLevel :: Either ParseError a -> Either ParseError a incErrLevel = first $ \(ParseError l r) -> ParseError (l+1) r -- | Match a single YAML string. -- -- >>> parse string "howdy" -- Right "howdy" string :: Parser Text string = fromComponent $ S . S . Z $ ParserComponent $ Just $ \(I s :* Nil) -> Right s -- | Match a specific YAML string, usually a «tag» identifying a particular -- form of an array or object. -- -- >>> parse (theString "hello") "hello" -- Right () -- >>> either putStr print $ parse (theString "hello") "bye" -- Expected "hello" instead of: -- -- bye theString :: Text -> Parser () theString t = fromComponent $ S . S . Z $ ParserComponent $ Just $ \(I s :* Nil) -> if s == t then Right () else Left $ ParseError 1 (ExpectedInsteadOf (show t) (String s)) -- | Match an array of elements, where each of elements are matched by -- the same parser. This is the function you'll use most of the time when -- parsing arrays, as they are usually homogeneous. -- -- >>> parse (array string) "[a,b,c]" -- Right ["a","b","c"] array :: Parser a -> Parser (Vector a) array p = fromComponent $ S . Z $ ParserComponent $ Just $ \(I a :* Nil) -> incErrLevel $ mapM (runParser p) a -- | An 'ElementParser' describes how to parse a fixed-size array -- where each positional element has its own parser. -- -- This can be used to parse heterogeneous tuples represented as YAML -- arrays. -- -- * Construct an 'ElementParser' with 'element' and the 'Applicative' combinators. -- -- * Turn a 'FieldParser' into a 'Parser' with 'theArray'. newtype ElementParser a = ElementParser (StateT [Value] (Either (Array -> ParseError)) a) deriving (Functor, Applicative) -- | Construct an 'ElementParser' that parses the current array element -- with the given 'Parser'. element :: Parser a -> ElementParser a element p = ElementParser $ do vs <- State.get case vs of [] -> lift $ Left $ \arr -> let n = V.length arr + 1 in ParseError 0 $ ExpectedAsPartOf ("at least " ++ show n ++ " elements") $ Array arr (v:vs') -> do State.put vs' lift $ first const $ incErrLevel $ runParser p v -- | Match an array consisting of a fixed number of elements. The way each -- element is parsed depends on its position within the array and -- is determined by the 'ElementParser'. -- -- >>> parse (theArray $ (,) <$> element string <*> element bool) "[f, true]" -- Right ("f",True) theArray :: ElementParser a -> Parser a theArray (ElementParser ep) = fromComponent $ S . Z $ ParserComponent $ Just $ \(I a :* Nil) -> incErrLevel $ case runStateT ep (V.toList a) of Right (r, []) -> return r Right (_, v:_) -> Left $ ParseError 0 $ UnexpectedAsPartOf v $ Array a Left errFn -> Left $ errFn a -- | Match a real number. -- -- >>> parse number "3.14159" -- Right 3.14159 number :: Parser Scientific number = fromComponent $ S . S . S . Z $ ParserComponent $ Just $ \(I n :* Nil) -> Right n -- | Match an integer. -- -- >>> parse (integer @Int) "2017" -- Right 2017 integer :: (Integral i, Bounded i) => Parser i integer = fromComponent $ S . S . S . Z $ ParserComponent $ Just $ \(I n :* Nil) -> case toBoundedInteger n of Just i -> Right i Nothing -> Left $ ParseError 0 $ ExpectedInsteadOf "integer" (Number n) -- | Match a boolean. -- -- >>> parse bool "yes" -- Right True bool :: Parser Bool bool = fromComponent $ S . S . S . S . Z $ ParserComponent $ Just $ \(I b :* Nil) -> Right b -- | A 'FieldParser' describes how to parse an object. -- -- * Construct a 'FieldParser' with 'field', 'optField', or 'theField', and the 'Applicative' combinators. -- -- * Turn a 'FieldParser' into a 'Parser' with 'object'. newtype FieldParser a = FieldParser (Product (ReaderT Object (Either ParseError)) (Constant (HashMap Text ())) a) deriving (Functor, Applicative) -- | Require an object field with the given name and with a value matched by -- the given 'Parser'. field :: Text -- ^ field name -> Parser a -- ^ value parser -> FieldParser a field name p = FieldParser $ Pair (ReaderT $ \o -> case HM.lookup name o of Nothing -> Left $ ParseError 0 $ ExpectedAsPartOf ("field " ++ show name) $ Object o Just v -> incErrLevel $ runParser p v ) (Constant $ HM.singleton name ()) -- | Declare an optional object field with the given name and with a value -- matched by the given 'Parser'. optField :: Text -- ^ field name -> Parser a -- ^ value parser -> FieldParser (Maybe a) optField name p = FieldParser $ Pair (ReaderT $ \o -> traverse (incErrLevel . runParser p) $ HM.lookup name o) (Constant $ HM.singleton name ()) -- | Require an object field with the given name and the given string value. -- -- This is a convenient wrapper around 'theString' intended for «tagging» -- objects. -- -- >>> :{ -- let p = object (Right <$ theField "type" "number" <*> field "value" number) -- <> object (Left <$ theField "type" "string" <*> field "value" string) -- >>> :} -- -- >>> parse p "{type: string, value: abc}" -- Right (Left "abc") -- >>> parse p "{type: number, value: 123}" -- Right (Right 123.0) theField :: Text -- ^ key name -> Text -- ^ expected value -> FieldParser () theField key value = field key (theString value) -- | Match an object. Which set of keys to expect and how their values -- should be parsed is determined by the 'FieldParser'. -- -- >>> let p = object $ (,) <$> field "name" string <*> optField "age" (integer @Int) -- >>> parse p "{ name: Anton, age: 2 }" -- Right ("Anton",Just 2) -- >>> parse p "name: Roma" -- Right ("Roma",Nothing) object :: FieldParser a -> Parser a object (FieldParser (Pair (ReaderT parseFn) (Constant names))) = fromComponent $ Z $ ParserComponent $ Just $ \(I o :* Nil) -> incErrLevel $ parseFn o <* (case HM.keys (HM.difference o names) of [] -> pure () name : _ -> let v = o HM.! name in Left $ ParseError 0 $ UnexpectedAsPartOf (Object (HM.singleton name v)) (Object o) )