{-# LANGUAGE DeriveGeneric,DefaultSignatures,UndecidableInstances,ScopedTypeVariables, OverlappingInstances,TypeOperators,FlexibleContexts,FlexibleInstances #-} {-# OPTIONS_GHC -fcontext-stack=50 #-} {-| Module : Text.XML.ToFromXML Description : A library for reading and writing Haskell data from/to XML files Copyright : (C) Robert Reitmeier, 2014 License : GPL-3 Maintainer : concat ["public","< a t >","thinking","-","machines",".","net"] Stability : experimental Portability : n/a In order to write a Haskell value to an XML file, the value's type just has to be an instance of 'Generic' (usually one would use the @deriving 'Generic'@ clause in the @data@ defintion). Then, the 'writeToXMLFile' will write an XML file representing the value. This value can be read again using 'readFromXMLFile'. See the following example (pragma syntax below is broken, how can one correctly incorporate comments in a haddock code block?): >{ -# LANGUAGE DeriveGeneric #- } >{ -# OPTIONS_GHC -fcontext-stack=50 #- } > >import Text.XML.ToFromXML >-- GHC.Generics is exported by ToFromXML > >data Test = Test { str::String, something::(Int,Char) } > deriving (Generic,Show,Eq) > >main = do > let test = Test "abc" (42,'z') > writeToXMLFile "test.xml" test > putStrLn $ "writeToXMLFile : " ++ show test > > -- readFromXMLFile's return type can be inferred in this example, > -- otherwise it would have to be declared > test' <- readFromXMLFile "test.xml" > putStrLn $ "readFromXMLFile: " ++ show test' > > putStrLn $ if test==test' then "OK." else "ERROR!" Remark: Both 'writeToXMLFile' and 'readFromXMLFile' require the value type @a@'s generic represention @Rep a@ be an instance of @GFromToXML@, but this is automatically fulfilled. The generated XML file is > > > abc > > > 42 > z > > > The general intention of this module is to keep the generated XML as intuitive and easy to read as possible. For example, we do flatten nested pairs used by "GHC.Generics" to represent n-tuples, and TABs and CRs are used to encode long text, so it keeps its formatting (unfortunately, CDATA sections are not supported with the Node types used in "Text.XML.Expat.Pickle"). One might need to increase the context stack size with the @-fcontext-stack@ option when compiling code using this module. A test suite is included in the package, see @TestToFromXML.hs@ in the package's test directory. -} module Text.XML.ToFromXML ( ToFromXML(..), toXML,fromXML,fromXMLEither, readFromXMLFile,writeToXMLFile, module GHC.Generics ) where import GHC.Generics import qualified Data.ByteString as BS import Text.XML.Expat.Pickle import Text.XML.Expat.Tree import qualified Text.XML.Expat.Format as Format import Text.Printf import Data.Char import Data.Word import Data.Ratio import Data.Int import qualified Data.Set as Set import qualified Data.Map as Map import qualified Data.IntMap as IntMap import Data.Complex import Data.Array.IArray -- | For the picklers, are using nodes with both tag and text being Strings. type Pickler a = PU (ListOf (UNode String)) a class GToFromXML f where gXMLPickler :: Pickler (f p) {-| We do not want our XML to be cluttered with nested tuples, so we flatten these in the XML representation. -} class PickleProdN f where -- | Given the current index in the flattened tuple, pickleProdN returns the incremented index and the pickler. pickleProdN :: Int -> (Int,Pickler (f p)) {-| Pickling a product is done by first pickling the first component, and then pickling the second component with the current index @i1@ returned by the first @pickleProdN i@. The current index @i2@ after @pickleProdN i1@ is returned. -} instance (PickleProdN f1,PickleProdN f2) => PickleProdN (f1 :*: f2) where pickleProdN i = (i2,xpWrap (uncurry (:*:),\ (a :*: b) -> (a,b)) $ xpPair p1 p2) where (i1,p1) = pickleProdN i (i2,p2) = pickleProdN i1 argTagNames = [ "ARG-" ++ show i | i <- [1..] ] instance (GToFromXML f) => PickleProdN (M1 S NoSelector f) where pickleProdN i = (i+1,xpElemNodes (argTagNames!!i) gXMLPickler) {-| For constructor arguments, one can also leave out the selector attribute (like Haskell syntax allows leaving out record syntax). This makes it easier to manually write data XML. -} instance (GToFromXML f,Selector s) => PickleProdN (M1 S s f) where pickleProdN i = (i+1,picklearg) where selname = selName (undefined :: M1 S s f p) picklearg = xpAlt (\_->0) [ xpElemWithAttr (argTagNames!!i) "selector" selname gXMLPickler, xpElemNodes (argTagNames!!i) gXMLPickler ] instance (PickleProdN (f1 :*: f2)) => GToFromXML (f1 :*: f2) where gXMLPickler = snd (pickleProdN 0) instance (GToFromXML f1,GToFromXML f2) => GToFromXML (f1 :+: f2) where gXMLPickler = xpAlt selfun [ xpWrap (L1,\ (L1 x) -> x) gXMLPickler, xpWrap (R1,\ (R1 x) -> x) gXMLPickler ] where selfun (L1 _) = 0 selfun (R1 _) = 1 {-| We won't construct a tag for @M1 D@, since it is unnecessary and cluttering the XML. Although, it might add to safety... -} instance (GToFromXML f,Datatype d) => GToFromXML (M1 D d f) where gXMLPickler = xpWrap (M1,unM1) gXMLPickler {-| The CONSTRUCTOR tag contains a name attribute with the constructor's name. Strictly speaking it is unnecessary to encode the name as well, but improves readability and might add to safety... -} instance (GToFromXML f,Constructor c) => GToFromXML (M1 C c f) where gXMLPickler = xpWrap (M1,unM1) $ xpElemWithAttr "CONSTRUCTOR" "name" conname gXMLPickler where conname = conName (undefined :: M1 C c f p) -- | No tag created for @M1 S NoSelector@, this is handled in 'PickleProdN'. instance (GToFromXML f) => GToFromXML (M1 S NoSelector f) where gXMLPickler = xpWrap (M1,unM1) gXMLPickler -- | No tag created for @M1 S s@, this is handled in 'PickleProdN'. instance (GToFromXML f,Selector s) => GToFromXML (M1 S s f) where gXMLPickler = xpWrap (M1,unM1) gXMLPickler -- | A helper function injecting an attribute with a given/fixed value in a tag. xpElemWithAttr tag attrname attrval pickler = xpWrap (snd,\b->((),b)) $ xpElem tag (xpAttrFixed attrname attrval) pickler {- @V1@ is not an instance of 'GToFromXML', because we can't serialize data of an empty type since there are no constructors. It shouldn't be necessary to serialize an empty type, so @V1@ should be rejected by the type checker. -} instance GToFromXML U1 where gXMLPickler = xpLift U1 instance (ToFromXML a) => GToFromXML (K1 R a) where gXMLPickler = xpWrap (K1,unK1) xMLPickler {-| The class 'ToFromXML' declares that there is a pickler for the instance type wrapped by the generic representation's @K1@ constructor. There are instances for data types that we do want to encode in more precise way, not following the generic pattern. For example, we want to encode unit @()@ as a distinct tag @\@, not as text content generated with show/read. We give a default signature and definition which we can use conveniently by just stating @instance ToFromXML Word32@ for numeric types, for example. Special chars are represented in XML by Haskell's escape sequences. A char is not embraced by single quotes in the tag content (as it would be using the default instance). Since unfortunately CDATA sections are not supported with the Node types used in "Text.XML.Expat.Pickle" we are relying on, a String has to be represented in XML as a tag's text content, using the common Haskell escape sequences. For better readability and prevention of very long lines in the XML file, after each @'\r'@, @'\n'@ and @'\t'@ the same character is inserted unescaped after the character's escape sequence. The unescaped character is removed again when parsing the String. For example, @"abc\ndef"@ will be written in the XML file as @"abc\\n\ndef"@, with the LF escaped (i.e. @"\\n"@) and followed by a real LF (@'\n'@). Injecting line breaks and tabs makes long text much more readable und editable by humans (which is one of the original purposes of XML). The real LF will be filtered out again while parsing the XML file String. -} class ToFromXML a where -- | An instance of 'ToFromXML' provides a pickler for type @a@. xMLPickler :: Pickler a -- | The default signature of 'xMLPickler' requires 'Read' and 'Show' instances default xMLPickler :: (Read a,Show a) => Pickler a -- | The default definition via 'xpPrim' uses show and read to convert the value to XML tag text content. xMLPickler = xpContent xpPrim instance ToFromXML () where xMLPickler = xpElemNodes "UNIT" xpUnit instance (ToFromXML a) => ToFromXML [a] where xMLPickler = xpElemNodes "LIST" $ xpList0 $ xpElemNodes "ITEM" xMLPickler instance ToFromXML Int instance ToFromXML Int8 instance ToFromXML Int16 instance ToFromXML Int32 instance ToFromXML Int64 instance ToFromXML Integer instance ToFromXML Word instance ToFromXML Word8 instance ToFromXML Word16 instance ToFromXML Word32 instance ToFromXML Word64 instance ToFromXML Float instance ToFromXML Double instance (Show a,Read a,Integral a) => ToFromXML (Ratio a) instance (Show a,Read a) => ToFromXML (Complex a) instance ToFromXML Bool where xMLPickler = xpAlt selfun [ xpElemNodes "TRUE" $ xpLift True, xpElemNodes "FALSE" $ xpLift False ] where selfun True = 0 selfun False = 1 instance ToFromXML Char where xMLPickler = xpContent $ xpWrap ( fst.head.readLitChar, (`showLitChar` "") ) xpText instance ToFromXML String where xMLPickler = xpContent pickleContentString skipPickleString = (`elem` ['\r','\n','\t']) {-| Pickles a string to/from text content with escaping -} pickleContentString :: PU String String pickleContentString = xpWrap ( readLitString, showLitString ) xpText0 where readLitString "" = "" readLitString (c:ss) | skipPickleString c = readLitString ss readLitString s = let [(c,ss)] = readLitChar s in c : readLitString ss showLitString "" = "" showLitString (c:ss) | skipPickleString c = showLitChar c $ c : showLitString ss showLitString (c:ss) = showLitChar c $ showLitString ss {- Pickler for a Map. Didn't use 'xpMap' because @show@ing keys as attributes might be inconvenient/unreadable for more complex key types. -} instance (Ord k,ToFromXML k,ToFromXML v) => ToFromXML (Map.Map k v) where xMLPickler = xpElemNodes "MAP" $ xpWrap (Map.fromList,Map.toList) $ xpList $ xpElemNodes "ASSOC" $ xpPair (xpElemNodes "KEY" xMLPickler) (xpElemNodes "ELEM" xMLPickler) instance (ToFromXML v) => ToFromXML (IntMap.IntMap v) where xMLPickler = xpElemNodes "INTMAP" $ xpWrap (IntMap.fromList,IntMap.toList) $ xpList $ xpElem "ELEM" (xpAttr "index" xpPrim) xMLPickler -- Didn't use an attribute for the dimension of the tuple because this is not checkable by a schema. instance (ToFromXML a,ToFromXML b) => ToFromXML (a,b) where xMLPickler = xpElemNodes "PAIR" $ xpPair (xpComponent 0) (xpComponent 1) instance (ToFromXML a,ToFromXML b,ToFromXML c) => ToFromXML (a,b,c) where xMLPickler = xpElemNodes "TRIPLE" $ xpTriple (xpComponent 0) (xpComponent 1) (xpComponent 2) instance (ToFromXML a,ToFromXML b,ToFromXML c,ToFromXML d) => ToFromXML (a,b,c,d) where xMLPickler = xpElemNodes "QUADRUPLE" $ xp4Tuple (xpComponent 0) (xpComponent 1) (xpComponent 2) (xpComponent 3) instance (ToFromXML a,ToFromXML b,ToFromXML c,ToFromXML d,ToFromXML e) => ToFromXML (a,b,c,d,e) where xMLPickler = xpElemNodes "QUINTUPLE" $ xp5Tuple (xpComponent 0) (xpComponent 1) (xpComponent 2) (xpComponent 3) (xpComponent 4) instance (ToFromXML a,ToFromXML b,ToFromXML c,ToFromXML d,ToFromXML e,ToFromXML f) => ToFromXML (a,b,c,d,e,f) where xMLPickler = xpElemNodes "SEXTUPLE" $ xp6Tuple (xpComponent 0) (xpComponent 1) (xpComponent 2) (xpComponent 3) (xpComponent 4) (xpComponent 5) -- | Pickle a tuple's i\'th component. The component tag names are distinct, otherwise hexpat-pickle confuses this with a list and gives a parse error. xpComponent :: (ToFromXML a) => Int -> Pickler a xpComponent i = xpElemNodes (componentnames!!i) xMLPickler where componentnames = [ "FIRST","SECOND","THIRD","FOURTH","FIFTH","SIXTH" ] instance (ToFromXML a) => ToFromXML (Maybe a) where xMLPickler = xpAlt selfun [ xpElemNodes "NOTHING" $ xpLift Nothing, xpElemNodes "JUST" $ xpWrap (Just, \ (Just a) -> a ) xMLPickler ] where selfun Nothing = 0 selfun (Just _) = 1 instance (ToFromXML a,ToFromXML b) => ToFromXML (Either a b) where xMLPickler = xpAlt selfun [ xpElemNodes "LEFT" $ xpWrap ( Left, \ (Left a) -> a ) xMLPickler, xpElemNodes "RIGHT" $ xpWrap ( Right, \ (Right b) -> b ) xMLPickler ] where selfun (Left _) = 0 selfun (Right _) = 1 instance (ToFromXML a,Ord a) => ToFromXML (Set.Set a) where xMLPickler = xpElemNodes "SET" $ xpWrap (Set.fromList,Set.toList) $ xpList $ xpElemNodes "ELEM" xMLPickler -- TODO: Insert ToFromXML instances for date and time types, maybe in conformance to XSD types? -- Here we assume that array index types (usually 'Int') are sufficiently simple to be represented as string in an attribute. instance (Ix i,Show i,Read i,ToFromXML e) => ToFromXML (Array i e) where xMLPickler = xpWrap (list2arr,arr2list) $ xpElem "ARRAY" xpbounds $ xpList $ xpElemNodes "ELEM" xMLPickler where xpbounds = xpPair (xpAttr "lowerBound" xpPrim) (xpAttr "upperBound" xpPrim) arr2list arr = (bounds arr,elems arr) list2arr (bounds,arrelems) = listArray bounds arrelems -- This is the catch-all instance, leading to the generic @Rep a@ representation instance (Generic a,GToFromXML (Rep a)) => ToFromXML a where xMLPickler = xpWrap (to,from) gXMLPickler {-| Converts generic Haskell data to a (strict) 'ByteString' containing the XML representation. In most cases, the data type would probably be @deriving 'Generic'@ using the @DeriveGeneric@ language pragma. -} toXML :: (Generic a,GToFromXML (Rep a)) => a -> BS.ByteString toXML x = Format.format' $ Format.indent 2 $ pickleTree (xpRoot gXMLPickler) (from x) {-| Construct generic Haskell data from a (strict) 'ByteString' containing the XML representation. fromXMLEither will return 'Left' in case of a 'XMLParseError', 'Right' otherwise. -} fromXMLEither :: (GToFromXML f) => BS.ByteString -> Either String (f p) fromXMLEither bs = case parse' defaultParseOptions bs of Left (XMLParseError errmsg loc) -> Left $ printf "XMLParseError at %s:\n %s" (show loc) errmsg Right tree -> unpickleTree' (xpRoot gXMLPickler) tree {-| Convenience function wrapping around 'fromXMLEither', throwing an error in case of a parsing error. -} fromXML :: (Generic a,GToFromXML (Rep a)) => BS.ByteString -> a fromXML bs = case fromXMLEither bs of Left errmsg -> error errmsg Right x -> to x {-| Action writing an XML representation of generic Haskell data to a file. The underlying writeFile operation is strict. -} writeToXMLFile :: (Generic a,GToFromXML (Rep a)) => FilePath -> a -> IO () writeToXMLFile filepath a = BS.writeFile filepath $ toXML a {-| Action reading generic Haskell data from an XML file. The underlying readFile operation is strict. -} readFromXMLFile :: (Generic a,GToFromXML (Rep a)) => FilePath -> IO a readFromXMLFile filepath = BS.readFile filepath >>= return . fromXML