----------------------------------------------------------------------------- -- Copyright 2014, Open Universiteit Nederland. This file is distributed -- under the terms of the GNU General Public License. For more information, -- see the file "LICENSE.txt", which is included in the distribution. ----------------------------------------------------------------------------- -- | -- Maintainer : bastiaan.heeren@ou.nl -- Stability : provisional -- Portability : portable (depends on ghc) -- -- Collection of common operation on XML documents -- ----------------------------------------------------------------------------- -- $Id: Interface.hs 6535 2014-05-14 11:05:06Z bastiaan $ module Ideas.Text.XML.Interface ( Element(..), Content, Attribute(..), Attributes , normalize, parseXML, compactXML , children, findAttribute, findChild, getData ) where import Control.Arrow import Data.Char (chr, ord) import Data.Maybe import Ideas.Text.Parsing (parseSimple) import Ideas.Text.XML.Document (Name, prettyElement) import Ideas.Text.XML.Parser (document) import Ideas.Text.XML.Unicode (decoding) import qualified Ideas.Text.XML.Document as D data Element = Element { name :: Name , attributes :: Attributes , content :: Content } instance Show Element where show = show . extend compactXML :: Element -> String compactXML = show . prettyElement True . extend type Content = [Either String Element] type Attributes = [Attribute] data Attribute = Name := String normalize :: D.XMLDoc -> Element normalize doc = toElement (D.root doc) where toElement :: D.Element -> Element toElement (D.Element n as c) = Element n (map toAttribute as) (toContent c) toAttribute :: D.Attribute -> Attribute toAttribute (n D.:= v) = n := concatMap (either return refToString) v toContent :: D.Content -> Content toContent = merge . concatMap f where f :: D.XML -> Content f (D.Tagged e) = [Right (toElement e)] f (D.CharData s) = [Left s] f (D.CDATA s) = [Left s] f (D.Reference r) = refToContent r refToString :: D.Reference -> String refToString (D.CharRef i) = [chr i] refToString (D.EntityRef s) = maybe "" return (lookup s general) refToContent :: D.Reference -> Content refToContent (D.CharRef i) = [Left [chr i]] refToContent (D.EntityRef s) = fromMaybe [] (lookup s entities) entities :: [(String, Content)] entities = [ (n, toContent (snd ext)) | (n, ext) <- D.externals doc ] ++ -- predefined entities map (second (return . Left . return)) general general :: [(String, Char)] general = [("lt",'<'), ("gt",'>'), ("amp",'&'), ("apos",'\''), ("quot",'"')] merge :: Content -> Content merge (Left s:Left t:rest) = merge (Left (s++t) : rest) merge (x:xs) = x:merge xs merge [] = [] extend :: Element -> D.Element extend (Element n as c) = D.Element n (map toAttribute as) (concatMap toXML c) where toAttribute :: Attribute -> D.Attribute toAttribute (m := s) = (D.:=) m (map Left s) toXML :: Either String Element -> [D.XML] toXML = either fromString (return . D.Tagged . extend) fromString :: String -> [D.XML] fromString [] = [] fromString xs@(hd:tl) | null xs1 = D.Reference (D.CharRef (ord hd)) : fromString tl | otherwise = D.CharData xs1 : fromString xs2 where (xs1, xs2) = break ((> 127) . ord) xs ----------------------------------------------------- parseXML :: String -> Either String Element parseXML xs = do input <- decoding xs doc <- parseSimple document input return (normalize doc) ----------------------------------------------------- findAttribute :: Monad m => String -> Element -> m String findAttribute s (Element _ as _) = case [ t | n := t <- as, s==n ] of [hd] -> return hd _ -> fail $ "Invalid attribute: " ++ show s findChild :: Monad m => String -> Element -> m Element findChild s e = case filter ((==s) . name) (children e) of [a] -> return a _ -> fail $ "Child not found: " ++ show s children :: Element -> [Element] children e = [ c | Right c <- content e ] getData :: Element -> String getData e = concat [ s | Left s <- content e ]