{-# LANGUAGE OverloadedStrings, TypeFamilies, QuasiQuotes #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Text.XML.Papillon(Xmlns, XEQName, parseXmlEvent, XmlEvent(..)) where import Control.Arrow import Data.List import Data.Char import Data.ByteString.Char8 (ByteString, pack) import Text.Papillon import Numeric import qualified Data.ByteString.Char8 as BSC type Xmlns = (BSC.ByteString, BSC.ByteString) type XEQName = (BSC.ByteString, BSC.ByteString) data XmlEvent = XEXmlDecl (Int, Int) | XESTag XEQName [Xmlns] [(XEQName, BSC.ByteString)] | XEETag XEQName | XEEmptyElemTag XEQName [Xmlns] [(XEQName, BSC.ByteString)] | XECharData BSC.ByteString deriving Show data Attribute = NSAttribute BSC.ByteString BSC.ByteString | Attribute (BSC.ByteString, BSC.ByteString) BSC.ByteString deriving Show procAtts :: [Attribute] -> ( [(BSC.ByteString, BSC.ByteString)], [((BSC.ByteString, BSC.ByteString), BSC.ByteString)]) procAtts = (map fromNSAttribute *** map fromAttribute) . partition isNSAtt fromNSAttribute :: Attribute -> (BSC.ByteString, BSC.ByteString) fromNSAttribute (NSAttribute k v) = (k, v) fromNSAttribute _ = error "bad" fromAttribute :: Attribute -> ((BSC.ByteString, BSC.ByteString), BSC.ByteString) fromAttribute (Attribute k v) = (k, v) fromAttribute _ = error "bad" isNSAtt :: Attribute -> Bool isNSAtt (NSAttribute _ _) = True isNSAtt _ = False parseXmlEvent :: ByteString -> Maybe XmlEvent parseXmlEvent = either (const Nothing) (Just . fst) . runError . xmlEvent . parse fromHex :: String -> Char fromHex = chr . fst . head . readHex [papillon| source: ByteString xmlEvent :: XmlEvent = et:emptyElemTag { et } / st:sTag { st } / et:eTag { et } / cd:charData { cd } / xd:xmlDecl { xd } spaces = _:(' ' / '\t' / '\r' / '\n')+ nameStartChar :: Char = <(`elem` (":_" ++ ['a' .. 'z'] ++ ['A' .. 'Z']))> nameChar :: Char = s:nameStartChar { s } / <(`elem` ("-." ++ ['0' .. '9']))> ncNameStartChar :: Char = !':' s:nameStartChar { s } ncNameChar :: Char = !':' c:nameChar { c } -- name :: ByteString -- = sc:nameStartChar cs:(c:nameChar { c })* { pack $ sc : cs } ncName :: ByteString = sc:ncNameStartChar cs:(c:ncNameChar { c })* { pack $ sc : cs } qName :: (ByteString, ByteString) = pn:prefixedName { pn } / un:unprefixedName { ("", un) } prefixedName :: (ByteString, ByteString) = p:prefix ':' l:localPart { (p, l) } unprefixedName :: ByteString = l:localPart { l } prefix :: ByteString = n:ncName { n } localPart :: ByteString = n:ncName { n } attValue :: ByteString = '"' v:(c:<(`notElem` "<&\"")> { c } / c:charEntRef { c })* '"' { pack v } / '\'' v:(c:<(`notElem` "<&'")> { c } / c:charEntRef { c })* '\'' { pack v } charData :: XmlEvent = '>' cds:(c:<(`notElem` "<&")> { c } / c:charEntRef { c })* { XECharData $ pack cds } charEntRef :: Char = c:charRef { c } / c:entityRef { c } charRef :: Char = '&' '#' 'x' ds:(<(`elem` "0123456789abcdefABCDEF")>)+ ';' { fromHex ds } entityRef :: Char = '&' 'a' 'm' 'p' ';' { '&' } / '&' 'l' 't' ';' { '<' } / '&' 'g' 't' ';' { '>' } / '&' 'q' 'u' 'o' 't' ';' { '"' } / '&' 'a' 'p' 'o' 's' ';' { '\'' } xmlDecl :: XmlEvent = '<' '?' 'x' 'm' 'l' vi:versionInfo _:spaces? '?' _:eof { XEXmlDecl vi } versionInfo :: (Int, Int) = _:spaces 'v' 'e' 'r' 's' 'i' 'o' 'n' _:eq vn:('"' v:versionNum '"' { v } / '\'' v:versionNum '\'' { v }) { vn } eq :: () = _:spaces? '=' _:spaces? versionNum :: (Int, Int) = '1' '.' d:+ { (1, read d) } sTag :: XmlEvent = '<' n:qName as:(_:spaces a:attribute { a })* _:spaces? _:eof { uncurry (XESTag n) $ procAtts as } emptyElemTag :: XmlEvent = '<' n:qName as:(_:spaces a:attribute { a })* _:spaces? '/' _:eof { uncurry (XEEmptyElemTag n) $ procAtts as } prefixedAttName :: ByteString = 'x' 'm' 'l' 'n' 's' ':' n:ncName { n } defaultAttName = 'x' 'm' 'l' 'n' 's' nsAttName :: ByteString = n:prefixedAttName { n } / _:defaultAttName { "" } attribute :: Attribute = n:nsAttName _:eq v:attValue { NSAttribute n v } / n:qName _:eq v:attValue { Attribute n v } eTag :: XmlEvent = '<' '/' n:qName _:spaces? _:eof { XEETag n } eof = !_ |]