-------------------------------------------------------------------- -- | -- Module : Text.XML.Light.Input -- Copyright : (c) Galois, Inc. 2007 -- License : BSD3 -- -- Maintainer: Iavor S. Diatchki -- Stability : provisional -- Portability: portable -- -- Lightweight XML parsing -- module Text.XML.Light.Input (parseXML,parseXMLDoc) where import Text.XML.Light.Lexer import Text.XML.Light.Types import Text.XML.Light.Proc import Text.XML.Light.Output(tagEnd) import Data.List(isPrefixOf) -- | parseXMLDoc, parse a XMLl document to maybe an element parseXMLDoc :: XmlSource s => s -> Maybe Element parseXMLDoc xs = strip (parseXML xs) where strip cs = case onlyElems cs of e : es | "?xml" `isPrefixOf` qName (elName e) -> strip (map Elem es) | otherwise -> Just e _ -> Nothing -- | parseXML to a list of content chunks parseXML :: XmlSource s => s -> [Content] parseXML = parse . tokens ------------------------------------------------------------------------ parse :: [Token] -> [Content] parse [] = [] parse ts = let (es,_,ts1) = nodes ([],Nothing) [] ts in es ++ parse ts1 -- Information about namespaces. -- The first component is a map that associates prefixes to URIs, -- the second is the URI for the default namespace, if one was provided. type NSInfo = ([(String,String)],Maybe String) nodes :: NSInfo -> [QName] -> [Token] -> ([Content], [QName], [Token]) nodes ns ps (TokCRef ref : ts) = let (es,qs,ts1) = nodes ns ps ts in (CRef ref : es, qs, ts1) nodes ns ps (TokText txt : ts) = let (es,qs,ts1) = nodes ns ps ts (more,es1) = case es of Text cd : es1' | cdVerbatim cd == cdVerbatim txt -> (cdData cd,es1') _ -> ([],es) in (Text txt { cdData = cdData txt ++ more } : es1, qs, ts1) nodes cur_info ps (TokStart p t as empty : ts) = (node : siblings, open, toks) where new_name = annotName new_info t new_info = foldr addNS cur_info as node = Elem Element { elLine = Just p , elName = new_name , elAttribs = map (annotAttr new_info) as , elContent = children } (children,(siblings,open,toks)) | empty = ([], nodes cur_info ps ts) | otherwise = let (es1,qs1,ts1) = nodes new_info (new_name:ps) ts in (es1, case qs1 of [] -> nodes cur_info ps ts1 _ : qs3 -> ([],qs3,ts1)) nodes ns ps (TokEnd p t : ts) = let t1 = annotName ns t in case break (t1 ==) ps of (as,_:_) -> ([],as,ts) -- Unknown closing tag. Insert as text. (_,[]) -> let (es,qs,ts1) = nodes ns ps ts in (Text CData { cdLine = Just p, cdVerbatim = CDataText, cdData = tagEnd t "" } : es,qs, ts1) nodes _ ps [] = ([],ps,[]) annotName :: NSInfo -> QName -> QName annotName (namespaces,def_ns) n = n { qURI = maybe def_ns (`lookup` namespaces) (qPrefix n) } annotAttr :: NSInfo -> Attr -> Attr annotAttr ns a@(Attr { attrKey = k}) = case (qPrefix k, qName k) of -- Do not apply the default name-space to unqualified -- attributes. See Section 6.2 of . (Nothing, _) -> a _ -> a { attrKey = annotName ns k } addNS :: Attr -> NSInfo -> NSInfo addNS (Attr { attrKey = key, attrVal = val }) (ns,def) = case (qPrefix key, qName key) of (Nothing,"xmlns") -> (ns, if null val then Nothing else Just val) (Just "xmlns", k) -> ((k, val) : ns, def) _ -> (ns,def)