{- | $Id: SimpleExamples.hs,v 1.3 2006/11/17 17:16:24 hxml Exp $ The examples from the HXT tutorial at haskell.org "http://www.haskell.org/haskellwiki/HXT" -} module Main where import Text.XML.HXT.Arrow -- import HXT stuff import Text.XML.HXT.XPath import Data.List -- auxiliary functions import Data.Maybe import System.Environment import System.Console.GetOpt() import System.Exit -- | call this program with 3 arguments, -- the function name, see list of examples, -- the input URL or file -- and the output file, - for stdout -- -- example: SimpleExamples selectAllText http://www.haskell.org/ - main :: IO () main = do argv <- getArgs (al, fct, src, dst) <- cmdlineOpts argv [rc] <- runX (application al fct src dst) if rc >= c_err then exitWith (ExitFailure (-1)) else exitWith ExitSuccess application :: Attributes -> String -> String -> String -> IOSArrow b Int application al fct src dst = readDocument al src >>> processChildren (processRootElement fct `when` isElem) >>> writeDocument ( (a_indent, v_1) : (a_output_encoding, isoLatin1) : al ) dst >>> getErrStatus -- | the dummy for the boring stuff of option evaluation, -- usually done with 'System.Console.GetOpt' cmdlineOpts :: [String] -> IO (Attributes, String, String, String) cmdlineOpts argv = return ([(a_validate, v_0),(a_parse_html, v_1)], argv!!0, argv!!1, argv!!2) -- | the processing examples examples :: [ (String, IOSArrow XmlTree XmlTree) ] examples = [ ( "selectAllText", selectAllText ) , ( "selectAllTextAndAltValues", selectAllTextAndAltValues ) , ( "selectAllTextAndRealAltValues", selectAllTextAndRealAltValues ) , ( "addRefIcon", addRefIcon ) , ( "helloWorld", helloWorld ) , ( "helloWorld2", helloWorld2 ) , ( "imageTable", imageTable ) , ( "imageTable0", imageTable0 ) , ( "imageTable1", imageTable1 ) , ( "imageTable2", imageTable2 ) , ( "imageTable3", imageTable3 ) , ( "toAbsHRefs", toAbsHRefs ) , ( "toAbsRefs", toAbsRefs ) , ( "toAbsRefs1", toAbsRefs1 ) ] processRootElement :: String -> IOSArrow XmlTree XmlTree processRootElement fct = fromMaybe this . lookup fct $ examples -- | selection arrows selectAllText :: ArrowXml a => a XmlTree XmlTree selectAllText = selem "the-plain-text" [ deep isText ] -- create a root element, neccessary for wellformed XML output selectAllTextAndAltValues :: ArrowXml a => a XmlTree XmlTree selectAllTextAndAltValues = selem "the-plain-text" [ deep ( isText <+> ( isElem >>> hasName "img" >>> getAttrValue "alt" >>> mkText ) ) ] selectAllTextAndRealAltValues :: ArrowXml a => a XmlTree XmlTree selectAllTextAndRealAltValues = selem "the-plain-text" [ deep ( isText <+> ( isElem >>> hasName "img" >>> getAttrValue "alt" >>> isA significant >>> arr addBrackets >>> mkText ) ) ] where significant :: String -> Bool significant = not . all (`elem` " \n\r\t") addBrackets :: String -> String addBrackets s = " [[ " ++ s ++ " ]] " -- | transformation arrows addRefIcon :: ArrowXml a => a XmlTree XmlTree addRefIcon = processTopDown ( addImg `when` isExternalRef ) where isExternalRef = isElem >>> hasName "a" >>> hasAttr "href" >>> getAttrValue "href" >>> isA isExtRef where isExtRef = isPrefixOf "http:" addImg = replaceChildren ( getChildren <+> imgElement ) imgElement = mkelem "img" [ sattr "src" "/icons/ref.png" , sattr "alt" "external ref" ] [] -- | construction examples helloWorld :: ArrowXml a => a XmlTree XmlTree helloWorld = mkelem "html" [] [ mkelem "head" [] [ mkelem "title" [] [ txt "Hello World" ] ] , mkelem "body" [ sattr "class" "haskell" ] [ mkelem "h1" [] [ txt "Hello World" ] ] ] helloWorld2 :: ArrowXml a => a XmlTree XmlTree helloWorld2 = selem "html" [ selem "head" [ selem "title" [ txt "Hello World" ] ] , mkelem "body" [ sattr "class" "haskell" ] [ selem "h1" [ txt "Hello World" ] ] ] imageTable :: ArrowXml a => a XmlTree XmlTree imageTable = selem "html" [ selem "head" [ selem "title" [ txt "Images in Page" ] ] , selem "body" [ selem "h1" [ txt "Images in Page" ] , selem "table" [ collectImages >>> genTableRows ] ] ] where genTableRows = selem "tr" [ selem "td" [ getAttrValue "src" >>> mkText ] ] imageTable0 :: ArrowXml a => a XmlTree XmlTree imageTable0 = selem "html" [ pageHeader , selem "body" [ selem "h1" [ txt "Images in Page" ] , selem "table" [ collectImages >>> genTableRows ] ] ] where pageHeader = constA "