module Example.Example where import Text.HTML.TagSoup import Text.HTML.Download import Control.Monad import Data.List import Data.Char grab :: String -> IO () grab x = openItem x >>= putStr parse :: String -> IO () parse x = openItem x >>= putStr . show2 . parseTags where show2 [] = "[]" show2 xs = "[" ++ concat (intersperseNotBroken "\n," $ map show xs) ++ "\n]" -- the standard intersperse has a strictness bug which sucks! intersperseNotBroken :: a -> [a] -> [a] intersperseNotBroken _ [] = [] intersperseNotBroken sep (x:xs) = x : is xs where is [] = [] is (y:ys) = sep : y : is ys {-

Retrieved from "http://haskell.org/haskellwiki/Haskell"

This page has been accessed 507,753 times. This page was last modified 08:05, 24 January 2007. Recent content is available under a simple permissive license.

-} haskellHitCount :: IO () haskellHitCount = do tags <- liftM parseTags $ openURL "http://haskell.org/haskellwiki/Haskell" let count = fromFooter $ head $ sections (~== "
") tags putStrLn $ "haskell.org has been hit " ++ show count ++ " times" where fromFooter x = read (filter isDigit num) :: Int where num = ss !! (i - 1) Just i = findIndex (== "times.") ss ss = words s TagText s = sections (~== "

") x !! 1 !! 1 {- Blogger code of conduct proposed -} googleTechNews :: IO () googleTechNews = do tags <- liftM parseTags $ openURL "http://news.google.com/?ned=us&topic=t" let links = [ text | TagOpen "a" atts:TagOpen "b" []:TagText text:_ <- tails tags, ("id",'u':'-':_) <- atts] putStr $ unlines links spjPapers :: IO () spjPapers = do tags <- liftM parseTags $ openURL "http://research.microsoft.com/~simonpj/" let links = map f $ sections (~== "") $ takeWhile (~/= "") $ drop 5 $ dropWhile (~/= "") tags putStr $ unlines links where f :: [Tag] -> String f = dequote . unwords . words . fromTagText . head . filter isTagText dequote ('\"':xs) | last xs == '\"' = init xs dequote x = x ndmPapers :: IO () ndmPapers = do tags <- liftM parseTags $ openURL "http://www-users.cs.york.ac.uk/~ndm/downloads/" let papers = map f $ sections (~== "

  • ") tags putStr $ unlines papers where f :: [Tag] -> String f xs = fromTagText (xs !! 2) currentTime :: IO () currentTime = do tags <- liftM parseTags $ openURL "http://www.timeanddate.com/worldclock/city.html?n=136" let time = fromTagText (dropWhile (~/= "") tags !! 1) putStrLn time type Section = String data Package = Package {name :: String, desc :: String, href :: String} deriving Show hackage :: IO [(Section,[Package])] hackage = do tags <- liftM parseTags $ openURL "http://hackage.haskell.org/packages/archive/pkg-list.html" return $ map parseSect $ partitions (~== "

    ") tags where parseSect xs = (nam, packs) where nam = fromTagText $ xs !! 2 packs = map parsePackage $ partitions (~== "
  • ") xs parsePackage xs = Package (fromTagText $ xs !! 2) (drop 2 $ dropWhile (/= ':') $ fromTagText $ xs !! 4) (fromAttrib "href" $ xs !! 1) -- rssCreators Example: prints names of story contributors on -- sequence.complete.org. This content is RSS (not HTML), and the selected -- tag uses a different XML namespace "dc:creator". rssCreators :: IO () rssCreators = do tags <- liftM parseTags $ openURL "http://sequence.complete.org/node/feed" putStrLn $ unlines $ map names $ partitions (~== "") tags where names xs = fromTagText $ xs !! 1 validate :: String -> IO () validate x = putStr . unlines . g . f . parseTagsOptions opts =<< openItem x where opts = parseOptions{optTagPosition=True, optTagWarning=True} f :: [Tag] -> [String] f (TagPosition row col:TagWarning warn:rest) = ("Warning (" ++ show row ++ "," ++ show col ++ "): " ++ warn) : f rest f (TagWarning warn:rest) = ("Warning (?,?): " ++ warn) : f rest f (_:rest) = f rest f [] = [] g xs = xs ++ [if n == 0 then "Success, no warnings" else "Failed, " ++ show n ++ " warning" ++ ['s'|n>1]] where n = length xs