{-# LANGUAGE OverloadedStrings #-} module MBug.Scrape (parseBugs) where import Control.Applicative (Alternative, empty) import Control.Monad (guard) import qualified Data.ByteString.Lazy as BL import Data.Maybe.Extended (maybeToAlternative) import Data.Text (Text) import qualified Data.Text.Compat as T import Data.Text.Encoding (decodeUtf8) import MBug.Data.Bug (Bug (..)) import Text.HTML.Scalpel.Core.Extended ( Scraper , chroots , guardPos , innerHTMLs , scrape , text , (@:) , (@=) ) import Text.HTML.TagSoup (parseTags) import Text.Read (readMaybe) -- | Parse string in format #nnn into integer. bugNumber :: (Alternative m) => Text -> m Int bugNumber txt = case T.uncons txt of Nothing -> empty Just (c, bug) -> guard (c == '#') *> maybeToAlternative (readMaybe $ T.unpack bug) -- | Extract bug severity from "Severity: important;" string. bugSeverity :: (Alternative m) => Text -> m Text bugSeverity txt = let (prefix, suffix) = T.splitAt 10 txt in case T.unsnoc suffix of Nothing -> empty Just (severity, c) -> guard (prefix == "Severity: " && c == ';') *> pure severity -- | Extract bugs information from BTS response. bugsScraper :: Scraper BL.ByteString [Bug] bugsScraper = let decode = decodeUtf8 . BL.toStrict in chroots ("div" @: [ "class" @= "shortbugstatus" ]) $ do (bug:package:subject:_) <- map decode <$> innerHTMLs "a" (severity:_) <- chroots "span" $ do guardPos (== 3) decode <$> text "span" Bug <$> bugNumber bug <*> bugSeverity severity <*> pure package <*> pure subject -- Parse response from BTS and return list of bugs. FIXME: This -- function never returns Nothing, both invalid input and response with -- no bugs are collapsed into @Just []@. parseBugs :: BL.ByteString -> Maybe [Bug] parseBugs input = scrape bugsScraper (parseTags input)