-- | Helpers for pulling attribute values (for example link targets) out of
-- HTML text parsed with TagSoup and filtering them with a POSIX regular
-- expression.
module Processing
  ( getAttrs
  , extractLink
  , extractLinks
  ) where

import Text.HTML.TagSoup
import Text.Regex.Posix
import Data.Maybe

-- | Collect the value of attribute @attrName@ from every opening tag named
-- @tagName@, preserving the order of the tag stream.
--
-- Opening tags with the right name but without the attribute contribute
-- nothing to the result. When a tag carries the attribute more than once,
-- only its first occurrence is returned. Both names are compared with '=='.
getAttrs
  :: [Tag String]  -- ^ parsed tag stream
  -> String        -- ^ name of the tags to inspect
  -> String        -- ^ name of the attribute to read
  -> [String]      -- ^ attribute values, one per matching tag that has it
getAttrs tags tagName attrName = mapMaybe attrOf tags
  where
    -- Attributes are an association list, so 'lookup' yields the first
    -- value stored under @attrName@.
    attrOf (TagOpen name attrs)
      | name == tagName = lookup attrName attrs
    -- Everything else (other tag names, closing tags, text, ...) is skipped.
    attrOf _ = Nothing

-- | Parse @r@ as HTML and return every @attr@ value found on @tag@ elements
-- that matches the POSIX regular expression @regexp@.
--
-- The pattern is applied to the raw attribute value with '=~'; use anchors
-- such as @$@ in the pattern to restrict where it may match. The pattern is
-- not validated here.
extractLinks
  :: String    -- ^ HTML source text
  -> String    -- ^ tag name, e.g. @"img"@
  -> String    -- ^ attribute name, e.g. @"src"@
  -> String    -- ^ POSIX regular expression the value must match
  -> [String]  -- ^ matching attribute values in document order
extractLinks r tag attr regexp = filter matchesPattern (getAttrs tags tag attr)
  where
    tags :: [Tag String]
    tags = parseTags r

    -- The signature pins the overloaded result of '=~' to a plain yes/no.
    matchesPattern :: String -> Bool
    matchesPattern url = url =~ regexp

-- | Like 'extractLinks', but return only the first matching value, or
-- 'Nothing' when no attribute value matches.
extractLink
  :: String        -- ^ HTML source text
  -> String        -- ^ tag name
  -> String        -- ^ attribute name
  -> String        -- ^ POSIX regular expression the value must match
  -> Maybe String  -- ^ first matching attribute value, if any
extractLink r tag attr regexp = listToMaybe (extractLinks r tag attr regexp)