module Text.HandsomeSoup (openUrl, fromUrl, parseHtml, (!), css) where
import Text.XML.HXT.Core
import Network.HTTP
import Network.URI
import Data.Tree.NTree.TypeDefs
import Control.Monad.Maybe
import Control.Monad.Trans
import Data.Maybe
import Text.Parsec
import qualified Data.Map as M
import Data.Monoid (mconcat)
import qualified Data.Functor.Identity as I
import qualified Debug.Trace as D
import Data.List
import Control.Monad
import Text.CSS.Parser hiding (css)
openUrl :: String -> MaybeT IO String
openUrl url = case parseURI url of
Nothing -> fail "couldn't parse url"
Just u -> liftIO (getResponseBody =<< simpleHTTP (mkRequest GET u))
fromUrl :: String -> IO (IOSArrow XmlTree (NTree XNode))
fromUrl url = do
contents <- runMaybeT $ openUrl url
return $ parseHtml (fromMaybe "" contents)
parseHtml :: String -> IOSArrow XmlTree (NTree XNode)
parseHtml = readString [withParseHTML yes, withWarnings no]
(!) :: ArrowXml cat => cat a XmlTree -> String -> cat a String
(!) a str = a >>> getAttrValue str
css :: ArrowXml a => [Char] -> a (NTree XNode) (NTree XNode)
css tag = case (parse selector "" tag) of
Left err -> D.trace (show err) this
Right x -> fromSelectors x
fromSelectors sel@(s:selectors) = foldl (\acc selector -> acc <+> _fromSelectors selector) (_fromSelectors s) selectors
_fromSelectors (s:selectors) = foldl (\acc selector -> make acc selector) (make this s) selectors
where
make acc sel@(Selector name attrs pseudo)
| name == "*" = acc >>> (multi this >>> makeAttrs attrs)
| otherwise = acc >>> (multi $ hasName name >>> makeAttrs attrs)
make acc Space = acc >>> multi getChildren
make acc ChildOf = acc >>> getChildren
makeAttrs (a:attrs) = foldl (\acc attr -> acc >>> makeAttr attr) (makeAttr a) attrs
makeAttrs [] = this
makeAttr (name, "") = hasAttr name
makeAttr (name, '~':value) = hasAttrValue name (elem value . words)
makeAttr (name, value) = hasAttrValue name (==value)