{-# OPTIONS_HADDOCK hide #-}
module Text.HTML.Scalpel.Internal.Scrape (
    Scraper (..)
,   attr
,   attrs
,   text
,   texts
,   chroot
,   chroots
) where

import Text.HTML.Scalpel.Internal.Select
import Text.HTML.Scalpel.Internal.Select.Types

import Control.Applicative
import Control.Monad
import Data.Maybe

import qualified Text.HTML.TagSoup as TagSoup
import qualified Text.StringLike as TagSoup


-- | A value of 'Scraper' @a@ defines a web scraper that is capable of consuming
-- a list of 'TagSoup.Tag's and optionally producing a value of type @a@.
newtype Scraper str a = MkScraper {
        -- | The 'scrape' function executes a 'Scraper' on a list of
        -- 'TagSoup.Tag's and produces an optional value.
        scrape :: [TagSoup.Tag str] -> Maybe a
    }

instance Functor (Scraper str) where
    fmap f (MkScraper a) = MkScraper $ fmap (fmap f) a

instance Applicative (Scraper str) where
    pure = MkScraper . const . Just
    (MkScraper f) <*> (MkScraper a) = MkScraper applied
        where applied tags | (Just aVal) <- a tags = ($ aVal) <$> f tags
                           | otherwise             = Nothing

instance Alternative (Scraper str) where
    empty = MkScraper $ const Nothing
    (MkScraper a) <|> (MkScraper b) = MkScraper choice
        where choice tags | (Just aVal) <- a tags = Just aVal
                          | otherwise             = b tags

instance Monad (Scraper str) where
    return = pure
    (MkScraper a) >>= f = MkScraper combined
        where combined tags | (Just aVal) <- a tags = let (MkScraper b) = f aVal
                                                      in  b tags
                            | otherwise             = Nothing

-- | The 'chroot' function takes a selector and an inner scraper and executes
-- the inner scraper as if it were scraping a document that consists solely of
-- the tags corresponding to the selector.
--
-- This function will match only the first set of tags matching the selector, to
-- match every set of tags, use 'chroots'.
chroot :: (TagSoup.StringLike str, Selectable str s)
       => s -> Scraper str a -> Scraper str a
chroot selector (MkScraper inner) = MkScraper
                                  $ join . (inner <$>)
                                  . listToMaybe . select selector

-- | The 'chroots' function takes a selector and an inner scraper and executes
-- the inner scraper as if it were scraping a document that consists solely of
-- the tags corresponding to the selector. The inner scraper is executed for
-- each set of tags matching the given selector.
chroots :: (TagSoup.StringLike str, Selectable str s)
        => s -> Scraper str a -> Scraper str [a]
chroots selector (MkScraper inner) = MkScraper
                                   $ return . catMaybes
                                   . map inner . select selector

-- | The 'text' function takes a selector and returns the inner text from the
-- set of tags described by the given selector.
--
-- This function will match only the first set of tags matching the selector, to
-- match every set of tags, use 'texts'.
text :: (TagSoup.StringLike str, Selectable str s) => s -> Scraper str str
text s = MkScraper $ withHead tagsToText . select s

-- | The 'texts' function takes a selector and returns the inner text from every
-- set of tags matching the given selector.
texts :: (TagSoup.StringLike str, Selectable str s) => s -> Scraper str [str]
texts s = MkScraper $ withAll tagsToText . select s

-- | The 'attr' function takes an attribute name and a selector and returns the
-- value of the attribute of the given name for the first opening tag that
-- matches the given selector.
--
-- This function will match only the opening tag matching the selector, to match
-- every tag, use 'attrs'.
attr :: (Show str, TagSoup.StringLike str, Selectable str s)
     => str -> s -> Scraper str str
attr name s = MkScraper $ join . withHead (tagsToAttr name) . select s

-- | The 'attrs' function takes an attribute name and a selector and returns the
-- value of the attribute of the given name for every opening tag that matches
-- the given selector.
attrs :: (Show str, TagSoup.StringLike str, Selectable str s)
     => str -> s -> Scraper str [str]
attrs name s = MkScraper $ fmap catMaybes . withAll (tagsToAttr name) . select s

withHead :: (a -> b) -> [a] -> Maybe b
withHead _ []    = Nothing
withHead f (x:_) = Just $ f x

withAll :: (a -> b) -> [a] -> Maybe [b]
withAll _ [] = Nothing
withAll f xs = Just $ map f xs

tagsToText :: TagSoup.StringLike str => [TagSoup.Tag str] -> str
tagsToText = TagSoup.innerText

tagsToAttr :: (Show str, TagSoup.StringLike str)
           => str -> [TagSoup.Tag str] -> Maybe str
tagsToAttr attr tags = do
    tag <- listToMaybe tags
    guard $ TagSoup.isTagOpen tag
    return $ TagSoup.fromAttrib attr tag