-- |
-- Scalpel is a web scraping library inspired by libraries like parsec and
-- Perl's <http://search.cpan.org/~miyagawa/Web-Scraper-0.38/ Web::Scraper>.
-- Scalpel builds on top of "Text.HTML.TagSoup" to provide a declarative and
-- monadic interface.
--
-- There are two general mechanisms provided by this library that are used to
-- build web scrapers: Selectors and Scrapers.
--
--
-- Selectors describe a location within an HTML DOM tree. The simplest selector
-- that can be written is a simple string value. For example, the selector
-- @\"div\"@ matches every single div node in a DOM. Selectors can be combined
-- using tag combinators. The '//' operator is used to define nested
-- relationships within a DOM tree. For example, the selector
-- @\"div\" \/\/ \"a\"@ matches all anchor tags nested arbitrarily deep within
-- a div tag.
--
-- In addition to describing the nested relationships between tags, selectors
-- can also include predicates on the attributes of a tag. The '@:' operator
-- creates a selector that matches a tag based on the name and various
-- conditions on the tag's attributes. An attribute predicate is just a function
-- that takes an attribute and returns a boolean indicating if the attribute
-- matches a criterion. There are several attribute operators that can be used
-- to generate common predicates. The '@=' operator creates a predicate that
-- matches the name and value of an attribute exactly. For example, the selector
-- @\"div\" \@: [\"id\" \@= \"article\"]@ matches div tags where the id
-- attribute is equal to @\"article\"@.
--
--
-- Scrapers are values that are parameterized over a selector and produce
-- a value from an HTML DOM tree. The 'Scraper' type takes two type parameters.
-- The first is the string-like type that is used to store the text values
-- within a DOM tree. Any string-like type supported by "Text.StringLike" is
-- valid. The second type is the type of value that the scraper produces.
--
-- There are several scraper primitives that take selectors and extract content
-- from the DOM. Each primitive defined by this library comes in two variants:
-- singular and plural. The singular variants extract the first instance
-- matching the given selector, while the plural variants match every instance.
--
--
-- The following is an example that demonstrates most of the features provided
-- by this library. Suppose you have the following hypothetical HTML located at
-- @\"http://example.com/article.html\"@ and you would like to extract a list of
-- all of the comments.
--
-- > <html>
-- >   <body>
-- >     <div class='comments'>
-- >       <div class='comment container'>
-- >         <span class='comment author'>Sally</span>
-- >         <div class='comment text'>Woo hoo!</div>
-- >       </div>
-- >       <div class='comment container'>
-- >         <span class='comment author'>Bill</span>
-- >         <img class='comment image' src='http://example.com/cat.gif' />
-- >       </div>
-- >       <div class='comment container'>
-- >         <span class='comment author'>Susan</span>
-- >         <div class='comment text'>WTF!?!</div>
-- >       </div>
-- >     </div>
-- >   </body>
-- > </html>
--
-- The following snippet defines a function, @allComments@, that will download
-- the web page, and extract all of the comments into a list:
--
-- @
-- type Author = String
--
-- data Comment
--     = TextComment Author String
--     | ImageComment Author URL
--     deriving (Show, Eq)
--
-- allComments :: IO (Maybe [Comment])
-- allComments = 'scrapeURL' \"http:\/\/example.com/article.html\" comments
--    where
--        comments :: Scraper String [Comment]
--        comments = 'chroots' ("div" '@:' ['hasClass' "container"]) comment
--
--        comment :: Scraper String Comment
--        comment = textComment `<|>` imageComment
--
--        textComment :: Scraper String Comment
--        textComment = do
--            author      <- 'text' $ "span" \@: [hasClass "author"]
--            commentText <- text $ "div"  \@: [hasClass "text"]
--            return $ TextComment author commentText
--
--        imageComment :: Scraper String Comment
--        imageComment = do
--            author   <- text       $ "span" \@: [hasClass "author"]
--            imageURL <- 'attr' "src" $ "img"  \@: [hasClass "image"]
--            return $ ImageComment author imageURL
-- @
--
-- Complete examples can be found in the
-- <https://github.com/fimad/scalpel/tree/master/examples examples> folder in
-- the scalpel git repository.
module Text.HTML.Scalpel (
-- * Selectors
    Selector
,   AttributePredicate
,   AttributeName (..)
,   TagName (..)
,   tagSelector
,   textSelector
-- ** Wildcards
,   anySelector
-- ** Tag combinators
,   (//)
,   atDepth
-- ** Attribute predicates
,   (@:)
,   (@=)
,   (@=~)
,   hasClass
,   notP
,   match

-- * Scrapers
,   Scraper
-- ** Primitives
,   attr
,   attrs
,   html
,   htmls
,   innerHTML
,   innerHTMLs
,   text
,   texts
,   chroot
,   chroots
,   position
,   matches
-- ** Executing scrapers
,   scrape
,   scrapeStringLike
,   URL
,   scrapeURL
,   scrapeURLWithConfig
,   Config (..)
,   Decoder
,   defaultDecoder
,   utf8Decoder
,   iso88591Decoder

-- * Serial Scraping
,   SerialScraper
,   inSerial
-- ** Primitives
,   stepNext
,   stepBack
,   seekNext
,   seekBack
,   untilNext
,   untilBack
) where

import Text.HTML.Scalpel.Core
import Text.HTML.Scalpel.Internal.Scrape.URL