title\
-- \Section 1\
-- \Paragraph 1.1\
-- \Paragraph 1.2\
-- \Section 2\
-- \Paragraph 2.1\
-- \Paragraph 2.2\
--
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE CPP #-}
{-# OPTIONS_HADDOCK hide #-}
module Text.HTML.Scalpel.Internal.Serial (
    SerialScraper
,   SerialScraperT
,   inSerial
,   stepBack
,   stepNext
,   seekBack
,   seekNext
,   untilBack
,   untilNext
) where

import Text.HTML.Scalpel.Internal.Scrape
import Text.HTML.Scalpel.Internal.Select

import Control.Applicative
import Control.Monad
import Control.Monad.Trans
import Control.Monad.Except (MonadError)
import Control.Monad.Cont (MonadCont)
import Control.Monad.Reader
import Control.Monad.State
import Control.Monad.Trans.Maybe
import Control.Monad.Writer (MonadWriter)
import Data.Bifunctor
import Data.Functor.Identity
import Data.List.PointedList (PointedList)
import Data.Maybe
import Prelude hiding (until)

import qualified Control.Monad.Fail as Fail
import qualified Data.List.PointedList as PointedList
import qualified Data.Tree as Tree
import qualified Text.StringLike as TagSoup


-- | Serial scrapers operate on a zipper of tag specs that correspond to the
-- root nodes / siblings in a document.
--
-- Access to the zipper is always performed in a move-then-read manner. For this
-- reason it is valid for the current focus of the zipper to be just off either
-- end of the list such that moving forward or backward would result in reading
-- the first or last node.
--
-- These valid focuses are expressed as Nothing values at either end of the
-- zipper since they are valid positions for the focus to pass over, but not
-- valid positions to read.
type SpecZipper str = PointedList (Maybe (TagSpec str))

-- | A 'SerialScraper' allows for the application of 'Scraper's on a sequence of
-- sibling nodes.
-- This allows for use cases like targeting the sibling of a
-- node, or extracting a sequence of sibling nodes (e.g. paragraphs (\<p\>)
-- under a header (\<h2\>)).
Paragraph 1.1\
-- \Paragraph 1.2\
-- \Paragraph 2.1\
-- \Paragraph 2.2\
-- \