module Text.HTML.TagSoup.Parsec 
   ( module Text.HTML.TagSoup
   , TagParser
   , WholeTag 
   , tParse
   , openTag 
   , maybeOpenTag
   , eitherOpenTag
   , notOpenTag
   , allOpenTags
   , wholeTag
   , maybeWholeTag
   , eitherWholeTag
   , allWholeTags
   , closeTag
   , maybeCloseTag
   , eitherCloseTag
   , notCloseTag
   , allCloseTags
   , maybeP
   , allP
   , eitherP
   )
   where

import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import Text.HTML.TagSoup

import Data.Maybe
import Data.Char

-- | A type represent the TagOpen, any inner tags , and the TagClose.
type WholeTag =
   ( Tag , [ Tag ] , Tag )

-- | The Tag parser, using Tag as the token.
type TagParser =
   GenParser Tag ( )

-- | Used to invoke parsing of Tags.
tParse :: TagParser a -> [ Tag ] -> a
tParse p ts =
   case parse p "tagsoup" ts of
      Left err ->
         error $ show err
      Right crap ->
         crap

-- Tag eater is the basic tag matcher, it increments the line number for each tag parsed.
tagEater matcher =
   tokenPrim show 
             ( \ oldSp _ _ -> do 
                  setSourceLine oldSp ( 1 + sourceLine oldSp )
             )
             matcher

-- make a string lowercase
lowercase =
   map toLower


-- | openTag matches a TagOpen with the given name.  It is not case sensitive.
openTag :: String -> TagParser Tag
openTag soughtName =
   openTagMatch soughtName ( Just ) $ \ _ -> Nothing

-- | notOpenTag will match any tag which is not a TagOpen with the given name.  It is not case sensitive.
notOpenTag :: String -> TagParser Tag
notOpenTag avoidName =
   openTagMatch avoidName ( \ _ -> Nothing ) Just

-- openTagMatch is the higher order function which will receive a TagOpen, and call match if it matches the soughtName, and noMatch if it doesn't
openTagMatch soughtName match noMatch =
   tagEater $ \ tag ->
                 case tag of
                    t@( TagOpen tname atrs ) ->
                       if lowercase tname == lowercase soughtName
                          then
                             match t
                          else
                             noMatch t
                    t ->
                       noMatch t

-- closeTagMatch is the higher order function which will receive a TagClose, and call match if it matches the soughtName and noMatch if it doesn't.
closeTagMatch soughtName match noMatch =
   tagEater $ \ tag ->
                 case tag of
                    t@( TagClose tname ) ->
                       if lowercase tname == lowercase soughtName
                          then
                             match t
                          else
                             noMatch t
                    t ->
                       noMatch t

-- | wholeTag matches a TagOpen with the given name,
-- then all intervening tags,
-- until it reaches a TagClose with the given name.
-- It is not case sensitive.
wholeTag :: String -> TagParser WholeTag
wholeTag soughtName = do
   open <- openTag soughtName
   ts <- many $ notCloseTag soughtName
   close <- closeTag soughtName
   return ( open , ts , close )

-- | closeTag matches a TagClose with the given name.  It is not case sensitive.
closeTag :: String -> TagParser Tag
closeTag soughtName =
   closeTagMatch soughtName ( Just ) $ \ _ -> Nothing

-- | notCloseTag will match any tag which is not a TagClose with the given name.  It is not case sensitive.
notCloseTag :: String -> TagParser Tag
notCloseTag avoidName =
   closeTagMatch avoidName ( \ _ -> Nothing ) Just

-- | maybeOpenTag will return `Just` the tag if it gets a TagOpen with he given name,
-- It will return `Nothing` otherwise.
-- It is not case sensitive.
maybeOpenTag :: String -> TagParser ( Maybe Tag )
maybeOpenTag =
   maybeP . openTag

-- | maybeCloseTag will return `Just` the tag if it gets a TagClose with he given name,
-- It will return `Nothing` otherwise.
-- It is not case sensitive.
maybeCloseTag :: String -> TagParser ( Maybe Tag )
maybeCloseTag =
   maybeP . closeTag

-- | maybeWholeTag will return `Just` the tag if it gets a WholeTag with he given name,
-- It will return `Nothing` otherwise.
-- It is not case sensitive.
maybeWholeTag :: String -> TagParser ( Maybe WholeTag )
maybeWholeTag =
   maybeP . wholeTag

-- | allOpenTags will return all TagOpen with the given name.
-- It is not case sensitive.
allOpenTags :: String -> TagParser [ Tag ]
allOpenTags =
   allP . maybeOpenTag

-- | allCloseTags will return all TagClose with the given name.
-- It is not case sensitive.
allCloseTags :: String -> TagParser [ Tag ]
allCloseTags =
   allP . maybeCloseTag

-- | allWholeTags will return all WholeTag with the given name.
-- It is not case sensitive.
allWholeTags :: String -> TagParser [ WholeTag ]
allWholeTags =
   allP . maybeWholeTag

-- | eitherP takes a parser, and becomes its `Either` equivalent -- returning `Right` if it matches, and `Left` if it doesn't.
eitherP :: Show tok => GenParser tok st a -> GenParser tok st ( Either tok a )
eitherP p = do
   try ( do t <- p
            return $ Right t
       ) <|> ( do t <- anyToken
                  return $ Left t
             )
-- | either a Right TagOpen or a Left arbitary tag.
eitherOpenTag :: String -> TagParser ( Either Tag Tag )
eitherOpenTag = 
   eitherP . openTag

-- | either a Right TagClose or a Left arbitary tag.
eitherCloseTag :: String -> TagParser ( Either Tag Tag )
eitherCloseTag =
   eitherP . closeTag

-- | either a Right WholeTag or a Left arbitary tag.
eitherWholeTag :: String -> TagParser ( Either Tag WholeTag )
eitherWholeTag = 
   eitherP . wholeTag

-- | allP takes a parser which returns  a `Maybe` value, and returns a list of matching tokens.
allP :: GenParser tok st ( Maybe a ) -> GenParser tok st [ a ]
allP p = do
   ts <- many p
   let ls = 
          catMaybes ts
   return ls

-- | maybeP takes a parser, and becomes its `Maybe` equivalent -- returning `Just` if it matches, and `Nothing` if it doesn't.
maybeP :: Show tok => GenParser tok st a -> GenParser tok st ( Maybe a )
maybeP p =
   try ( do t <- p
            return $ Just t
       ) <|> ( do anyToken
                  return Nothing
             )