{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE ViewPatterns          #-}

module Text.HTML.Tree
    ( -- * Constructing forests
      tokensToForest
    , ParseTokenForestError(..), PStack(..)
    , nonClosing
      -- * Deconstructing forests
    , tokensFromForest
    , tokensFromTree
    ) where

import           Data.Monoid
import           Data.Text (Text)
import           Data.Tree
import           Prelude

import           Text.HTML.Parser

-- | construct a 'Forest' from a 'Token' list.
--
-- This code correctly handles void elements. Void elements are required to have a start tag and must not have an end tag. See 'nonClosing'.
--
-- This code does __not__ correctly handle optional tags. It assumes all optional start and end tags are present.
--
-- <https:\/\/www.w3.org\/TR\/html52\/syntax.html#optional-tags>
tokensToForest :: [Token] -> Either ParseTokenForestError (Forest Token)
tokensToForest :: [Token] -> Either ParseTokenForestError (Forest Token)
tokensToForest = PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Forest Token -> [(Token, Forest Token)] -> PStack
PStack [] [])
  where
    f :: PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (PStack Forest Token
ss []) [] = forall a b. b -> Either a b
Right (forall a. [a] -> [a]
reverse Forest Token
ss)
    f PStack
pstack []         = forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ PStack -> Maybe Token -> ParseTokenForestError
ParseTokenForestErrorBracketMismatch PStack
pstack forall a. Maybe a
Nothing
    f PStack
pstack (Token
t : [Token]
ts)   = case Token
t of
        TagOpen TagName
n [Attr]
_     -> if TagName
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [TagName]
nonClosing
                             then PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
                             else PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushParent Token
t PStack
pstack) [Token]
ts
        TagSelfClose {} -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
        TagClose TagName
n      -> (PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
`f` [Token]
ts) forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< TagName -> PStack -> Either ParseTokenForestError PStack
popParent TagName
n PStack
pstack
        ContentChar Char
_   -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
        ContentText TagName
_   -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
        Comment Builder
_       -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
        Doctype TagName
_       -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts

-- | void elements which must not have an end tag
--
-- This list does not include the obsolete @\<command\>@ and @\<keygen\>@ elements.
--
-- @ nonClosing = ["br", "hr", "img", "meta", "area", "base", "col", "embed", "input", "link", "param", "source", "track", "wbr"] @
--
-- <https:\/\/www.w3.org\/TR\/html52\/syntax.html#void-elements>
nonClosing :: [Text]
nonClosing :: [TagName]
nonClosing = [TagName
"br", TagName
"hr", TagName
"img", TagName
"meta", TagName
"area", TagName
"base", TagName
"col", TagName
"embed", TagName
"input", TagName
"link", TagName
"param", TagName
"source", TagName
"track", TagName
"wbr"]

data ParseTokenForestError =
    ParseTokenForestErrorBracketMismatch PStack (Maybe Token)
  deriving (ParseTokenForestError -> ParseTokenForestError -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ParseTokenForestError -> ParseTokenForestError -> Bool
$c/= :: ParseTokenForestError -> ParseTokenForestError -> Bool
== :: ParseTokenForestError -> ParseTokenForestError -> Bool
$c== :: ParseTokenForestError -> ParseTokenForestError -> Bool
Eq, Int -> ParseTokenForestError -> ShowS
[ParseTokenForestError] -> ShowS
ParseTokenForestError -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ParseTokenForestError] -> ShowS
$cshowList :: [ParseTokenForestError] -> ShowS
show :: ParseTokenForestError -> String
$cshow :: ParseTokenForestError -> String
showsPrec :: Int -> ParseTokenForestError -> ShowS
$cshowsPrec :: Int -> ParseTokenForestError -> ShowS
Show)

data PStack = PStack
    { PStack -> Forest Token
_pstackToplevelSiblings :: Forest Token
    , PStack -> [(Token, Forest Token)]
_pstackParents          :: [(Token, Forest Token)]
    }
  deriving (PStack -> PStack -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PStack -> PStack -> Bool
$c/= :: PStack -> PStack -> Bool
== :: PStack -> PStack -> Bool
$c== :: PStack -> PStack -> Bool
Eq, Int -> PStack -> ShowS
[PStack] -> ShowS
PStack -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PStack] -> ShowS
$cshowList :: [PStack] -> ShowS
show :: PStack -> String
$cshow :: PStack -> String
showsPrec :: Int -> PStack -> ShowS
$cshowsPrec :: Int -> PStack -> ShowS
Show)

pushParent :: Token -> PStack -> PStack
pushParent :: Token -> PStack -> PStack
pushParent Token
t (PStack Forest Token
ss [(Token, Forest Token)]
ps) = Forest Token -> [(Token, Forest Token)] -> PStack
PStack [] ((Token
t, Forest Token
ss) forall a. a -> [a] -> [a]
: [(Token, Forest Token)]
ps)

popParent :: TagName -> PStack -> Either ParseTokenForestError PStack
popParent :: TagName -> PStack -> Either ParseTokenForestError PStack
popParent TagName
n (PStack Forest Token
ss ((p :: Token
p@(TagOpen TagName
n' [Attr]
_), Forest Token
ss') : [(Token, Forest Token)]
ps))
    | TagName
n forall a. Eq a => a -> a -> Bool
== TagName
n' = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Forest Token -> [(Token, Forest Token)] -> PStack
PStack (forall a. a -> [Tree a] -> Tree a
Node Token
p (forall a. [a] -> [a]
reverse Forest Token
ss) forall a. a -> [a] -> [a]
: Forest Token
ss') [(Token, Forest Token)]
ps
popParent TagName
n PStack
pstack
    = forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ PStack -> Maybe Token -> ParseTokenForestError
ParseTokenForestErrorBracketMismatch PStack
pstack (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ TagName -> Token
TagClose TagName
n)

pushFlatSibling :: Token -> PStack -> PStack
pushFlatSibling :: Token -> PStack -> PStack
pushFlatSibling Token
t (PStack Forest Token
ss [(Token, Forest Token)]
ps) = Forest Token -> [(Token, Forest Token)] -> PStack
PStack (forall a. a -> [Tree a] -> Tree a
Node Token
t [] forall a. a -> [a] -> [a]
: Forest Token
ss) [(Token, Forest Token)]
ps

-- | convert a 'Forest' of 'Token' into a list of 'Token'.
--
-- This code correctly handles void elements. Void elements are required to have a start tag and must not have an end tag. See 'nonClosing'.
tokensFromForest :: Forest Token -> [Token]
tokensFromForest :: Forest Token -> [Token]
tokensFromForest = forall a. Monoid a => [a] -> a
mconcat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Tree Token -> [Token]
tokensFromTree

-- | convert a 'Tree' of 'Token' into a list of 'Token'.
--
-- This code correctly handles void elements. Void elements are required to have a start tag and must not have an end tag. See 'nonClosing'.
tokensFromTree :: Tree Token -> [Token]
tokensFromTree :: Tree Token -> [Token]
tokensFromTree (Node o :: Token
o@(TagOpen TagName
n [Attr]
_) Forest Token
ts) | TagName
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [TagName]
nonClosing
    = [Token
o] forall a. Semigroup a => a -> a -> a
<> Forest Token -> [Token]
tokensFromForest Forest Token
ts forall a. Semigroup a => a -> a -> a
<> [TagName -> Token
TagClose TagName
n]
tokensFromTree (Node Token
t [])
    = [Token
t]
tokensFromTree Tree Token
_
    = forall a. HasCallStack => String -> a
error String
"renderTokenTree: leaf node with children."