{-# LANGUAGE LambdaCase #-}

module Xmlbf.XmlHtml
 ( element
 , element'
 , nodesXml
 , nodesHtml
 ) where

import Control.Monad ((>=>))
import qualified Data.ByteString as B
import qualified Data.HashMap.Strict as HM
import Data.Maybe (catMaybes)
import qualified Text.XmlHtml as XmlHtml
import qualified Xmlbf

--------------------------------------------------------------------------------
-- XmlHtml support

-- | Convert a 'XmlHtml.Node' from "Text.XmlHtml" into an 'Node' from "Xmlbf",
-- if possible.
element
  :: XmlHtml.Node -- ^ A 'XmlHtml.Node' from "Text.XmlHtml".
  -> Either String Xmlbf.Node -- ^ A 'Xmlbf.Node' from "Xmlbf".
element = element' >=> \case
  Just x -> Right x
  Nothing -> Left "Comments not supported"

-- | Like 'element', but returns 'Nothing' in case the given node is
-- a 'XmlHtml.Comment'. Children 'XmlHtml.Comment's are discarded from the
-- result.
element'
  :: XmlHtml.Node -- ^ A 'XmlHtml.Node' from "Text.XmlHtml".
  -> Either String (Maybe Xmlbf.Node)
element' = \case
  XmlHtml.Comment _ -> Right Nothing
  XmlHtml.TextNode t -> Right (Just (Xmlbf.text t))
  XmlHtml.Element t as cs -> do
     cs' <- catMaybes <$> traverse element' cs
     Just <$> Xmlbf.element t (HM.fromList as) cs'

-- | Parses a given UTF8-encoded raw XML fragment into @a@, using the @xeno@
-- Haskell library, so all of @xeno@'s parsing quirks apply.
--
-- You can provide the output of this function as input to "Xmlbf"'s
-- 'Xmlbf.runParser'.
--
-- The given XML can contain more zero or more text or element nodes.
--
-- Comments are discarded from the resulting nodes and their children.
--
-- Surrounding whitespace is not stripped.
nodesXml
  :: B.ByteString                 -- ^ Raw XML fragment.
  -> Either String [Xmlbf.Node]   -- ^ 'Xmlbf.Node's from "Xmlbf"
nodesXml = \bs -> case XmlHtml.parseXML "xmlbf-xmlhtml-input.xml" bs of
  Left e -> Left ("Malformed XML: " ++ show e)
  Right d -> catMaybes <$> traverse element' (XmlHtml.docContent d)

-- | Like 'nodesXml', but parses using @xmlhtml@'s quirks HTML mode.
nodesHtml
  :: B.ByteString                 -- ^ Raw HTML fragment.
  -> Either String [Xmlbf.Node]   -- ^ 'Xmlbf.Node's from "Xmlbf"
nodesHtml = \bs -> case XmlHtml.parseHTML "xmlbf-xmlhtml-input.html" bs of
  Left e -> Left ("Malformed HTML: " ++ show e)
  Right d -> catMaybes <$> traverse element' (XmlHtml.docContent d)