module Xmlbf.Xeno
( element
, nodes
) where
import qualified Data.Bifunctor as Bif
import qualified Data.ByteString as B
import qualified Data.HashMap.Strict as HM
import Data.Monoid ((<>))
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Encoding as T
import Data.Traversable (for)
import qualified HTMLEntities.Decoder
import qualified Xeno.DOM as Xeno
import qualified Xmlbf
-- | Convert a 'Xeno.Node' from "Xeno.DOM" into an 'Xmlbf.Node'.
--
-- The element name and attribute keys are decoded as UTF-8. Attribute
-- values and text children are decoded as UTF-8 and then have their
-- character entities unescaped. CDATA children are only decoded as UTF-8,
-- never unescaped. Child elements are converted recursively.
--
-- Returns 'Left' with a message as soon as any UTF-8 decoding step fails,
-- or if 'Xmlbf.element' rejects the assembled name, attributes or children.
element
  :: Xeno.Node
  -> Either String Xmlbf.Node
element node = do
  name <- decodeUtf8 (Xeno.name node)
  attrs <- for (Xeno.attributes node) attribute
  children <- for (Xeno.contents node) content
  Xmlbf.element name (HM.fromList attrs) children
  where
    -- Decode one raw attribute pair; the key is processed before the value.
    attribute (rawKey, rawVal) = do
      key <- decodeUtf8 rawKey
      val <- unescapeXmlUtf8 rawVal
      pure (key, val)
    -- Convert one child of the element.
    content (Xeno.Element child) = element child
    content (Xeno.Text raw) = fmap Xmlbf.text (unescapeXmlUtf8 raw)
    -- CDATA is taken verbatim: entities inside it are not unescaped.
    content (Xeno.CData raw) = fmap Xmlbf.text (decodeUtf8 raw)
-- | Parse a fragment of raw XML into a list of 'Xmlbf.Node's.
--
-- The input is wrapped in a synthetic @\<x\>@ element before being handed
-- to 'Xeno.parse'. Only the children of that wrapper are returned.
--
-- Returns 'Left' with a message when 'Xeno.parse' fails, when 'element'
-- fails to convert the parsed tree, or when the converted root is not the
-- synthetic @\<x\>@ wrapper.
nodes
  :: B.ByteString
  -> Either String [Xmlbf.Node]
nodes = \bs -> case Xeno.parse ("<x>" <> bs <> "</x>") of
  Left e -> Left ("Malformed XML: " ++ show e)
  Right n -> do
    wrapper <- element n
    -- Match totally: a refutable lambda pattern here would throw a
    -- pattern-match exception instead of reporting the problem as 'Left'.
    case wrapper of
      Xmlbf.Element "x" _ cs -> pure cs
      _ -> Left "Malformed XML: root is not the synthetic <x> wrapper element"
-- | Decode a strict 'B.ByteString' as UTF-8 text.
--
-- On invalid input the result is 'Left' carrying the 'show'n decoding
-- error reported by 'T.decodeUtf8''.
decodeUtf8 :: B.ByteString -> Either String T.Text
decodeUtf8 = Bif.first show . T.decodeUtf8'
-- | Replace the character entities in a piece of text with the characters
-- they stand for, as recognised by 'HTMLEntities.Decoder.htmlEncodedText'.
--
-- The decoder produces a text builder, which is rendered to lazy text and
-- then converted back to strict 'T.Text'.
unescapeXmlText :: T.Text -> T.Text
unescapeXmlText =
  TL.toStrict . TB.toLazyText . HTMLEntities.Decoder.htmlEncodedText
-- | Decode a strict 'B.ByteString' as UTF-8 and then unescape the
-- character entities in the resulting text.
--
-- Returns 'Left' with the decoding error message when the bytes are not
-- valid UTF-8; unescaping itself cannot fail.
unescapeXmlUtf8 :: B.ByteString -> Either String T.Text
unescapeXmlUtf8 = fmap unescapeXmlText . decodeUtf8