{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} module Xmlbf.Xeno ( element , nodes ) where import qualified Data.Bifunctor as Bif import qualified Data.ByteString as B import qualified Data.HashMap.Strict as HM import Data.Monoid ((<>)) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as TB import qualified Data.Text.Encoding as T import Data.Traversable (for) import qualified HTMLEntities.Decoder import qualified Xeno.DOM as Xeno import qualified Xmlbf -------------------------------------------------------------------------------- -- Xeno support -- | Convert a 'Xeno.Node' from "Xeno.DOM" into an 'Element' from "Xmlbf". element :: Xeno.Node -- ^ A 'Xeno.Node' from "Xeno.DOM". -> Either String Xmlbf.Node -- ^ A 'Xmlbf.Node' from "Xmlbf". element x = do n <- decodeUtf8 (Xeno.name x) as <- for (Xeno.attributes x) $ \(k,v) -> do (,) <$> decodeUtf8 k <*> unescapeXmlUtf8 v cs <- for (Xeno.contents x) $ \case Xeno.Element n1 -> element n1 Xeno.Text bs -> Xmlbf.text <$> unescapeXmlUtf8 bs Xeno.CData bs -> Xmlbf.text <$> decodeUtf8 bs Xmlbf.element n (HM.fromList as) cs -- | Parses a given UTF8-encoded raw XML fragment into @a@, using the @xeno@ -- Haskell library, so all of @xeno@'s parsing quirks apply. -- -- You can provide the output of this function as input to "Xmlbf"'s -- 'Xmlbf.runParser'. -- -- The given XML can contain more zero or more text or element nodes. -- -- Surrounding whitespace is not stripped. nodes :: B.ByteString -- ^ Raw XML fragment. -> Either String [Xmlbf.Node] -- ^ 'Xmlbf.Node's from "Xmlbf" nodes = \bs -> case Xeno.parse ("" <> bs <> "") of Left e -> Left ("Malformed XML: " ++ show e) Right n -> element n >>= \(Xmlbf.Element "x" _ cs) -> pure cs -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- -- Miscellaneous decodeUtf8 :: B.ByteString -> Either String T.Text {-# INLINE decodeUtf8 #-} decodeUtf8 bs = Bif.first show (T.decodeUtf8' bs) unescapeXmlText :: T.Text -> T.Text {-# INLINE unescapeXmlText #-} unescapeXmlText = \t -> TL.toStrict (TB.toLazyText (HTMLEntities.Decoder.htmlEncodedText t)) unescapeXmlUtf8 :: B.ByteString -> Either String T.Text {-# INLINE unescapeXmlUtf8 #-} unescapeXmlUtf8 bs = fmap unescapeXmlText (decodeUtf8 bs)