module Text.XML.HXT.Helper

-- == EXPORTS =================================================================
    (XmlValidator
    ,XmlEncoding, xmlEncodingFromString, xmlEncodingToString
    ,serializeXml, parseXml, ParseXmlConfig(..), defaultParseXmlConfig, xmlEncoding
    ,pickle, pickleWithEnc, pickleStr, unpickle, unpickleStr, unpickleDocM
    ,validateAndUnpickle
    ,PUCase, xpOne, xpMany, xpTagSwitch, xpCase, xpCaseConst, xpSwitch
    ,xpTextFixed, xpConsumeAll, xpExhaustiveList, xpIgnoreOnUnpickle, xpEither
    ,xpByteString
    ,_ISO8859_1_, _ISO8859_2_, _ISO8859_3_, _ISO8859_4_, _ISO8859_5_
    ,_ISO8859_6_, _ISO8859_7_, _ISO8859_8_, _ISO8859_9_, _ISO8859_10_
    ,_ISO8859_11_, _ISO8859_13_, _ISO8859_14_, _ISO8859_15_, _ISO8859_16_
    ,_USASCII_, _UCS2_, _UTF8_, _UTF16_, _UTF16BE_, _UTF16LE_, _ISOLATIN1_
    ,_UNICODE_)
where

-- == IMPORTS =================================================================

----------------------------------------
-- STDLIB
----------------------------------------
import Control.Monad.Error ()
import qualified Control.Monad as M
import Data.Char (toLower)
import Data.Maybe (fromMaybe)
import Data.List (elemIndex)

----------------------------------------
-- SITE-PACKAGES
----------------------------------------
import Control.Arrow.ArrowIf (when)
import Control.Arrow.ArrowList (this,none)
import Control.Arrow.ArrowTree (processChildren, processTopDown)

import Data.Tree.NTree.TypeDefs (NTree(..))

import Text.XML.HXT.Arrow
    ( XmlTree
    , runLA, constA, (>>>), arr
    , replaceChildren,xshow,mkText,escapeXmlDoc
    , getChildren)

import Text.XML.HXT.Arrow.Namespace (propagateNamespaces,uniqueNamespacesFromDeclAndQNames)
import Text.XML.HXT.Arrow.Pickle (PU(..), unpickleDoc,
                                 pickleDoc ,xpElem, xpList, xpAlt, xpWrap,
                                 xpWrapMaybe, xpText, xpText0)
import Text.XML.HXT.Arrow.Pickle.Schema (scFixed)
import Text.XML.HXT.Arrow.Pickle.Xml (St(..))
import Text.XML.HXT.Arrow.ParserInterface (parseXmlDoc,substXmlEntityRefs)
import Text.XML.HXT.Arrow.Edit (canonicalizeContents, removeDocWhiteSpace, removeAllComment
                               ,indentDoc)
import Text.XML.HXT.Arrow.XmlArrow (ArrowXml, root, isPi, hasName)
import Text.XML.HXT.Arrow.DocumentOutput (encodeDocument')
import Text.XML.HXT.DOM.TypeDefs (XNode(..))
import Text.XML.HXT.DOM.XmlKeywords
    (t_xml
    ,iso8859_1, iso8859_2, iso8859_3, iso8859_4, iso8859_5, iso8859_6
    ,iso8859_7, iso8859_8, iso8859_9, iso8859_10, iso8859_11, iso8859_13
    ,iso8859_14, iso8859_15, iso8859_16, usAscii, ucs2, utf8, utf16, utf16be
    ,utf16le, unicodeString, isoLatin1)

import qualified Data.ByteString.Lazy.Char8 as BSLChar
import qualified Data.ByteString.Lazy as BSL
import Data.Encoding (DynEncoding, encodingFromStringExplicit, decodeLazyByteStringExplicit)

-- == TYPES ===================================================================

type XmlValidator = BSLChar.ByteString -> Either String ()

newtype XmlEncoding = XmlEncoding { xmlEncodingName :: String } deriving (Eq)

instance Show XmlEncoding where
    show (XmlEncoding name) = name

_UNICODE_ :: XmlEncoding
_UNICODE_ = XmlEncoding unicodeString

_ISO8859_1_ :: XmlEncoding
_ISO8859_1_ = XmlEncoding iso8859_1

_ISO8859_2_ :: XmlEncoding
_ISO8859_2_ = XmlEncoding iso8859_2

_ISO8859_3_ :: XmlEncoding
_ISO8859_3_ = XmlEncoding iso8859_3

_ISO8859_4_ :: XmlEncoding
_ISO8859_4_ = XmlEncoding iso8859_4

_ISO8859_5_ :: XmlEncoding
_ISO8859_5_ = XmlEncoding iso8859_5

_ISO8859_6_ :: XmlEncoding
_ISO8859_6_ = XmlEncoding iso8859_6

_ISO8859_7_ :: XmlEncoding
_ISO8859_7_ = XmlEncoding iso8859_7

_ISO8859_8_ :: XmlEncoding
_ISO8859_8_ = XmlEncoding iso8859_8

_ISO8859_9_ :: XmlEncoding
_ISO8859_9_ = XmlEncoding iso8859_9

_ISO8859_10_ :: XmlEncoding
_ISO8859_10_ = XmlEncoding iso8859_10

_ISO8859_11_ :: XmlEncoding
_ISO8859_11_ = XmlEncoding iso8859_11

_ISO8859_13_ :: XmlEncoding
_ISO8859_13_ = XmlEncoding iso8859_13

_ISO8859_14_ :: XmlEncoding
_ISO8859_14_ = XmlEncoding iso8859_14

_ISO8859_15_ :: XmlEncoding
_ISO8859_15_ = XmlEncoding iso8859_15

_ISO8859_16_ :: XmlEncoding
_ISO8859_16_ = XmlEncoding iso8859_16

_USASCII_ :: XmlEncoding
_USASCII_ = XmlEncoding usAscii

_UCS2_ :: XmlEncoding
_UCS2_ = XmlEncoding ucs2

_UTF8_ :: XmlEncoding
_UTF8_ = XmlEncoding utf8

_UTF16_ :: XmlEncoding
_UTF16_ = XmlEncoding utf16

_UTF16BE_ :: XmlEncoding
_UTF16BE_ = XmlEncoding utf16be

_UTF16LE_ :: XmlEncoding
_UTF16LE_ = XmlEncoding utf16le

_ISOLATIN1_ :: XmlEncoding
_ISOLATIN1_ = XmlEncoding isoLatin1



-- == ENCODING FUNCTIONS ======================================================

xmlEncodingFromString :: String -> Either String XmlEncoding
xmlEncodingFromString name =
    case map toLower name of
      "latin1" -> return _ISOLATIN1_
      "latin9" -> return _ISO8859_15_
      "iso-8859-1" -> return _ISO8859_1_
      "iso-8859-2" -> return _ISO8859_2_
      "iso-8859-3" -> return _ISO8859_3_
      "iso-8859-4" -> return _ISO8859_4_
      "iso-8859-5" -> return _ISO8859_5_
      "iso-8859-6" -> return _ISO8859_6_
      "iso-8859-7" -> return _ISO8859_7_
      "iso-8859-8" -> return _ISO8859_8_
      "iso-8859-9" -> return _ISO8859_9_
      "iso-8859-10" -> return _ISO8859_10_
      "iso-8859-11" -> return _ISO8859_11_
      "iso-8859-13" -> return _ISO8859_13_
      "iso-8859-14" -> return _ISO8859_14_
      "iso-8859-15" -> return _ISO8859_15_
      "utf8" -> return _UTF8_
      "utf-8" -> return _UTF8_
      "utf16" -> return _UTF16_
      "utf-16" -> return _UTF16_
      "ascii" -> return _USASCII_
      "us-ascii" -> return _USASCII_
      "unicode" -> return _UNICODE_
      _ -> fail $ "Can't parse unsupported XmlEncoding name `" ++ name ++ "'."

xmlEncodingToString :: XmlEncoding -> String
xmlEncodingToString = show

xmlEncoding :: Monad m => BSL.ByteString -> m DynEncoding
xmlEncoding bs =
    let enc = case parseProlog of
                Nothing -> "UTF-8"
                Just s -> s
    in case encodingFromStringExplicit enc of
         Just e -> return e
         Nothing -> fail $ "unknown encoding: " ++ enc
    where
      parseProlog =
          do M.when (BSLChar.take 5 bs /= BSLChar.pack "<?xml") $ fail "no xml prolog"
             (header, _) <- breakSubstring' (BSLChar.pack "?>") bs
             (_, rest) <- breakSubstring' (BSLChar.pack "encoding") header
             let notQuote = \c -> c /= '"' && c /= '\''
                 enc = (BSLChar.takeWhile notQuote .    -- get the encoding
                        BSLChar.drop 1 .                -- drop quote
                        BSLChar.dropWhile notQuote)     -- skip to first quote
                       rest
             return $ BSLChar.unpack enc
      breakSubstring' x bs =
          let (prefix, y) = breakSubstring x bs
          in if x `BSL.isPrefixOf` y
               then return (prefix, BSL.drop (BSL.length x) y)
               else fail ("breakSubstring failed: substring " ++ show x
                          ++ " not contained in " ++ show bs)
      breakSubstring x bs =
          if x `BSL.isPrefixOf` bs
             then (BSL.empty, bs)
             else case BSL.uncons bs of
                    Nothing -> (BSL.empty, BSL.empty)
                    Just (c, bs') ->
                        let (bs1, bs2) = breakSubstring x bs'
                        in (BSL.cons c bs1, bs2)

-- == XML FUNCTIONS ===========================================================

xpOne :: PU a -> PU a
xpOne = id

xpMany :: String -> PU a -> PU [a]
xpMany rootName pu = xpElem rootName (xpList pu)

xpByteString :: PU BSL.ByteString
xpByteString = xpWrap (BSLChar.pack, BSLChar.unpack) xpText0

pickle :: PU a -> a -> BSLChar.ByteString
pickle pu = serializeXml _UTF8_ . pickleDoc pu

pickleWithEnc :: XmlEncoding -> PU a -> a -> BSLChar.ByteString
pickleWithEnc enc pu = serializeXml' noProlog enc . pickleDoc pu
    where noProlog = enc == _UNICODE_

unpickle :: Monad m => PU a -> BSLChar.ByteString -> m a
unpickle pu bsl = parseXml defaultParseXmlConfig bsl >>= unpickleDocM pu

validateAndUnpickle :: XmlValidator -> PU a -> BSLChar.ByteString -> Either String a
validateAndUnpickle val pu bsl =
    case val bsl of
      Left s -> Left s
      _ ->
        case parseXml defaultParseXmlConfig bsl of
          Right t -> unpickleDocM pu t
          Left s -> Left s

pickleStr :: PU a -> a -> String
pickleStr pu = BSLChar.unpack . pickleWithEnc _UNICODE_ pu

unpickleDocM :: Monad m => PU a -> XmlTree -> m a
unpickleDocM pu t =
    case unpickleDoc pu t of
      Nothing -> fail "Unpickling failed."
      Just x -> return x

unpickleStr :: Monad m => PU a -> [Char] -> m a
unpickleStr pu = unpickle pu . BSLChar.pack

serializeXml :: XmlEncoding -> XmlTree -> BSLChar.ByteString
serializeXml enc xml = serializeXml' False enc xml

serializeXml' :: Bool -> XmlEncoding -> XmlTree -> BSLChar.ByteString
serializeXml' indent enc xml =
    let [str] = runLA (constA xml >>> encodeA) (error "serializeXml': undefined")
    in (BSLChar.pack (strip str))
    where rmpi = enc == _UNICODE_
          encodeA =
              processChildren (uniqueNamespacesFromDeclAndQNames)
              >>>
              (if indent then processChildren indentDoc else this)
              >>>
              escapeXmlDoc
              >>>
              encodeDocument' rmpi (xmlEncodingName enc)
              >>>
              replaceChildren (xshow getChildren >>> arr encode >>> mkText)
              >>>
              xshow getChildren
          encode = id
          strip = reverse . dropWhile isSpace . reverse . dropWhile isSpace
          isSpace = (`elem` " \n\t")

data ParseXmlConfig = ParseXmlConfig { pxml_removeProcessingInstructions :: Bool
                                     , pxml_removeNonSignificantWhitespace :: Bool
                                     , pxml_removeComments :: Bool }

defaultParseXmlConfig :: ParseXmlConfig
defaultParseXmlConfig = ParseXmlConfig True True True

parseXml :: Monad m => ParseXmlConfig -> BSLChar.ByteString -> m XmlTree
parseXml cfg bstr =
    do enc <- xmlEncoding bstr
       case decodeLazyByteStringExplicit enc bstr of
         Left err -> fail (show err)
         Right str ->
             let trees = runLA (pipe str) (error "parseXml: undefined")
             in case trees of
                  [] -> fail "empty result"
                  (NTree _ ((NTree (XError _ msg) _) : _) : _) -> fail msg
                  trees' -> return (last trees')
    where pipe str = root [] []
                     >>> replaceChildren (parse str)
                     >>> (if pxml_removeProcessingInstructions cfg then removeXmlPi else this)
                     >>> (if pxml_removeNonSignificantWhitespace cfg then removeDocWhiteSpace else this)
                     >>> (if pxml_removeComments cfg then removeAllComment else this)
          parse str = constA ("urn:Data.ByteString", str)
                      >>> parseXmlDoc
                      >>> substXmlEntityRefs
                      >>> canonicalizeContents
                      >>> propagateNamespaces

removeXmlPi :: ArrowXml a => a (NTree XNode) (NTree XNode)
removeXmlPi = processTopDown (none `when` (isPi >>> hasName t_xml))

data PUCase a = PUCase { case_value :: a
                       , case_spec :: PU a }

xpCase :: (a -> b, b -> a) -> PU a -> PUCase b
xpCase wrapfuns@(aToB,_bToA) specA = PUCase value spec
    where value = aToB (error "xpCase: tagging function requires evaluation")
          spec = xpWrap wrapfuns specA

xpCaseConst :: a -> PU () -> PUCase a
xpCaseConst a pu = xpCase (const a, const $ error undef) pu
    where undef = error "xpCaseConst: this value should have been ignored"

xpSwitch :: Show a => [PUCase a] -> PU a
xpSwitch = xpTagSwitch (takeWhile (/= ' ') . show)

xpTagSwitch :: Eq t => (a -> t) -> [PUCase a] -> PU a
xpTagSwitch tag cases = xpAlt idx (map case_spec cases)
    where idx = fromMaybe err . flip elemIndex tags . tag
          err = error $ "xpTagSwitch: no case matched"
          tags = map (tag . case_value) cases

xpTextFixed :: String -> PU ()
xpTextFixed text =
    (xpWrapMaybe (\t -> if t == text then Just () else Nothing, const text)
     xpText) { theSchema = scFixed text }

xpConsumeAll :: PU a -> PU a
xpConsumeAll pu =
    PU { appPickle   = appPickle pu
       , appUnPickle = \s1 -> case appUnPickle pu s1 of
                                r@(Nothing, _) -> r
                                r@(Just _, s2) ->
                                    case contents s2 of
                                      [] -> r
                                      _ -> (Nothing, s2)
       , theSchema   = theSchema pu
       }

xpIgnoreOnUnpickle :: (PU a -> PU a) -> PU a -> PU a
xpIgnoreOnUnpickle tf pu =
    PU { appPickle   = appPickle (tf pu)
       , appUnPickle = appUnPickle pu
       , theSchema   = theSchema pu
       }

xpExhaustiveList :: PU a -> PU [a]
xpExhaustiveList = xpConsumeAll . xpList

xpEither :: (Show a, Show b) => PU a -> PU b -> PU (Either a b)
xpEither xpL xpR =
    xpSwitch
    [ xpCase (Left, \(Left l) -> l) xpL
    , xpCase (Right, \(Right r) -> r) xpR
    ]