{-# LANGUAGE OverloadedStrings #-}
module Text.XML.ToJSON
  ( elementToJSON
  , tokensToJSON
  , xmlToJSON
  ) where

import Control.Monad (when)
import Control.Arrow (second)
import Control.Applicative ( (<$>), (*>) )

import Data.Maybe (fromMaybe)
import qualified Data.CaseInsensitive as CI
import qualified Data.Text as T
import Data.ByteString (ByteString)
import qualified Blaze.ByteString.Builder as B
import Data.Attoparsec.ByteString.Char8 (char)
import Data.Conduit
import Data.Conduit.Internal (ResumableSource(ResumableSource))
import qualified Data.Conduit.List as C
import qualified Data.Conduit.Attoparsec as C
import qualified Data.Conduit.Text as C
import qualified Data.HashMap.Strict as HM
import qualified Data.Vector as V

import Text.HTML.TagStream
import qualified Text.HTML.TagStream.Text as T
import qualified Text.HTML.TagStream.ByteString as S
import Text.XML.ToJSON.Builder
import Data.Aeson (Value(..), Object)

tokenToBuilder :: T.Token -> Builder
tokenToBuilder (TagOpen s as selfClose) = do
    beginElement s
    addAttrs as
    when selfClose endElement
tokenToBuilder (TagClose _) = endElement -- FIXME should match tag name?
tokenToBuilder (Text s) = addValue s
tokenToBuilder _ = return ()

attrsToObject :: [(Str, Str)] -> Object
attrsToObject = HM.fromList . map (second String)

mergeObject :: Value -> Value -> Value
mergeObject (Array arr) v  = Array (V.cons v arr)
mergeObject v1          v2 = Array (V.fromList [v1, v2])

elementToJSON :: Element -> Value
elementToJSON (Element as vs cs) =
    if null as && null cs
      then
        String (T.concat vs)
      else
        Object $ HM.fromListWith mergeObject
                   $ attrs
                  ++ values
                  ++ map (second elementToJSON) cs
  where
    attrs = if null as
              then []
              else [("__attributes", Object (attrsToObject as))]
    values = if null vs
               then []
               else [("__values", Array (V.fromList (map String vs)))]

tokensToJSON :: [T.Token] -> Value
tokensToJSON tokens =
    elementToJSON $ runBuilder (mapM_ tokenToBuilder tokens)

xmlToJSON :: (Functor m, Monad m, MonadThrow m) => Source m ByteString -> m Value
xmlToJSON src = do
    -- try to peek the first tag to find the xml encoding.
    (src', token) <- src $$+ C.sinkParser (char '<' *> S.tag)

    let (mencoding, src'') =
          case token of
            (TagOpen "?xml" as _) ->
                (lookup "encoding" as, src')
            _ ->
                ( Nothing
                , prependRSrc
                    (yield (B.toByteString (S.showToken id token)))
                    src'
                )

        codec = fromMaybe C.utf8 (mencoding >>= getCodec . CI.mk)

    tokensToJSON <$> (src'' $$+- (C.decode codec =$ T.tokenStream =$ C.consume))

prependRSrc :: Monad m
            => Source m a
            -> ResumableSource m a
            -> ResumableSource m a
prependRSrc src (ResumableSource src' close) = ResumableSource (src >> src') close

getCodec :: CI.CI ByteString -> Maybe C.Codec
getCodec c =
    case c of
        "utf-8" -> Just C.utf8
        "utf8"  -> Just C.utf8
        "gbk"   -> Just C.iso8859_1
        _       -> Nothing