module Text.XML.JSON.StreamingXmlToJson(xmlStreamToJSON)
where

import Text.HTML.TagSoup
import qualified Data.Text.Lazy as T
import Data.List (intercalate)
import qualified Data.Foldable



xmlStreamToJSON :: String -> [String]
xmlStreamToJSON fileData = map toText linesWithLevels
    where xmlData = parseTags fileData
          jsonData = map getEncodedJSON . parseXML $ xmlData
          linesWithLevels = Data.Foldable.toList jsonData

parseXML :: [Tag String] -> [State]
parseXML d = scanl convertTag (State Empty []) d

type Attrs = [(String, String)]
type Name = String
data State = State { getEncodedJSON :: EncodedJSON, getParents :: [Int] }

data EncodedJSON = StartObject Name Attrs Bool | EndObject | Text String Bool | Empty

quoteT :: String
quoteT = "\""

toText :: EncodedJSON -> String
toText Empty = ""
toText (Text t hasLeadingComma) = concat [leadingComma, quoteT, encodeStr t, quoteT]
    where leadingComma = if hasLeadingComma then ", " else ""
toText EndObject = "]}\n"
toText (StartObject name attrs hasLeadingComma) = concat [ leadingComma
                                                         , "{\"name\": \""
                                                         , name
                                                         , "\", "
                                                         , toTextAttrs attrs
                                                         , "\"items\": [ "
                                                         ]
    where leadingComma = if hasLeadingComma then ", " else ""

toTextAttrs :: Attrs -> String
toTextAttrs [] = ""
toTextAttrs as = concat [ "\"attrs\": { "
                          , intercalate (", ") . map toTextKV $ as
                          , " }, "
                          ]

toTextKV :: (String, String) -> String
toTextKV (k,v) = concat [quoteT, k, "\": \"", encodeStr v, quoteT]

-- TODO: use a faster method for quotation escaping. Consider implementing the encoding function using String (or ByteString)
encodeStr :: String -> String
encodeStr t = concatMap (\c -> case c of
                                   '"' -> "\\\""
                                   '\\' -> "\\\\"
                                   _ -> [c])
              $ t

convertTag :: State -> Tag String -> State
convertTag (State _ (curCount:parents)) (TagOpen name attrs) 
    = State startObj (0 : (curCount + 1) : parents)
      where startObj = createStartObject name attrs (curCount > 0)
convertTag (State _ []) (TagOpen name attrs) 
    = State startObj [0]
      where startObj = createStartObject name attrs False
convertTag (State _ ( _:ancestors)) (TagClose _)
    = State EndObject ancestors
convertTag (State _ []) t@(TagClose _)
    = error $ "Malformed XML, unexpected close tag: " ++ (show t)
convertTag (State _ parents) (TagText text) 
    = if stripped == ""
      then State Empty parents
      else State (Text stripped comma) newParents
      where stripped = T.unpack . T.strip . T.pack $ text
            (comma, newParents) = case parents of
                      [] -> (False, [])
                      count:ps -> (count > 0, (count + 1):ps)
convertTag (State _ parents) _ = (State Empty parents)

createStartObject :: String -> Attrs -> Bool -> EncodedJSON
createStartObject name attrs hasLeadingComma 
    = case head name of
        '!' -> Empty
        '?' -> Empty
        _ -> StartObject name attrs hasLeadingComma