-- | This is a fast non-pretty-printer for turning the internal representation
--   of generic structured XML documents into Lazy ByteStrings.
--   Like in Text.Xml.HaXml.Pretty, there is one pp function for each type in
--   Text.Xml.HaXml.Types, so you can pretty-print as much or as little
--   of the document as you wish.

module Text.XML.HaXml.ByteStringPP
  (
  -- * Pretty-print a whole document
    document
  -- ** Just one content
  ,   content
  -- ** Just one tagged element
  ,   element
  -- * Pretty-print just a DTD
  , doctypedecl
  -- ** The prolog
  ,   prolog
  -- ** A content particle description
  ,   cp
  ) where

import Prelude hiding (maybe,either,elem,concat)
import Maybe hiding (maybe)
import Data.List (intersperse)
--import Data.ByteString.Lazy hiding (pack,map,head,any,singleton,intersperse,join)
import Data.ByteString.Lazy.Char8 (ByteString(), concat, pack, singleton, intercalate
                                  , append, elem, empty)
import Text.XML.HaXml.Types

either :: (t -> t1) -> (t2 -> t1) -> Either t t2 -> t1
either f _ (Left x)  = f x
either _ g (Right x) = g x

maybe :: (t -> ByteString) -> Maybe t -> ByteString
maybe _ Nothing  = empty
maybe f (Just x) = f x


-- A simple implementation of the pretty-printing combinator interface,
-- but for plain ByteStrings:
infixl 6 <>
infixl 6 <+>
infixl 5 $$
(<>)   :: ByteString   -> ByteString -> ByteString -- Beside
hcat   :: [ByteString] -> ByteString                 -- List version of <>
(<+>)  :: ByteString   -> ByteString -> ByteString -- Beside, separated by space
hsep   :: [ByteString] -> ByteString                 -- List version of <+>
($$)   :: ByteString   -> ByteString -> ByteString -- Above; if there is no
                                                   -- overlap, it "dovetails"
vcat   :: [ByteString] -> ByteString       -- List version of $$
-- cat    :: [ByteString] -> ByteString       -- Either hcat or vcat
sep    :: [ByteString] -> ByteString       -- Either hsep or vcat
-- fcat   :: [ByteString] -> ByteString       -- ``Paragraph fill'' version of cat
fsep   :: [ByteString] -> ByteString       -- ``Paragraph fill'' version of sep
nest   :: Int -> ByteString -> ByteString  -- Nested

(<>)  b1 b2  = b1 `append` b2
(<+>) b1 b2  = b1 <> pack " " <> b2
($$)  b1 b2  = b1 <> pack "\n" <> b2
-- ($+$)        = ($$)

hcat = Data.ByteString.Lazy.Char8.concat
hsep = Data.ByteString.Lazy.Char8.intercalate (singleton ' ')
vcat = Data.ByteString.Lazy.Char8.intercalate (singleton '\n')
_  = hcat
sep  = hsep
text :: [Char] -> ByteString
text = pack
-- _ = cat
fsep = sep
nest _ b = pack " " <> b
parens :: ByteString -> ByteString
parens p = pack "(" <> p <> pack ")"


----
-- Now for the XML pretty-printing interface.
-- (Basically copied direct from Text.XML.HaXml.Pretty).

document :: Document i -> ByteString
prolog   :: Prolog     -> ByteString
xmldecl  :: XMLDecl    -> ByteString
misc     :: Misc       -> ByteString
sddecl   :: Bool       -> ByteString

doctypedecl   :: DocTypeDecl   -> ByteString
markupdecl    :: MarkupDecl    -> ByteString
-- extsubset     :: ExtSubset     -> ByteString
-- extsubsetdecl :: ExtSubsetDecl -> ByteString
cp            :: CP            -> ByteString

element   :: Element i -> ByteString
attribute :: Attribute -> ByteString
content   :: Content i -> ByteString

----

document (Document p _ e m)= prolog p $$ element e $$ vcat (Prelude.map misc m)
prolog (Prolog x m1 dtd m2)= maybe xmldecl x $$
                             vcat (Prelude.map misc m1) $$
                             maybe doctypedecl dtd $$
                             vcat (Prelude.map misc m2)
xmldecl (XMLDecl v e sd)   = text "<?xml version='" <> text v <> text "'" <+>
                             maybe encodingdecl e <+>
                             maybe sddecl sd <+>
                             text "?>"
misc (Comment s)           = text "<!--" <+> text s <+> text "-->"
misc (PI (n,s))            = text "<?" <> text n <+> text s <+> text "?>"
sddecl sd   | sd           = text "standalone='yes'"
            | otherwise    = text "standalone='no'"
doctypedecl (DTD n eid ds) = if Prelude.null ds then
                                  hd <> text ">"
                             else hd <+> text " [" $$
                                  vcat (Prelude.map markupdecl ds) $$ text "]>"
                           where hd = text "<!DOCTYPE" <+> text n <+>
                                      maybe externalid eid
markupdecl (Element e)     = elementdecl e
markupdecl (AttList a)     = attlistdecl a
markupdecl (Entity e)      = entitydecl e
markupdecl (Notation n)    = notationdecl n
markupdecl (MarkupMisc m)  = misc m
--markupdecl (MarkupPE p m)  = peref p
-- _ (ExtSubset t ds) = maybe textdecl t $$
--                             vcat (Prelude.map extsubsetdecl ds)
-- _ (ExtMarkupDecl m)      = markupdecl m
-- extsubsetdecl (ExtConditionalSect c) = conditionalsect c
--extsubsetdecl (ExtPEReference p e)   = peref p

element (Elem n as []) = text "<" <> text n <+>
                         fsep (Prelude.map attribute as) <> text "/>"
element e@(Elem n as cs)
--  | any isText cs    = text "<" <> text n <+> fsep (map attribute as) <>
--                       text ">" <> hcat (map content cs) <>
--                       text "</" <> text n <> text ">"
    | isText (head cs) = text "<" <> text n <+> fsep (Prelude.map attribute as) <>
                         text ">" <> hcat (Prelude.map content cs) <>
                         text "</" <> text n <> text ">"
    | otherwise        = let (d,c) = carryelem e empty
                         in d <> c

isText :: Content t -> Bool
isText (CString _ _ _) = True
isText (CRef _ _)      = True
isText _               = False

carryelem :: Element t -> ByteString -> (ByteString, ByteString)
carryelem (Elem n as []) c
                       = ( c <>
                           text "<" <> text n <+> fsep (Prelude.map attribute as)
                         , text "/>")
carryelem (Elem n as cs) c
--  | any isText cs    =  ( c <> element e, empty)
    | otherwise        =  let (cs0,d0) = carryscan carrycontent cs (text ">")
                          in
                          ( c <>
                            text "<" <> text n <+> fsep (Prelude.map attribute as) $$
                            nest 2 (vcat cs0) <> --- $$
                            d0 <> text "</" <> text n
                          , text ">")
carrycontent :: Content t -> ByteString -> (ByteString, ByteString)
carrycontent (CElem e _) c   = carryelem e c
carrycontent (CString False s _) c = (c <> chardata s, empty)
carrycontent (CString True  s _) c = (c <> cdsect s, empty)
carrycontent (CRef r _) c    = (c <> reference r, empty)
carrycontent (CMisc m _) c   = (c <> misc m, empty)

carryscan :: (a->c->(b,c)) -> [a] -> c -> ([b],c)
carryscan _ []     c = ([],c)
carryscan f (a:as) c = let (b, c0) = f a c
                           (bs,c1) = carryscan f as c0
                       in (b:bs, c1)

--carryelem e@(Elem n as cs) c
--  | isText (head cs) =
--        ( start <>
--          text ">" <> hcat (map content cs) <> text "</" <> text n
--        , text ">")
--  | otherwise =
--        let (d,c0) = foldl carrycontent (start, text ">") cs in
--        ( d <> c0 <> text "</" <> text n
--        , text ">")
--  where start = c <> text "<" <> text n <+> fsep (map attribute as)
--
--carrycontent (d,c) (CElem e)   = let (d',c') = carryelem e c in
--                                 (d $$ nest 2 d',       c')
--carrycontent (d,c) (CString _ s) = (d <> c <> chardata s, empty)
--carrycontent (d,c) (CRef r)    = (d <> c <> reference r,empty)
--carrycontent (d,c) (CMisc m)   = (d $$ c <> misc m,     empty)


attribute (n,v)             = text n <> text "=" <> attvalue v
content (CElem e _)         = element e
content (CString False s _) = chardata s
content (CString True s _)  = cdsect s
content (CRef r _)          = reference r
content (CMisc m _)         = misc m

elementdecl :: ElementDecl -> ByteString
elementdecl (ElementDecl n cs) = text "<!ELEMENT" <+> text n <+>
                                 contentspec cs <> text ">"
contentspec :: ContentSpec -> ByteString
contentspec EMPTY              = text "EMPTY"
contentspec ANY                = text "ANY"
contentspec (Mixed m)          = mixed m
contentspec (ContentSpec c)    = cp c
--contentspec (ContentPE p cs)   = peref p
cp (TagName n m)       = text n <> modifier m
cp (Choice cs m)       = parens (hcat (intersperse (text "|") (Prelude.map cp cs))) <>
                           modifier m
cp (Seq cs m)          = parens (hcat (intersperse (text ",") (Prelude.map cp cs))) <>
                           modifier m
--cp (CPPE p c)          = peref p
modifier :: Modifier -> ByteString
modifier None          = empty
modifier Query         = text "?"
modifier Star          = text "*"
modifier Plus          = text "+"
mixed :: Mixed -> ByteString
mixed  PCDATA          = text "(#PCDATA)"
mixed (PCDATAplus ns)  = text "(#PCDATA |" <+>
                         hcat (intersperse (text "|") (Prelude.map text ns)) <>
                         text ")*"

attlistdecl :: AttListDecl -> ByteString
attlistdecl (AttListDecl n ds) = text "<!ATTLIST" <+> text n <+>
                                 fsep (Prelude.map attdef ds) <> text ">"
attdef :: AttDef -> ByteString
attdef (AttDef n t d)          = text n <+> atttype t <+> defaultdecl d
atttype :: AttType -> ByteString
atttype  StringType            = text "CDATA"
atttype (TokenizedType t)      = tokenizedtype t
atttype (EnumeratedType t)     = enumeratedtype t
tokenizedtype :: TokenizedType -> ByteString
tokenizedtype ID               = text "ID"
tokenizedtype IDREF            = text "IDREF"
tokenizedtype IDREFS           = text "IDREFS"
tokenizedtype ENTITY           = text "ENTITY"
tokenizedtype ENTITIES         = text "ENTITIES"
tokenizedtype NMTOKEN          = text "NMTOKEN"
tokenizedtype NMTOKENS         = text "NMTOKENS"
enumeratedtype :: EnumeratedType -> ByteString
enumeratedtype (NotationType n)= notationtype n
enumeratedtype (Enumeration e) = enumeration e
notationtype :: [[Char]] -> ByteString
notationtype ns                = text "NOTATION" <+>
                                 parens (hcat (intersperse (text "|") (Prelude.map text ns)))
enumeration :: [[Char]] -> ByteString
enumeration ns                 = parens (hcat (intersperse (text "|") (Prelude.map nmtoken ns)))
defaultdecl :: DefaultDecl -> ByteString
defaultdecl  REQUIRED          = text "#REQUIRED"
defaultdecl  IMPLIED           = text "#IMPLIED"
defaultdecl (DefaultTo a f)    = maybe (const (text "#FIXED")) f <+> attvalue a
--_ (IncludeSect i)= text "<![INCLUDE [" <+>
--                                 vcat (Prelude.map extsubsetdecl i) <+> text "]]>"
-- conditionalsect (IgnoreSect i) = text "<![IGNORE [" <+>
--                                  fsep (Prelude.map ignoresectcontents i) <+> text "]]>"
-- -- _ (Ignore)                = empty
-- ignoresectcontents :: IgnoreSectContents -> ByteString
-- _ (IgnoreSectContents i is)
--                                = ignore i <+> vcat (Prelude.map internal is)
--                           where internal (ics,i) = text "<![[" <+>
--                                                    ignoresectcontents ics <+>
--                                                    text "]]>" <+> ignore i
reference :: Reference -> ByteString
reference (RefEntity er)       = entityref er
reference (RefChar cr)         = charref cr
entityref :: [Char] -> ByteString
entityref n                    = text "&" <> text n <> text ";"
charref :: (Show a) => a -> ByteString
charref c                      = text "&#" <> text (show c) <> text ";"
entitydecl :: EntityDecl -> ByteString
entitydecl (EntityGEDecl d)    = gedecl d
entitydecl (EntityPEDecl d)    = pedecl d
gedecl :: GEDecl -> ByteString
gedecl (GEDecl n ed)           = text "<!ENTITY" <+> text n <+> entitydef ed <>
                                 text ">"
pedecl :: PEDecl -> ByteString
pedecl (PEDecl n pd)           = text "<!ENTITY %" <> text n <+> pedef pd <>
                                 text ">"
entitydef :: EntityDef -> ByteString
entitydef (DefEntityValue ew)  = entityvalue ew
entitydef (DefExternalID i nd) = externalid i <+> maybe ndatadecl nd
pedef :: PEDef -> ByteString
pedef (PEDefEntityValue ew)    = entityvalue ew
pedef (PEDefExternalID eid)    = externalid eid
externalid :: ExternalID -> ByteString
externalid (SYSTEM sl)         = text "SYSTEM" <+> systemliteral sl
externalid (PUBLIC i sl)       = text "PUBLIC" <+> pubidliteral i <+>
                                 systemliteral sl
ndatadecl :: NDataDecl -> ByteString
ndatadecl (NDATA n)            = text "NDATA" <+> text n
-- _ (TextDecl vi ed)      = text "<?xml" <+> maybe text vi <+>
--                                  encodingdecl ed <> text "?>"
-- extparsedent :: ExtParsedEnt t -> ByteString
-- _ (ExtParsedEnt t c)= maybe textdecl t <+> content c
-- _ (ExtPE t esd)            = maybe textdecl t <+>
--                                  vcat (Prelude.map extsubsetdecl esd)
notationdecl :: NotationDecl -> ByteString
notationdecl (NOTATION n e)    = text "<!NOTATION" <+> text n <+>
                                 either externalid publicid e <>
                                 text ">"
publicid :: PublicID -> ByteString
publicid (PUBLICID p)          = text "PUBLICID" <+> pubidliteral p
encodingdecl :: EncodingDecl -> ByteString
encodingdecl (EncodingDecl s)  = text "encoding='" <> text s <> text "'"
nmtoken :: [Char] -> ByteString
nmtoken s                      = text s
attvalue :: AttValue -> ByteString
attvalue (AttValue esr)        = text "\"" <>
                                 hcat (Prelude.map (either text reference) esr) <>
                                 text "\""
entityvalue :: EntityValue -> ByteString
entityvalue (EntityValue evs)
  | containsDoubleQuote evs    = text "'"  <> hcat (Prelude.map ev evs) <> text "'"
  | otherwise                  = text "\"" <> hcat (Prelude.map ev evs) <> text "\""
ev :: EV -> ByteString
ev (EVString s)                = text s
--ev (EVPERef p e)               = peref p
ev (EVRef r)                   = reference r
pubidliteral :: PubidLiteral -> ByteString
pubidliteral (PubidLiteral s)
    | toWord8 '"' `elem` (pack s) = text "'" <> text s <> text "'"
    | otherwise                = text "\"" <> text s <> text "\""
systemliteral :: SystemLiteral -> ByteString
systemliteral (SystemLiteral s)
    | toWord8 '"' `elem` (pack s) = text "'" <> text s <> text "'"
    | otherwise                = text "\"" <> text s <> text "\""
chardata, cdsect :: [Char] -> ByteString
chardata s                     = {-if all isSpace s then empty else-} text s
cdsect c                       = text "<![CDATA[" <> chardata c <> text "]]>"

-- toWord8 :: Char -> Word8
toWord8 :: (Enum a, Enum a1) => a1 -> a
toWord8 = toEnum . fromEnum

containsDoubleQuote :: [EV] -> Bool
containsDoubleQuote evs = any csq evs
    where csq (EVString s) = toWord8 '"' `elem` (pack s)
          csq _            = False