{-# LANGUAGE BangPatterns          #-}
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE InstanceSigs          #-}
{-# LANGUAGE MultiParamTypeClasses #-}

module HaskellWorks.Data.Xml.Succinct.Index
( XmlIndex(..)
, XmlIndexAt(..)
)
where

import           Control.Arrow
import qualified Data.Attoparsec.ByteString.Char8           as ABC
import qualified Data.ByteString                            as BS
import qualified Data.List                                  as L
import           Data.Monoid
import           HaskellWorks.Data.Bits.BitWise
import           HaskellWorks.Data.Drop
import           HaskellWorks.Data.Positioning
import qualified HaskellWorks.Data.BalancedParens           as BP
import           HaskellWorks.Data.RankSelect.Base.Rank0
import           HaskellWorks.Data.RankSelect.Base.Rank1
import           HaskellWorks.Data.RankSelect.Base.Select1
import           HaskellWorks.Data.TreeCursor
import           HaskellWorks.Data.Uncons
import           HaskellWorks.Data.Xml.CharLike
import           HaskellWorks.Data.Xml.Grammar
import           HaskellWorks.Data.Xml.Succinct
import           Prelude hiding (drop)

data XmlIndex
  = XmlIndexDocument [XmlIndex]
  | XmlIndexElement String [XmlIndex]
  | XmlIndexCData BS.ByteString
  | XmlIndexComment BS.ByteString
  | XmlIndexMeta String [XmlIndex]
  | XmlIndexAttrList [XmlIndex]
  | XmlIndexValue BS.ByteString
  | XmlIndexAttrName BS.ByteString
  | XmlIndexAttrValue BS.ByteString
  | XmlIndexError String
  deriving (Eq, Show)

class XmlIndexAt a where
  xmlIndexAt :: a -> XmlIndex

pos :: (Select1 v, Rank1 w) => XmlCursor t v w -> Position
pos c = lastPositionOf (select1 (interests c) (rank1 (balancedParens c) (cursorRank c)))

remText :: (Drop v, Select1 v1, Rank1 w) => XmlCursor v v1 w -> v
remText c = drop (toCount (pos c)) (cursorText c)

instance (BP.BalancedParens w, Rank0 w, Rank1 w, Select1 v, TestBit w) => XmlIndexAt (XmlCursor BS.ByteString v w) where
  xmlIndexAt :: XmlCursor BS.ByteString v w -> XmlIndex
  xmlIndexAt k = case uncons remainder of
    Just (!c, cs) | isElementStart c          -> parseElem cs
    Just (!c, _ ) | isSpace c                 -> XmlIndexAttrList $ mapValuesFrom (firstChild k)
    Just (!c, _ ) | isAttribute && isQuote c  -> XmlIndexAttrValue remainder
    Just _        | isAttribute               -> XmlIndexAttrName remainder
    Just _                                    -> XmlIndexValue remainder
    Nothing                                   -> XmlIndexError "End of data"
    where remainder         = remText k
          mapValuesFrom     = L.unfoldr (fmap (xmlIndexAt &&& nextSibling))
          isAttribute = case remText <$> parent k >>= uncons of
            Just (!c, _) | isSpace c -> True
            _                        -> False

          parseElem bs =
            case ABC.parse parseXmlElement bs of
              ABC.Fail {}    -> decodeErr "Unable to parse element name" bs
              ABC.Partial _  -> decodeErr "Unexpected end of string" bs
              ABC.Done i r   -> case r of
                XmlElementTypeCData     -> XmlIndexCData i
                XmlElementTypeComment   -> XmlIndexComment i
                XmlElementTypeMeta s    -> XmlIndexMeta s    (mapValuesFrom $ firstChild k)
                XmlElementTypeElement s -> XmlIndexElement s (mapValuesFrom $ firstChild k)
                XmlElementTypeDocument  -> XmlIndexDocument  (mapValuesFrom (firstChild k) <> mapValuesFrom (nextSibling k))


decodeErr :: String -> BS.ByteString -> XmlIndex
decodeErr reason bs =
  XmlIndexError $ reason <>": " <> show (BS.take 20 bs) <> "...'"