module HaskellWorks.Data.Xml.Succinct.Index
( XmlIndex(..)
, XmlIndexAt(..)
)
where
import Control.Arrow
import qualified Data.Attoparsec.ByteString.Char8 as ABC
import qualified Data.ByteString as BS
import qualified Data.List as L
import Data.Monoid
import HaskellWorks.Data.Bits.BitWise
import HaskellWorks.Data.Drop
import HaskellWorks.Data.Positioning
import qualified HaskellWorks.Data.BalancedParens as BP
import HaskellWorks.Data.RankSelect.Base.Rank0
import HaskellWorks.Data.RankSelect.Base.Rank1
import HaskellWorks.Data.RankSelect.Base.Select1
import HaskellWorks.Data.TreeCursor
import HaskellWorks.Data.Uncons
import HaskellWorks.Data.Xml.CharLike
import HaskellWorks.Data.Xml.Grammar
import HaskellWorks.Data.Xml.Succinct
import Prelude hiding (drop)
data XmlIndex
= XmlIndexDocument [XmlIndex]
| XmlIndexElement String [XmlIndex]
| XmlIndexCData BS.ByteString
| XmlIndexComment BS.ByteString
| XmlIndexMeta String [XmlIndex]
| XmlIndexAttrList [XmlIndex]
| XmlIndexValue BS.ByteString
| XmlIndexAttrName BS.ByteString
| XmlIndexAttrValue BS.ByteString
| XmlIndexError String
deriving (Eq, Show)
class XmlIndexAt a where
xmlIndexAt :: a -> XmlIndex
pos :: (Select1 v, Rank1 w) => XmlCursor t v w -> Position
pos c = lastPositionOf (select1 (interests c) (rank1 (balancedParens c) (cursorRank c)))
remText :: (Drop v, Select1 v1, Rank1 w) => XmlCursor v v1 w -> v
remText c = drop (toCount (pos c)) (cursorText c)
instance (BP.BalancedParens w, Rank0 w, Rank1 w, Select1 v, TestBit w) => XmlIndexAt (XmlCursor BS.ByteString v w) where
xmlIndexAt :: XmlCursor BS.ByteString v w -> XmlIndex
xmlIndexAt k = case uncons remainder of
Just (!c, cs) | isElementStart c -> parseElem cs
Just (!c, _ ) | isSpace c -> XmlIndexAttrList $ mapValuesFrom (firstChild k)
Just (!c, _ ) | isAttribute && isQuote c -> XmlIndexAttrValue remainder
Just _ | isAttribute -> XmlIndexAttrName remainder
Just _ -> XmlIndexValue remainder
Nothing -> XmlIndexError "End of data"
where remainder = remText k
mapValuesFrom = L.unfoldr (fmap (xmlIndexAt &&& nextSibling))
isAttribute = case remText <$> parent k >>= uncons of
Just (!c, _) | isSpace c -> True
_ -> False
parseElem bs =
case ABC.parse parseXmlElement bs of
ABC.Fail {} -> decodeErr "Unable to parse element name" bs
ABC.Partial _ -> decodeErr "Unexpected end of string" bs
ABC.Done i r -> case r of
XmlElementTypeCData -> XmlIndexCData i
XmlElementTypeComment -> XmlIndexComment i
XmlElementTypeMeta s -> XmlIndexMeta s (mapValuesFrom $ firstChild k)
XmlElementTypeElement s -> XmlIndexElement s (mapValuesFrom $ firstChild k)
XmlElementTypeDocument -> XmlIndexDocument (mapValuesFrom (firstChild k) <> mapValuesFrom (nextSibling k))
decodeErr :: String -> BS.ByteString -> XmlIndex
decodeErr reason bs =
XmlIndexError $ reason <>": " <> show (BS.take 20 bs) <> "...'"