module Xeno.SAX
  ( process
  , fold
  , validate
  , dump
  ) where
import           Control.Exception
import           Control.Monad.State.Strict
import           Control.Spork
import           Data.ByteString (ByteString)
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Unsafe as SU
import           Data.Functor.Identity
import           Data.Monoid
import           Data.Word
import           Xeno.Types
validate :: ByteString -> Bool
validate s =
  case spork
         (runIdentity
            (process
               (\_ -> pure ())
               (\_ _ -> pure ())
               (\_ -> pure ())
               (\_ -> pure ())
               (\_ -> pure ())
               (\_ -> pure ())
               s)) of
    Left (_ :: XenoException) -> False
    Right _ -> True
dump :: ByteString -> IO ()
dump str =
  evalStateT
    (process
       (\name -> do
          level <- get
          lift (S8.putStr (S8.replicate level ' ' <> "<" <> name <> "")))
       (\key value -> lift (S8.putStr (" " <> key <> "=\"" <> value <> "\"")))
       (\_ -> do
          level <- get
          let !level' = level + 2
          put level'
          lift (S8.putStrLn (">")))
       (\text -> do
          level <- get
          lift (S8.putStrLn (S8.replicate level ' ' <> S8.pack (show text))))
       (\name -> do
          level <- get
          let !level' = level  2
          put level'
          lift (S8.putStrLn (S8.replicate level' ' ' <> "</" <> name <> ">")))
       (\cdata -> do
          level <- get
          lift (S8.putStrLn (S8.replicate level ' ' <> "CDATA: " <> S8.pack (show cdata))))
       str)
    (0 :: Int)
fold
  :: (s -> ByteString -> s) 
  -> (s -> ByteString -> ByteString -> s) 
  -> (s -> ByteString -> s) 
  -> (s -> ByteString -> s) 
  -> (s -> ByteString -> s) 
  -> (s -> ByteString -> s) 
  -> s
  -> ByteString
  -> Either XenoException s
fold openF attrF endOpenF textF closeF cdataF s str =
  spork
    (execState
       (process
          (\name -> modify (\s' -> openF s' name))
          (\key value -> modify (\s' -> attrF s' key value))
          (\name -> modify (\s' -> endOpenF s' name))
          (\text -> modify (\s' -> textF s' text))
          (\name -> modify (\s' -> closeF s' name))
          (\cdata -> modify (\s' -> cdataF s' cdata))
          str)
       s)
process
  :: Monad m
  => (ByteString -> m ()) 
  -> (ByteString -> ByteString -> m ()) 
  -> (ByteString -> m ()) 
  -> (ByteString -> m ()) 
  -> (ByteString -> m ()) 
  -> (ByteString -> m ()) 
  -> ByteString -> m ()
process openF attrF endOpenF textF closeF cdataF str = findLT 0
  where
    findLT index =
      case elemIndexFrom openTagChar str index of
        Nothing -> unless (S.null text) (textF text)
          where text = S.drop index str
        Just fromLt -> do
          unless (S.null text) (textF text)
          checkOpenComment (fromLt + 1)
          where text = substring str index fromLt
    
    checkOpenComment index =
      if | s_index this 0 == bangChar 
           && s_index this 1 == commentChar 
           && s_index this 2 == commentChar -> 
           findCommentEnd (index + 3)
         | s_index this 0 == bangChar 
           && s_index this 1 == openAngleBracketChar 
           && s_index this 2 == 67 
           && s_index this 3 == 68 
           && s_index this 4 == 65 
           && s_index this 5 == 84 
           && s_index this 6 == 65 
           && s_index this 7 == openAngleBracketChar -> 
           findCDataEnd (index + 8) (index + 8)
         | otherwise ->
           findTagName index
      where
        this = S.drop index str
    findCommentEnd index =
      case elemIndexFrom commentChar str index of
        Nothing -> throw (XenoParseError "Couldn't find the closing comment dash.")
        Just fromDash ->
          if s_index this 0 == commentChar && s_index this 1 == closeTagChar
            then findLT (fromDash + 2)
            else findCommentEnd (fromDash + 1)
          where this = S.drop index str
    findCDataEnd cdata_start index =
      case elemIndexFrom closeAngleBracketChar str index of
        Nothing -> throw (XenoParseError "Couldn't find closing angle bracket for CDATA.")
        Just fromCloseAngleBracket ->
          if s_index str (fromCloseAngleBracket + 1) == closeAngleBracketChar
             then do
               cdataF (substring str cdata_start fromCloseAngleBracket)
               findLT (fromCloseAngleBracket + 3) 
             else
               
               findCDataEnd cdata_start (fromCloseAngleBracket + 1)
    findTagName index0 =
      let spaceOrCloseTag = parseName str index
      in if | s_index str index0 == questionChar ->
              case elemIndexFrom closeTagChar str spaceOrCloseTag of
                Nothing -> throw (XenoParseError "Couldn't find the end of the tag.")
                Just fromGt -> do
                  findLT (fromGt + 1)
            | s_index str spaceOrCloseTag == closeTagChar ->
              do let tagname = substring str index spaceOrCloseTag
                 if s_index str index0 == slashChar
                   then closeF tagname
                   else do
                     openF tagname
                     endOpenF tagname
                 findLT (spaceOrCloseTag + 1)
            | otherwise ->
              do let tagname = substring str index spaceOrCloseTag
                 openF tagname
                 result <- findAttributes spaceOrCloseTag
                 endOpenF tagname
                 case result of
                   Right closingTag -> findLT (closingTag + 1)
                   Left closingPair -> do
                     closeF tagname
                     findLT (closingPair + 2)
      where
        index =
          if s_index str index0 == slashChar
            then index0 + 1
            else index0
    findAttributes index0 =
      if s_index str index == slashChar &&
         s_index str (index + 1) == closeTagChar
        then pure (Left index)
        else if s_index str index == closeTagChar
               then pure (Right index)
               else let afterAttrName = parseName str index
                    in if s_index str afterAttrName == equalChar
                         then let quoteIndex = afterAttrName + 1
                                  usedChar = s_index str quoteIndex
                              in if usedChar == quoteChar ||
                                    usedChar == doubleQuoteChar
                                   then case elemIndexFrom
                                               usedChar
                                               str
                                               (quoteIndex + 1) of
                                          Nothing ->
                                            throw (XenoParseError "Couldn't find the matching quote character.")
                                          Just endQuoteIndex -> do
                                            attrF
                                              (substring str index afterAttrName)
                                              (substring
                                                 str
                                                 (quoteIndex + 1)
                                                 (endQuoteIndex))
                                            findAttributes (endQuoteIndex + 1)
                                   else throw (XenoParseError ("Expected ' or \", got: " <> S.singleton usedChar))
                         else throw (XenoParseError ("Expected =, got: " <> S.singleton (s_index str afterAttrName) <> " at character index: " <> (S8.pack . show) afterAttrName))
      where
        index = skipSpaces str index0
s_index :: ByteString -> Int -> Word8
s_index ps n
    | n < 0          = throw XenoStringIndexProblem
    | n >= S.length ps = throw XenoStringIndexProblem
    | otherwise      = ps `SU.unsafeIndex` n
skipSpaces :: ByteString -> Int -> Int
skipSpaces str i =
  if isSpaceChar (s_index str i)
    then skipSpaces str (i + 1)
    else i
substring :: ByteString -> Int -> Int -> ByteString
substring s start end = S.take (end  start) (S.drop start s)
parseName :: ByteString -> Int -> Int
parseName str index =
  if not (isNameChar1 (s_index str index))
     then index
     else parseName' str (index + 1)
parseName' :: ByteString -> Int -> Int
parseName' str index =
  if not (isNameChar (s_index str index))
     then index
     else parseName' str (index + 1)
elemIndexFrom :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom c str offset = fmap (+ offset) (S.elemIndex c (S.drop offset str))
isSpaceChar :: Word8 -> Bool
isSpaceChar c = c == 32 || (c <= 10 && c >= 9) || c == 13
isNameChar1 :: Word8 -> Bool
isNameChar1 c =
  (c >= 97 && c <= 122) || (c >= 65 && c <= 90) || c == 95 || c == 58
isNameChar :: Word8 -> Bool
isNameChar c =
  (c >= 97 && c <= 122) || (c >= 65 && c <= 90) || c == 95 || c == 58 ||
  c == 45 || c == 46 || (c >= 48 && c <= 57)
quoteChar :: Word8
quoteChar = 39
doubleQuoteChar :: Word8
doubleQuoteChar = 34
equalChar :: Word8
equalChar = 61
questionChar :: Word8
questionChar = 63
slashChar :: Word8
slashChar = 47
bangChar :: Word8
bangChar = 33
commentChar :: Word8
commentChar = 45 
openTagChar :: Word8
openTagChar = 60 
closeTagChar :: Word8
closeTagChar = 62 
openAngleBracketChar :: Word8
openAngleBracketChar = 91
closeAngleBracketChar :: Word8
closeAngleBracketChar = 93