{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}

-- | RON-Text parsing
module RON.Text.Parse
    ( parseAtom
    , parseObject
    , parseOp
    , parseStateFrame
    , parseString
    , parseUuid
    , parseUuidKey
    , parseUuidAtom
    , parseWireFrame
    , parseWireFrames
    ) where

import           Prelude hiding (takeWhile)
import           RON.Internal.Prelude

import           Attoparsec.Extra (Parser, char, endOfInputEx, isSuccessful,
                                   label, manyTill, parseOnlyL, satisfy, (<+>),
                                   (??))
import qualified Data.Aeson as Json
import           Data.Attoparsec.ByteString (takeWhile1)
import           Data.Attoparsec.ByteString.Char8 (anyChar, decimal, double,
                                                   signed, skipSpace, takeWhile)
import           Data.Bits (complement, shiftL, shiftR, (.&.), (.|.))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import           Data.Char (ord)
import qualified Data.Map.Strict as Map
import           Data.Text (Text)

import qualified RON.Base64 as Base64
import           RON.Types (Atom (AFloat, AInteger, AString, AUuid),
                            Object (..), Op (..),
                            OpTerm (THeader, TQuery, TRaw, TReduced),
                            RawOp (..), StateChunk (..), StateFrame,
                            UUID (UUID), WireChunk (Query, Raw, Value),
                            WireFrame, WireReducedChunk (..))
import           RON.Util.Word (Word2, Word4, Word60, b00, b0000, b01, b10, b11,
                                ls60, safeCast)
import           RON.UUID (UuidFields (..))
import qualified RON.UUID as UUID

-- | Parse a common frame
parseWireFrame :: ByteStringL -> Either String WireFrame
parseWireFrame = parseOnlyL frame

chunksTill :: Parser () -> Parser [WireChunk]
chunksTill end = label "[WireChunk]" $ go opZero
  where
    go prev = do
        skipSpace
        atEnd <- isSuccessful end
        if atEnd then
            pure []
        else do
            (ch, lastOp) <- pChunk prev
            (ch :) <$> go lastOp

-- | Returns a chunk and the last op in it
pChunk :: RawOp -> Parser (WireChunk, RawOp)
pChunk prev = label "WireChunk" $ wireStateChunk prev <+> chunkRaw prev

chunkRaw :: RawOp -> Parser (WireChunk, RawOp)
chunkRaw prev = label "WireChunk-raw" $ do
    skipSpace
    (_, x) <- rawOp prev
    skipSpace
    void $ char ';'
    pure (Raw x, x)

-- | Returns a chunk and the last op (converted to raw) in it
wireStateChunk :: RawOp -> Parser (WireChunk, RawOp)
wireStateChunk prev = label "WireChunk-reduced" $ do
    (wrcHeader, isQuery) <- header prev
    let reducedOps y = do
            skipSpace
            (isNotEmpty, x) <- reducedOp (opObject wrcHeader) y
            t <- optional term
            unless (t == Just TReduced || isNothing t) $
                fail "reduced op may end with `,` only"
            unless (isNotEmpty || t == Just TReduced) $ fail "Empty reduced op"
            xs <- reducedOps x <|> stop
            pure $ x : xs
    wrcBody <- reducedOps (op wrcHeader) <|> stop
    let lastOp = case wrcBody of
            [] -> op wrcHeader
            _  -> last wrcBody
        wrap op = RawOp
            {opType = opType wrcHeader, opObject = opObject wrcHeader, op}
    pure ((if isQuery then Query else Value) WireReducedChunk{..}, wrap lastOp)
  where
    stop = pure []

frame :: Parser WireFrame
frame = label "WireFrame" $ chunksTill (endOfFrame <|> endOfInputEx)

-- | Parse a sequence of common frames
parseWireFrames :: ByteStringL -> Either String [WireFrame]
parseWireFrames = parseOnlyL $ manyTill frameInStream endOfInputEx

frameInStream :: Parser WireFrame
frameInStream = label "WireFrame-stream" $ chunksTill endOfFrame

-- | Parse a single context-free op
parseOp :: ByteStringL -> Either String RawOp
parseOp = parseOnlyL $ do
    (_, x) <- rawOp opZero <* skipSpace <* endOfInputEx
    pure x

-- | Parse a single context-free UUID
parseUuid :: ByteStringL -> Either String UUID
parseUuid = parseOnlyL $
    uuid UUID.zero UUID.zero PrevOpSameKey <* skipSpace <* endOfInputEx

-- | Parse a UUID in key position
parseUuidKey
    :: UUID  -- ^ same key in the previous op (default is 'UUID.zero')
    -> UUID  -- ^ previous key of the same op (default is 'UUID.zero')
    -> ByteStringL
    -> Either String UUID
parseUuidKey prevKey prev =
    parseOnlyL $ uuid prevKey prev PrevOpSameKey <* skipSpace <* endOfInputEx

-- | Parse a UUID in value (atom) position
parseUuidAtom
    :: UUID  -- ^ previous
    -> ByteStringL
    -> Either String UUID
parseUuidAtom prev = parseOnlyL $ uuidAtom prev <* skipSpace <* endOfInputEx

endOfFrame :: Parser ()
endOfFrame = label "end of frame" $ void $ skipSpace *> char '.'

rawOp :: RawOp -> Parser (Bool, RawOp)
rawOp prev = label "RawOp-cont" $ do
    (hasTyp, opType)   <- key "type"   '*' (opType   prev)  UUID.zero
    (hasObj, opObject) <- key "object" '#' (opObject prev)  opType
    (hasEvt, opEvent)  <- key "event"  '@' (opEvent  prev') opObject
    (hasLoc, opRef)    <- key "ref"    ':' (opRef    prev') opEvent
    opPayload <- payload opObject
    let op = Op{..}
    pure
        ( hasTyp || hasObj || hasEvt || hasLoc || not (null opPayload)
        , RawOp{..}
        )
  where
    prev' = op prev

reducedOp :: UUID -> Op -> Parser (Bool, Op)
reducedOp opObject prev = label "Op-cont" $ do
    (hasEvt, opEvent) <- key "event" '@' (opEvent prev) opObject
    (hasLoc, opRef)   <- key "ref"   ':' (opRef   prev) opEvent
    opPayload <- payload opObject
    let op = Op{..}
    pure (hasEvt || hasLoc || not (null opPayload), op)

key :: String -> Char -> UUID -> UUID -> Parser (Bool, UUID)
key name keyChar prevOpSameKey sameOpPrevUuid = label name $ do
    skipSpace
    isKeyPresent <- isSuccessful $ char keyChar
    if isKeyPresent then do
        u <- uuid prevOpSameKey sameOpPrevUuid PrevOpSameKey
        pure (True, u)
    else
        -- no key => use previous key
        pure (False, prevOpSameKey)

uuid :: UUID -> UUID -> UuidZipBase -> Parser UUID
uuid prevOpSameKey sameOpPrevUuid defaultZipBase = label "UUID" $
    uuid22 <+> uuid11 <+> uuidZip prevOpSameKey sameOpPrevUuid defaultZipBase

uuid11 :: Parser UUID
uuid11 = label "UUID-RON-11-letter-value" $ do
    rawX <- base64word 11
    guard $ BS.length rawX == 11
    x <- Base64.decode64 rawX ?? fail "Base64.decode64"
    skipSpace
    rawUuidVersion <- optional pUuidVersion
    rawOrigin <- optional $ base64word $ maybe 11 (const 10) rawUuidVersion
    y <- case (rawUuidVersion, BS.length <$> rawOrigin) of
        (Nothing, Just 11) ->
            case rawOrigin of
                Nothing     -> pure 0
                Just origin -> Base64.decode64 origin ?? fail "Base64.decode64"
        _ -> do
            origin <- case rawOrigin of
                Nothing     -> pure $ ls60 0
                Just origin -> Base64.decode60 origin ?? fail "Base64.decode60"
            pure $ UUID.buildY b00 (fromMaybe b00 rawUuidVersion) origin
    pure $ UUID x y

data UuidZipBase = PrevOpSameKey | SameOpPrevUuid

uuidZip :: UUID -> UUID -> UuidZipBase -> Parser UUID
uuidZip prevOpSameKey sameOpPrevUuid defaultZipBase = label "UUID-zip" $ do
    changeZipBase <- isSuccessful $ char '`'
    rawVariety <- optional pVariety
    rawReuseValue <- optional pReuse
    rawValue <- optional $ base64word60 $ 10 - fromMaybe 0 rawReuseValue
    skipSpace
    rawUuidVersion <- optional pUuidVersion
    rawReuseOrigin <- optional pReuse
    rawOrigin <- optional $ base64word60 $ 10 - fromMaybe 0 rawReuseOrigin

    let prev = UUID.split $ whichPrev changeZipBase
    let isSimple
            =   uuidVariant prev /= b00
            ||  (   not changeZipBase
                &&  isNothing rawReuseValue && isJust rawValue
                &&  isNothing rawReuseOrigin
                &&  (isNothing rawUuidVersion || isJust rawOrigin)
                )

    if isSimple then
        pure $ UUID.build UuidFields
            { uuidVariety = fromMaybe b0000    rawVariety
            , uuidValue   = fromMaybe (ls60 0) rawValue
            , uuidVariant = b00
            , uuidVersion = fromMaybe b00      rawUuidVersion
            , uuidOrigin  = fromMaybe (ls60 0) rawOrigin
            }
    else do
        uuidVariety <- pure $ fromMaybe (uuidVariety prev) rawVariety
        uuidValue <- pure $ reuse rawReuseValue rawValue (uuidValue prev)
        let uuidVariant = b00
        uuidVersion <- pure $ fromMaybe (uuidVersion prev) rawUuidVersion
        uuidOrigin <-
            pure $ reuse rawReuseOrigin rawOrigin (uuidOrigin prev)
        pure $ UUID.build UuidFields{..}
  where

    whichPrev changeZipBase
        | changeZipBase = sameOpPrevUuid
        | otherwise = case defaultZipBase of
            PrevOpSameKey  -> prevOpSameKey
            SameOpPrevUuid -> sameOpPrevUuid

    reuse :: Maybe Int -> Maybe Word60 -> Word60 -> Word60
    reuse Nothing          Nothing    prev = prev
    reuse Nothing          (Just new) _    = new
    reuse (Just prefixLen) Nothing    prev =
        ls60 $ safeCast prev .&. complement 0 `shiftL` (60 - 6 * prefixLen)
    reuse (Just prefixLen) (Just new) prev = ls60 $ prefix .|. postfix
      where
        prefix  = safeCast prev .&. complement 0 `shiftL` (60 - 6 * prefixLen)
        postfix = safeCast new `shiftR` (6 * prefixLen)

pReuse :: Parser Int
pReuse = anyChar >>= \case
    '(' -> pure 4
    '[' -> pure 5
    '{' -> pure 6
    '}' -> pure 7
    ']' -> pure 8
    ')' -> pure 9
    _   -> fail "not a reuse symbol"

uuid22 :: Parser UUID
uuid22 = label "UUID-Base64-double-word" $ do
    xy <- base64word 22
    guard $ BS.length xy == 22
    maybe (fail "Base64 decoding error") pure $
        UUID
            <$> Base64.decode64 (BS.take 11 xy)
            <*> Base64.decode64 (BS.drop 11 xy)

base64word :: Int -> Parser ByteString
base64word maxSize = label "Base64 word" $ do
    word <- takeWhile1 Base64.isLetter
    guard $ BS.length word <= maxSize
    pure word

base64word60 :: Int -> Parser Word60
base64word60 maxSize = label "Base64 word60" $ do
    word <- base64word maxSize
    Base64.decode60 word ?? fail "decode60"

isUpperHexDigit :: Word8 -> Bool
isUpperHexDigit c =
    (fromIntegral (c - fromIntegral (ord '0')) :: Word) <= 9 ||
    (fromIntegral (c - fromIntegral (ord 'A')) :: Word) <= 5

pVariety :: Parser Word4
pVariety = label "variety" $ do
    letter <- satisfy isUpperHexDigit <* "/"
    Base64.decodeLetter4 letter ?? fail "Base64.decodeLetter4"

pUuidVersion :: Parser Word2
pUuidVersion = label "UUID-version" $
    anyChar >>= \case
        '$' -> pure b00
        '%' -> pure b01
        '+' -> pure b10
        '-' -> pure b11
        _   -> fail "not a UUID-version"

payload :: UUID -> Parser [Atom]
payload = label "payload" . go
  where
    go prevUuid = do
        ma <- optional $ atom prevUuid
        case ma of
            Nothing -> pure []
            Just a  -> (a :) <$> go newUuid
              where
                newUuid = case a of
                    AUuid u -> u
                    _       -> prevUuid

atom :: UUID -> Parser Atom
atom prevUuid = skipSpace *> atom'
  where
    atom' =
        char '^' *> skipSpace *> (AFloat   <$> double ) <+>
        char '=' *> skipSpace *> (AInteger <$> integer) <+>
        char '>' *> skipSpace *> (AUuid    <$> uuid'  ) <+>
        AString                            <$> string
    integer = signed decimal
    uuid'   = uuidAtom prevUuid

uuidAtom :: UUID -> Parser UUID
uuidAtom prev = uuid UUID.zero prev SameOpPrevUuid

-- | Parse an atom
parseAtom :: ByteStringL -> Either String Atom
parseAtom = parseOnlyL $ atom UUID.zero <* endOfInputEx

string :: Parser Text
string = do
    bs <- char '\'' *> content
    case Json.decodeStrict $ '"' `BSC.cons` (bs `BSC.snoc` '"') of
        Just s  -> pure s
        Nothing -> fail "bad string"
  where
    content = do
        chunk <- takeWhile $ \c -> c /= '\'' && c /= '\\'
        anyChar >>= \case
            '\'' -> pure chunk
            '\\' -> anyChar >>= \case
                '\'' -> (chunk <>) . BSC.cons '\'' <$> content
                c    -> (chunk <>) . BSC.cons '\\' . BSC.cons c <$> content
            _ -> fail "cannot happen"

-- | Parse a string atom
parseString :: ByteStringL -> Either String Text
parseString = parseOnlyL $ string <* endOfInputEx

-- | Return 'RawOp' and 'chunkIsQuery'
header :: RawOp -> Parser (RawOp, Bool)
header prev = do
    (_, x) <- rawOp prev
    t <- term
    case t of
        THeader -> pure (x, False)
        TQuery  -> pure (x, True)
        _       -> fail "not a header"

term :: Parser OpTerm
term = do
    skipSpace
    anyChar >>= \case
        '!' -> pure THeader
        '?' -> pure TQuery
        ',' -> pure TReduced
        ';' -> pure TRaw
        _   -> fail "not a term"

-- | Parse a state frame
parseStateFrame :: ByteStringL -> Either String StateFrame
parseStateFrame = parseWireFrame >=> findObjects

-- | Parse a state frame as an object
parseObject :: UUID -> ByteStringL -> Either String (Object a)
parseObject oid bytes = Object oid <$> parseStateFrame bytes

-- | Extract object states from a common frame
findObjects :: WireFrame -> Either String StateFrame
findObjects = fmap Map.fromList . traverse loadBody where
    loadBody = \case
        Value WireReducedChunk{..} -> do
            let RawOp{..} = wrcHeader
            let Op{..} = op
            let stateVersion = opEvent
            let stateBody = wrcBody
            pure ((opType, opObject), StateChunk{..})
        _ -> Left "expected reduced chunk"

opZero :: RawOp
opZero = RawOp
    { opType   = UUID.zero
    , opObject = UUID.zero
    , op       = Op{opEvent = UUID.zero, opRef = UUID.zero, opPayload = []}
    }