{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module RON.Text.Parse
( parseAtom
, parseObject
, parseOp
, parseStateFrame
, parseString
, parseUuid
, parseUuidKey
, parseUuidAtom
, parseWireFrame
, parseWireFrames
) where
import Prelude hiding (takeWhile)
import RON.Internal.Prelude
import Attoparsec.Extra (Parser, char, endOfInputEx, isSuccessful,
label, manyTill, parseOnlyL, satisfy, (<+>),
(??))
import qualified Data.Aeson as Json
import Data.Attoparsec.ByteString (takeWhile1)
import Data.Attoparsec.ByteString.Char8 (anyChar, decimal, double,
signed, skipSpace, takeWhile)
import Data.Bits (complement, shiftL, shiftR, (.&.), (.|.))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import Data.Char (ord)
import qualified Data.Map.Strict as Map
import Data.Text (Text)
import qualified RON.Base64 as Base64
import RON.Types (Atom (AFloat, AInteger, AString, AUuid),
Object (..), Op (..),
OpTerm (THeader, TQuery, TRaw, TReduced),
RawOp (..), StateChunk (..), StateFrame,
UUID (UUID), WireChunk (Query, Raw, Value),
WireFrame, WireReducedChunk (..))
import RON.Util.Word (Word2, Word4, Word60, b00, b0000, b01, b10, b11,
ls60, safeCast)
import RON.UUID (UuidFields (..))
import qualified RON.UUID as UUID
parseWireFrame :: ByteStringL -> Either String WireFrame
parseWireFrame = parseOnlyL frame
chunksTill :: Parser () -> Parser [WireChunk]
chunksTill end = label "[WireChunk]" $ go opZero
where
go prev = do
skipSpace
atEnd <- isSuccessful end
if atEnd then
pure []
else do
(ch, lastOp) <- pChunk prev
(ch :) <$> go lastOp
pChunk :: RawOp -> Parser (WireChunk, RawOp)
pChunk prev = label "WireChunk" $ wireStateChunk prev <+> chunkRaw prev
chunkRaw :: RawOp -> Parser (WireChunk, RawOp)
chunkRaw prev = label "WireChunk-raw" $ do
skipSpace
(_, x) <- rawOp prev
skipSpace
void $ char ';'
pure (Raw x, x)
wireStateChunk :: RawOp -> Parser (WireChunk, RawOp)
wireStateChunk prev = label "WireChunk-reduced" $ do
(wrcHeader, isQuery) <- header prev
let reducedOps y = do
skipSpace
(isNotEmpty, x) <- reducedOp (opObject wrcHeader) y
t <- optional term
unless (t == Just TReduced || isNothing t) $
fail "reduced op may end with `,` only"
unless (isNotEmpty || t == Just TReduced) $ fail "Empty reduced op"
xs <- reducedOps x <|> stop
pure $ x : xs
wrcBody <- reducedOps (op wrcHeader) <|> stop
let lastOp = case wrcBody of
[] -> op wrcHeader
_ -> last wrcBody
wrap op = RawOp
{opType = opType wrcHeader, opObject = opObject wrcHeader, op}
pure ((if isQuery then Query else Value) WireReducedChunk{..}, wrap lastOp)
where
stop = pure []
frame :: Parser WireFrame
frame = label "WireFrame" $ chunksTill (endOfFrame <|> endOfInputEx)
parseWireFrames :: ByteStringL -> Either String [WireFrame]
parseWireFrames = parseOnlyL $ manyTill frameInStream endOfInputEx
frameInStream :: Parser WireFrame
frameInStream = label "WireFrame-stream" $ chunksTill endOfFrame
parseOp :: ByteStringL -> Either String RawOp
parseOp = parseOnlyL $ do
(_, x) <- rawOp opZero <* skipSpace <* endOfInputEx
pure x
parseUuid :: ByteStringL -> Either String UUID
parseUuid = parseOnlyL $
uuid UUID.zero UUID.zero PrevOpSameKey <* skipSpace <* endOfInputEx
parseUuidKey
:: UUID
-> UUID
-> ByteStringL
-> Either String UUID
parseUuidKey prevKey prev =
parseOnlyL $ uuid prevKey prev PrevOpSameKey <* skipSpace <* endOfInputEx
parseUuidAtom
:: UUID
-> ByteStringL
-> Either String UUID
parseUuidAtom prev = parseOnlyL $ uuidAtom prev <* skipSpace <* endOfInputEx
endOfFrame :: Parser ()
endOfFrame = label "end of frame" $ void $ skipSpace *> char '.'
rawOp :: RawOp -> Parser (Bool, RawOp)
rawOp prev = label "RawOp-cont" $ do
(hasTyp, opType) <- key "type" '*' (opType prev) UUID.zero
(hasObj, opObject) <- key "object" '#' (opObject prev) opType
(hasEvt, opEvent) <- key "event" '@' (opEvent prev') opObject
(hasLoc, opRef) <- key "ref" ':' (opRef prev') opEvent
opPayload <- payload opObject
let op = Op{..}
pure
( hasTyp || hasObj || hasEvt || hasLoc || not (null opPayload)
, RawOp{..}
)
where
prev' = op prev
reducedOp :: UUID -> Op -> Parser (Bool, Op)
reducedOp opObject prev = label "Op-cont" $ do
(hasEvt, opEvent) <- key "event" '@' (opEvent prev) opObject
(hasLoc, opRef) <- key "ref" ':' (opRef prev) opEvent
opPayload <- payload opObject
let op = Op{..}
pure (hasEvt || hasLoc || not (null opPayload), op)
key :: String -> Char -> UUID -> UUID -> Parser (Bool, UUID)
key name keyChar prevOpSameKey sameOpPrevUuid = label name $ do
skipSpace
isKeyPresent <- isSuccessful $ char keyChar
if isKeyPresent then do
u <- uuid prevOpSameKey sameOpPrevUuid PrevOpSameKey
pure (True, u)
else
pure (False, prevOpSameKey)
uuid :: UUID -> UUID -> UuidZipBase -> Parser UUID
uuid prevOpSameKey sameOpPrevUuid defaultZipBase = label "UUID" $
uuid22 <+> uuid11 <+> uuidZip prevOpSameKey sameOpPrevUuid defaultZipBase
uuid11 :: Parser UUID
uuid11 = label "UUID-RON-11-letter-value" $ do
rawX <- base64word 11
guard $ BS.length rawX == 11
x <- Base64.decode64 rawX ?? fail "Base64.decode64"
skipSpace
rawUuidVersion <- optional pUuidVersion
rawOrigin <- optional $ base64word $ maybe 11 (const 10) rawUuidVersion
y <- case (rawUuidVersion, BS.length <$> rawOrigin) of
(Nothing, Just 11) ->
case rawOrigin of
Nothing -> pure 0
Just origin -> Base64.decode64 origin ?? fail "Base64.decode64"
_ -> do
origin <- case rawOrigin of
Nothing -> pure $ ls60 0
Just origin -> Base64.decode60 origin ?? fail "Base64.decode60"
pure $ UUID.buildY b00 (fromMaybe b00 rawUuidVersion) origin
pure $ UUID x y
data UuidZipBase = PrevOpSameKey | SameOpPrevUuid
uuidZip :: UUID -> UUID -> UuidZipBase -> Parser UUID
uuidZip prevOpSameKey sameOpPrevUuid defaultZipBase = label "UUID-zip" $ do
changeZipBase <- isSuccessful $ char '`'
rawVariety <- optional pVariety
rawReuseValue <- optional pReuse
rawValue <- optional $ base64word60 $ 10 - fromMaybe 0 rawReuseValue
skipSpace
rawUuidVersion <- optional pUuidVersion
rawReuseOrigin <- optional pReuse
rawOrigin <- optional $ base64word60 $ 10 - fromMaybe 0 rawReuseOrigin
let prev = UUID.split $ whichPrev changeZipBase
let isSimple
= uuidVariant prev /= b00
|| ( not changeZipBase
&& isNothing rawReuseValue && isJust rawValue
&& isNothing rawReuseOrigin
&& (isNothing rawUuidVersion || isJust rawOrigin)
)
if isSimple then
pure $ UUID.build UuidFields
{ uuidVariety = fromMaybe b0000 rawVariety
, uuidValue = fromMaybe (ls60 0) rawValue
, uuidVariant = b00
, uuidVersion = fromMaybe b00 rawUuidVersion
, uuidOrigin = fromMaybe (ls60 0) rawOrigin
}
else do
uuidVariety <- pure $ fromMaybe (uuidVariety prev) rawVariety
uuidValue <- pure $ reuse rawReuseValue rawValue (uuidValue prev)
let uuidVariant = b00
uuidVersion <- pure $ fromMaybe (uuidVersion prev) rawUuidVersion
uuidOrigin <-
pure $ reuse rawReuseOrigin rawOrigin (uuidOrigin prev)
pure $ UUID.build UuidFields{..}
where
whichPrev changeZipBase
| changeZipBase = sameOpPrevUuid
| otherwise = case defaultZipBase of
PrevOpSameKey -> prevOpSameKey
SameOpPrevUuid -> sameOpPrevUuid
reuse :: Maybe Int -> Maybe Word60 -> Word60 -> Word60
reuse Nothing Nothing prev = prev
reuse Nothing (Just new) _ = new
reuse (Just prefixLen) Nothing prev =
ls60 $ safeCast prev .&. complement 0 `shiftL` (60 - 6 * prefixLen)
reuse (Just prefixLen) (Just new) prev = ls60 $ prefix .|. postfix
where
prefix = safeCast prev .&. complement 0 `shiftL` (60 - 6 * prefixLen)
postfix = safeCast new `shiftR` (6 * prefixLen)
pReuse :: Parser Int
pReuse = anyChar >>= \case
'(' -> pure 4
'[' -> pure 5
'{' -> pure 6
'}' -> pure 7
']' -> pure 8
')' -> pure 9
_ -> fail "not a reuse symbol"
uuid22 :: Parser UUID
uuid22 = label "UUID-Base64-double-word" $ do
xy <- base64word 22
guard $ BS.length xy == 22
maybe (fail "Base64 decoding error") pure $
UUID
<$> Base64.decode64 (BS.take 11 xy)
<*> Base64.decode64 (BS.drop 11 xy)
base64word :: Int -> Parser ByteString
base64word maxSize = label "Base64 word" $ do
word <- takeWhile1 Base64.isLetter
guard $ BS.length word <= maxSize
pure word
base64word60 :: Int -> Parser Word60
base64word60 maxSize = label "Base64 word60" $ do
word <- base64word maxSize
Base64.decode60 word ?? fail "decode60"
isUpperHexDigit :: Word8 -> Bool
isUpperHexDigit c =
(fromIntegral (c - fromIntegral (ord '0')) :: Word) <= 9 ||
(fromIntegral (c - fromIntegral (ord 'A')) :: Word) <= 5
pVariety :: Parser Word4
pVariety = label "variety" $ do
letter <- satisfy isUpperHexDigit <* "/"
Base64.decodeLetter4 letter ?? fail "Base64.decodeLetter4"
pUuidVersion :: Parser Word2
pUuidVersion = label "UUID-version" $
anyChar >>= \case
'$' -> pure b00
'%' -> pure b01
'+' -> pure b10
'-' -> pure b11
_ -> fail "not a UUID-version"
payload :: UUID -> Parser [Atom]
payload = label "payload" . go
where
go prevUuid = do
ma <- optional $ atom prevUuid
case ma of
Nothing -> pure []
Just a -> (a :) <$> go newUuid
where
newUuid = case a of
AUuid u -> u
_ -> prevUuid
atom :: UUID -> Parser Atom
atom prevUuid = skipSpace *> atom'
where
atom' =
char '^' *> skipSpace *> (AFloat <$> double ) <+>
char '=' *> skipSpace *> (AInteger <$> integer) <+>
char '>' *> skipSpace *> (AUuid <$> uuid' ) <+>
AString <$> string
integer = signed decimal
uuid' = uuidAtom prevUuid
uuidAtom :: UUID -> Parser UUID
uuidAtom prev = uuid UUID.zero prev SameOpPrevUuid
parseAtom :: ByteStringL -> Either String Atom
parseAtom = parseOnlyL $ atom UUID.zero <* endOfInputEx
string :: Parser Text
string = do
bs <- char '\'' *> content
case Json.decodeStrict $ '"' `BSC.cons` (bs `BSC.snoc` '"') of
Just s -> pure s
Nothing -> fail "bad string"
where
content = do
chunk <- takeWhile $ \c -> c /= '\'' && c /= '\\'
anyChar >>= \case
'\'' -> pure chunk
'\\' -> anyChar >>= \case
'\'' -> (chunk <>) . BSC.cons '\'' <$> content
c -> (chunk <>) . BSC.cons '\\' . BSC.cons c <$> content
_ -> fail "cannot happen"
parseString :: ByteStringL -> Either String Text
parseString = parseOnlyL $ string <* endOfInputEx
header :: RawOp -> Parser (RawOp, Bool)
header prev = do
(_, x) <- rawOp prev
t <- term
case t of
THeader -> pure (x, False)
TQuery -> pure (x, True)
_ -> fail "not a header"
term :: Parser OpTerm
term = do
skipSpace
anyChar >>= \case
'!' -> pure THeader
'?' -> pure TQuery
',' -> pure TReduced
';' -> pure TRaw
_ -> fail "not a term"
parseStateFrame :: ByteStringL -> Either String StateFrame
parseStateFrame = parseWireFrame >=> findObjects
parseObject :: UUID -> ByteStringL -> Either String (Object a)
parseObject oid bytes = Object oid <$> parseStateFrame bytes
findObjects :: WireFrame -> Either String StateFrame
findObjects = fmap Map.fromList . traverse loadBody where
loadBody = \case
Value WireReducedChunk{..} -> do
let RawOp{..} = wrcHeader
let Op{..} = op
let stateVersion = opEvent
let stateBody = wrcBody
pure ((opType, opObject), StateChunk{..})
_ -> Left "expected reduced chunk"
opZero :: RawOp
opZero = RawOp
{ opType = UUID.zero
, opObject = UUID.zero
, op = Op{opEvent = UUID.zero, opRef = UUID.zero, opPayload = []}
}