module Data.JsonStream.TokenParser (
Element(..)
, TokenResult(..)
, tokenParser
) where
import Control.Applicative
import Control.Monad (replicateM, when, (>=>))
import qualified Data.Aeson as AE
import qualified Data.ByteString as BSW
import qualified Data.ByteString.Char8 as BS
import Data.Char (isDigit, isDigit, isLower, isSpace)
import Data.Scientific (scientific)
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
data Element = ArrayBegin | ArrayEnd | ObjectBegin | ObjectEnd
| ObjectKey T.Text | JValue AE.Value
deriving (Show, Eq)
data TokenResult' a = TokMoreData' (BS.ByteString -> TokenParser a) BS.ByteString
| PartialResult' Element (TokenParser a) BS.ByteString
| TokFailed' BS.ByteString
| Intermediate' a
data TokenResult = TokMoreData (BS.ByteString -> TokenResult) BS.ByteString
| PartialResult Element (TokenResult) BS.ByteString
| TokFailed BS.ByteString
instance Show TokenResult where
show (TokMoreData _ ctx) = "(TokMoreData' + " ++ show ctx ++ ")"
show (TokFailed _) = "TokFailed'"
show (PartialResult el _ rest) = "(PartialResult' " ++ show el ++ " " ++ show rest ++ ")"
data State = State {
stData :: BS.ByteString
, stContext :: BS.ByteString
}
newtype TokenParser a = TokenParser {
runTokParser :: State -> (TokenResult' a, State)
}
instance Monad TokenParser where
return x = TokenParser $ \s -> (Intermediate' x, s)
m >>= mpost = TokenParser $ \s ->
let (res, newstate) = runTokParser m s
in case res of
TokMoreData' cont context -> (TokMoreData' (cont >=> mpost) context, newstate)
PartialResult' el tokp context -> (PartialResult' el (tokp >>= mpost) context, newstate)
TokFailed' context -> (TokFailed' context, newstate)
Intermediate' result -> runTokParser (mpost result) newstate
instance Functor TokenResult' where
fmap f (TokMoreData' newp ctx) = TokMoreData' (fmap f . newp) ctx
fmap f (PartialResult' el tok ctx) = PartialResult' el (fmap f tok) ctx
fmap _ (TokFailed' ctx) = TokFailed' ctx
fmap f (Intermediate' a) = Intermediate' (f a)
instance Applicative TokenParser where
pure = return
f <*> param = do
mf <- f
mparam <- param
return (mf mparam)
instance Functor TokenParser where
fmap f tokp = TokenParser $ \s ->
let (res, newstate) = runTokParser tokp s
in (fmap f res, newstate)
failTok :: TokenParser a
failTok = TokenParser $ \s -> (TokFailed' (stContext s), s)
isBreakChar :: Char -> Bool
isBreakChar c = isSpace c || (c == '{') || (c == '[') || (c == '}') || (c == ']') || (c == ',')
peekChar :: TokenParser Char
peekChar = TokenParser handle
where
handle st@(State dta context)
| BS.null dta = (TokMoreData' (\newdta -> TokenParser $ \_ -> handle (State newdta (BS.append context newdta)))
context
, st)
| otherwise = (Intermediate' (BS.head dta), st)
pickChar :: TokenParser Char
pickChar = TokenParser handle
where
handle st@(State dta context)
| BS.null dta = (TokMoreData' (\newdta -> TokenParser $ \_ -> handle (State newdta (BS.append context newdta)))
context
, st)
| otherwise = (Intermediate' (BS.head dta), State (BS.tail dta) context)
yield :: Element -> TokenParser ()
yield el = TokenParser $ \state@(State dta ctx) -> (PartialResult' el (contparse dta) ctx, state)
where
contparse dta = TokenParser $ const (Intermediate' (), State dta dta )
getWhile' :: (Char -> Bool) -> TokenParser (BS.ByteString, Bool)
getWhile' predicate = do
char <- peekChar
if predicate char then getBuf
else return ("", True)
where
getBuf = TokenParser $ \(State dta ctx) ->
let (st,rest) = BS.span predicate dta
in (Intermediate' (st, not (BS.null rest)), State rest ctx)
getWhile :: (Char -> Bool) -> TokenParser BS.ByteString
getWhile predicate = do
(dta, complete) <- getWhile' predicate
if complete
then return dta
else loop [dta]
where
loop acc = do
(dta, complete) <- getWhile' predicate
if complete
then return $ BS.concat $ reverse (dta:acc)
else loop (dta:acc)
parseIdent :: TokenParser ()
parseIdent = do
ident <- getWhile isLower
nextchar <- peekChar
if | isBreakChar nextchar -> toTemp ident
| otherwise -> failTok
where
toTemp "true" = yield $ JValue $ AE.Bool True
toTemp "false" = yield $ JValue $ AE.Bool False
toTemp "null" = yield $ JValue AE.Null
toTemp _ = failTok
parseUnicode :: TokenParser Char
parseUnicode = do
lst <- replicateM 4 pickChar
return $ toEnum $ foldl1 (\a b -> 16 * a + b) $ map hexCharToInt lst
where
hexCharToInt :: Char -> Int
hexCharToInt c
| c >= 'A' && c <= 'F' = 10 + (fromEnum c fromEnum 'A')
| c >= 'a' && c <= 'f' = 10 + (fromEnum c fromEnum 'a')
| isDigit c = fromEnum c fromEnum '0'
| otherwise = error "Incorrect hex input, internal error."
chooseKeyOrValue :: T.Text -> TokenParser ()
chooseKeyOrValue text = do
chr <- peekChar
if | chr == ':' -> pickChar >> yield (ObjectKey text)
| isSpace chr -> getWhile' isSpace >> chooseKeyOrValue text
| otherwise -> yield $ JValue $ AE.String text
parseString :: TokenParser ()
parseString = do
(firstpart, _) <- getWhile' (\c -> c /= '"' && c /= '\\' )
chr <- peekChar
if chr == '"'
then pickChar >> handleDecode firstpart
else handleString [firstpart]
where
handleDecode str = case decodeUtf8' str of
Left _ -> failTok
Right val -> chooseKeyOrValue val
handleString acc = do
chr <- peekChar
case chr of
'"' -> do
_ <- pickChar
handleDecode (BS.concat $ reverse acc)
'\\' -> do
_ <- pickChar
specchr <- pickChar
nchr <- parseSpecChar specchr
handleString (encodeUtf8 (T.singleton nchr):acc)
_ -> do
dstr <- getWhile (\c -> c /= '"' && c /= '\\' )
handleString (dstr:acc)
parseSpecChar '"' = return '"'
parseSpecChar '\\' = return '\\'
parseSpecChar '/' = return '/'
parseSpecChar 'b' = return '\b'
parseSpecChar 'f' = return '\f'
parseSpecChar 'n' = return '\n'
parseSpecChar 'r' = return '\r'
parseSpecChar 't' = return '\t'
parseSpecChar 'u' = parseUnicode
parseSpecChar c = return c
parseNumber :: TokenParser ()
parseNumber = do
tnumber <- getWhile (\c -> isDigit c || c == '.' || c == '+' || c == '-' || c == 'e' || c == 'E')
let
([(texp, _), (frac, frdigits), (num, numdigits), (csign, _)], rest) =
foldl parseStep ([], tnumber) [parseSign, parseDecimal, parseFract, parseE]
when (numdigits == 0 || not (BS.null rest)) failTok
let dpart = fromIntegral csign * (fromIntegral num * (10 ^ frdigits) + fromIntegral frac) :: Integer
e = texp frdigits
yield $ JValue $ AE.Number $ scientific dpart e
where
parseStep :: ([(Int, Int)], BS.ByteString) -> (BS.ByteString -> ((Int, Int), BS.ByteString)) -> ([(Int, Int)], BS.ByteString)
parseStep (lst, txt) f =
let (newi, rest) = f txt
in (newi:lst, rest)
parseFract txt
| BS.null txt = ((0, 0), txt)
| BS.head txt == '.' = parseDecimal (BS.tail txt)
| otherwise = ((0,0), txt)
parseE txt
| BS.null txt = ((0, 0), txt)
| firstc == 'e' || firstc == 'E' =
let ((sign, d1), rest) = parseSign (BS.tail txt)
((dnum, d2), trest) = parseDecimal rest
in ((dnum * sign, d1 + d2), trest)
| otherwise = ((0,0), txt)
where
firstc = BS.head txt
parseSign txt
| BS.null txt = ((1, 0), txt)
| BS.head txt == '+' = ((1, 1), BS.tail txt)
| BS.head txt == '-' = ((1, 1), BS.tail txt)
| otherwise = ((1, 0), txt)
parseDecimal txt
| BS.null txt = ((0, 0), txt)
| otherwise = parseNum txt (0,0)
parseNum txt (!start, !digits)
| BS.null txt = ((start, digits), txt)
| dchr >= 48 && dchr <= 57 = parseNum (BS.tail txt) (start * 10 + fromIntegral (dchr 48), digits + 1)
| otherwise = ((start, digits), txt)
where
dchr = BSW.head txt
peekCharInMain :: TokenParser Char
peekCharInMain = TokenParser handle
where
handle st@(State dta ctx)
| BS.null dta = (TokMoreData' (\newdta -> TokenParser $ \_ -> handle (State newdta (BS.append ctx newdta)))
ctx
, st)
| chr == '[' = (PartialResult' ArrayBegin contparse ctx, st)
| chr == ']' = (PartialResult' ArrayEnd contparse ctx, st)
| chr == '{' = (PartialResult' ObjectBegin contparse ctx, st)
| chr == '}' = (PartialResult' ObjectEnd contparse ctx, st)
| chr == ',' || isSpace chr = handle (State (BS.dropWhile (\c -> c == ',' || isSpace c) ctx) ctx)
| chr == '"' = runTokParser (parseString >> peekCharInMain) (State rest ctx)
| otherwise = (Intermediate' (BS.head dta), st)
where
chr = BS.head dta
rest = BS.tail dta
contparse = TokenParser $ const $ handle (State rest rest)
mainParser :: TokenParser ()
mainParser = do
chr <- peekCharInMain
case chr of
't' -> parseIdent
'f' -> parseIdent
'n' -> parseIdent
'-' -> parseNumber
_| isDigit chr -> parseNumber
| otherwise -> failTok
tokenParser :: BS.ByteString -> TokenResult
tokenParser dta = handle $ runTokParser mainParser (State dta dta)
where
handle (TokMoreData' ntp ctx, st) = TokMoreData (\ndta -> handle $ runTokParser (ntp ndta) st) ctx
handle (PartialResult' el ntp ctx, st) = PartialResult el (handle $ runTokParser ntp st) ctx
handle (TokFailed' ctx, _) = TokFailed ctx
handle (Intermediate' _, st) = handle $ runTokParser mainParser st