-- -*- haskell -*-
--
-- Alex specification of a byte-level JSON scanner (c.f. RFC 7159).
--
-- Two start codes are used:
--
--   * @0@        everything outside of string literals
--   * @string@   the contents of a string literal, i.e. everything
--                between the opening and the closing quotation mark
--
-- The scanner itself never switches start codes; 'scanLexemes' selects
-- the start code for each call to 'alexScan'.

{
module Data.Aeson.Micro.Scanner (Lexeme(..), scanLexemes) where

import qualified Data.ByteString as B
import Data.Word
}

%encoding "latin1"

-- c.f. RFC 7159

$ws = [\x20\x09\x0a\x0d]

-- unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
$escaped   = [\x00-\x1f\x22\x5c]
$unescaped = [\x00-\xff] # $escaped

-- zero / ( digit1-9 *DIGIT )
@int = "0"|[1-9][0-9]*

-- decimal-point 1*DIGIT
@frac = "."[0-9]+

-- e [ minus / plus ] 1*DIGIT
@exp = [eE][\-\+]?[0-9]+

-- [ minus ] int [ frac ] [ exp ]
@num = "-"? @int @frac? @exp?

:-

-- Structural tokens, literals and numbers (outside of strings).
<0> $ws     ;
<0> "{"     { L_ObjStart }
<0> "}"     { L_ObjEnd   }
<0> "["     { L_ArrStart }
<0> "]"     { L_ArrEnd   }
<0> \"      { L_StrStart }
<0> \:      { L_Colon    }
<0> \,      { L_Comma    }
<0> "true"  { L_True     }
<0> "false" { L_False    }
<0> "null"  { L_Null     }
<0> @num    { L_Number   }

-- String contents. These rules MUST be restricted to the @string@ start
-- code: a rule without a start code is active in every state, and
-- @$unescaped+@ would then win the longest-match against the literal,
-- number and punctuation rules above. Declaring @<string>@ here is also
-- what makes Alex generate the @string@ constant used by 'scanLexemes'.
<string> [\x22]                                { L_StrEnd }
<string> $unescaped+                           { L_StrUnescaped }
<string> \\[\x22\x5c\x2f\x62\x66\x6e\x72\x74]  { L_StrEscaped }
-- \uXXXX outside of the surrogate range D800-DFFF
<string> \\"u"[0-9a-cA-CefEF][0-9a-fA-F]{3}    { L_StrEscapedHex }
<string> \\"u"[dD][0-7][0-9a-fA-F]{2}          { L_StrEscapedHex }
-- high surrogate (D800-DBFF) immediately followed by a low surrogate
-- (DC00-DFFF); an unpaired surrogate escape matches no rule at all
<string> \\"u"[dD][89abAB][0-9a-fA-F]{2} \\"u"[dD][c-fC-F][0-9a-fA-F]{2} { L_StrEscapedHexSurr }

{
-- | Token classes produced by 'scanLexemes'.
data Lexeme
  = L_ArrStart           -- ^ @[@
  | L_ArrEnd             -- ^ @]@
  | L_Colon              -- ^ @:@
  | L_Comma              -- ^ @,@
  | L_False              -- ^ the literal @false@
  | L_Null               -- ^ the literal @null@
  | L_Number             -- ^ a number matching the @\@num@ macro
  | L_ObjStart           -- ^ @{@
  | L_ObjEnd             -- ^ @}@
  | L_StrStart           -- ^ quotation mark opening a string
  | L_StrEnd             -- ^ quotation mark closing a string
  | L_StrEscaped         -- ^ two-byte escape: backslash plus one of @\" \\ \/ b f n r t@
  | L_StrEscapedHex      -- ^ a single @\\uXXXX@ escape outside @D800@-@DFFF@
  | L_StrEscapedHexSurr  -- ^ a @\\uXXXX\\uXXXX@ high/low surrogate pair
  | L_StrUnescaped       -- ^ non-empty run of bytes needing no escaping
  | L_True               -- ^ the literal @true@
  | L_LexError           -- ^ no rule matched; paired with the unconsumed input
  deriving (Eq,Ord,Show)

-- | The scanner operates directly on strict 'B.ByteString's.
type AlexInput = B.ByteString

-- | Input primitive required by the Alex-generated code: split off the
-- next byte, or return 'Nothing' at end of input.
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte = B.uncons

-- alexInputPrevChar :: AlexInput -> Char

-- generated by @alex@
alexScan :: AlexInput -> Int -> AlexReturn Lexeme

-- | Split the input into a lazy list of lexemes, each paired with the
-- bytes it was matched against.
--
-- Whitespace outside of strings is skipped. When no rule matches, the
-- list ends with a single @('L_LexError', rest)@ element, where @rest@ is
-- the input remaining at the point of failure.
scanLexemes :: B.ByteString -> [(Lexeme, B.ByteString)]
scanLexemes = go False
  where
    -- @inStr@ tracks whether we are between 'L_StrStart' and 'L_StrEnd'
    -- and thereby selects the start code handed to 'alexScan'.
    go inStr bs = case alexScan bs (if inStr then string else 0) of
        AlexEOF            -> []
        AlexError inp'     -> [(L_LexError,inp')]
        AlexSkip inp' _len -> go inStr inp'
        -- workaround for https://github.com/simonmar/alex/issues/119
        --
        -- The reported @len@ may be smaller than the number of bytes
        -- actually consumed; in that case trust the consumed byte count.
        -- If the guard fails we fall through to the generic case below.
        AlexToken inp' len L_StrUnescaped
          | consumed > len -> (L_StrUnescaped,B.take consumed bs) : go inStr inp'
          where
            consumed = B.length bs - B.length inp'
        AlexToken inp' len act ->
          (act,B.take len bs) : go (nextInStr act) inp'
      where
        -- Inside a string we stay there until the closing quote; outside
        -- of a string only an opening quote takes us in.
        nextInStr act
          | inStr     = act /= L_StrEnd
          | otherwise = act == L_StrStart
}