module Text.Yaml.Reference
(
Code,
Token,
Tokenizer,
yaml,
Context,
Chomp,
tokenizer,
tokenizerWithN,
tokenizerWithC,
tokenizerWithT,
tokenizerWithNC,
tokenizerWithNT,
tokenizerNames,
showTokens
)
where
import Control.Monad
import qualified Data.ByteString.Lazy.Char8 as C
import Data.Char
import qualified Data.DList as D
import qualified Data.Map as Map
import Text.Regex
import Debug.Trace
import qualified Prelude
import Prelude hiding ((/), (*), (+), (), (^))
infixl 6 .+
(.+) = (Prelude.+)
infixl 6 .-
(.-) = (Prelude.-)
infixl 7 .*
(.*) = (Prelude.*)
infixl 7 ./
(./) = (Prelude./)
infixl 9 |>
(|>) :: record -> (record -> value) -> value
record |> field = field record
data Encoding = UTF8
| UTF16LE
| UTF16BE
| UTF32LE
| UTF32BE
deriving Show
decode :: C.ByteString -> (Encoding, [Char])
decode text = (encoding, undoEncoding encoding text)
where encoding = detectEncoding $ C.unpack $ C.take 4 text
detectEncoding :: [Char] -> Encoding
detectEncoding text =
case text of
'\x00' : '\x00' : '\xFE' : '\xFF' : _ -> UTF32BE
'\x00' : '\x00' : '\x00' : _ : _ -> UTF32BE
'\xFF' : '\xFE' : '\x00' : '\x00' : _ -> UTF32LE
_ : '\x00' : '\x00' : '\x00' : _ -> UTF32LE
'\xFE' : '\xFF' : _ -> UTF16BE
'\x00' : _ : _ -> UTF16BE
'\xFF' : '\xFE' : _ -> UTF16LE
_ : '\x00' : _ -> UTF16LE
'\xEF' : '\xBB' : '\xBF' : _ -> UTF8
_ -> UTF8
undoEncoding :: Encoding -> C.ByteString -> [Char]
undoEncoding encoding bytes =
case encoding of
UTF8 -> undoUTF8 bytes
UTF16LE -> combinePairs $ undoUTF16LE bytes
UTF16BE -> combinePairs $ undoUTF16BE bytes
UTF32LE -> combinePairs $ undoUTF32LE bytes
UTF32BE -> combinePairs $ undoUTF32BE bytes
hasFewerThan :: Int -> C.ByteString -> Bool
hasFewerThan n bytes
| n == 1 = C.null bytes
| n > 1 = C.null bytes || hasFewerThan (n .- 1) (C.tail bytes)
undoUTF32LE :: C.ByteString -> [Char]
undoUTF32LE bytes
| C.null bytes = []
| hasFewerThan 4 bytes = error "UTF-32-LE input contains invalid number of bytes"
| otherwise = let first = C.head bytes
bytes' = C.tail bytes
second = C.head bytes'
bytes'' = C.tail bytes'
third = C.head bytes''
bytes''' = C.tail bytes''
fourth = C.head bytes'''
rest = C.tail bytes'''
in (chr $ (ord first)
.+ 256 .* ((ord second)
.+ 256 .* ((ord third)
.+ 256 .* ((ord fourth))))):(undoUTF32LE rest)
undoUTF32BE :: C.ByteString -> [Char]
undoUTF32BE bytes
| C.null bytes = []
| hasFewerThan 4 bytes = error "UTF-32-BE input contains invalid number of bytes"
| otherwise = let first = C.head bytes
bytes' = C.tail bytes
second = C.head bytes'
bytes'' = C.tail bytes'
third = C.head bytes''
bytes''' = C.tail bytes''
fourth = C.head bytes'''
rest = C.tail bytes'''
in (chr $ (ord fourth)
.+ 256 .* ((ord third)
.+ 256 .* ((ord second)
.+ 256 .* ((ord first))))):(undoUTF32BE rest)
combinePairs :: [Char] -> [Char]
combinePairs [] = []
combinePairs (lead:rest)
| '\xD800' <= lead && lead <= '\xDBFF' = combineLead lead rest
| '\xDC00' <= lead && lead <= '\xDFFF' = error "UTF-16 contains trail surrogate without lead surrogate"
| otherwise = lead:(combinePairs rest)
combineLead :: Char -> [Char] -> [Char]
combineLead lead [] = error "UTF-16 contains lead surrogate as final character"
combineLead lead (trail:rest)
| '\xDC00' <= trail && trail <= '\xDFFF' = (combineSurrogates lead trail):combinePairs rest
| otherwise = error "UTF-16 contains lead surrogate without trail surrogate"
surrogateOffset :: Int
surrogateOffset = 0x10000 .- (0xD800 .* 1024) .- 0xDC00
combineSurrogates :: Char -> Char -> Char
combineSurrogates lead trail = chr $ (ord lead) .* 1024 .+ (ord trail) .+ surrogateOffset
undoUTF16LE :: C.ByteString -> [Char]
undoUTF16LE bytes
| C.null bytes = []
| hasFewerThan 2 bytes = error "UTF-16-LE input contains odd number of bytes"
| otherwise = let low = C.head bytes
bytes' = C.tail bytes
high = C.head bytes'
rest = C.tail bytes'
in (chr $ (ord low) .+ (ord high) .* 256):(undoUTF16LE rest)
undoUTF16BE :: C.ByteString -> [Char]
undoUTF16BE bytes
| C.null bytes = []
| hasFewerThan 2 bytes = error "UTF-16-BE input contains odd number of bytes"
| otherwise = let high = C.head bytes
bytes' = C.tail bytes
low = C.head bytes'
rest = C.tail bytes'
in (chr $ (ord low) .+ (ord high) .* 256):(undoUTF16BE rest)
undoUTF8 :: C.ByteString -> [Char]
undoUTF8 bytes
| C.null bytes = []
| otherwise = let first = C.head bytes
rest = C.tail bytes
in case () of
_ | first < '\x80' -> first:(undoUTF8 rest)
| first < '\xC0' -> error "UTF-8 input contains invalid first byte"
| first < '\xE0' -> decodeTwoUTF8 first rest
| first < '\xF0' -> decodeThreeUTF8 first rest
| first < '\xF8' -> decodeFourUTF8 first rest
| otherwise -> error "UTF-8 input contains invalid first byte"
decodeTwoUTF8 :: Char -> C.ByteString -> [Char]
decodeTwoUTF8 first bytes
| C.null bytes = error "UTF-8 double byte char is missing second byte at eof"
| otherwise = let second = C.head bytes
rest = C.tail bytes
in case () of
_ | second < '\x80' || '\xBF' < second -> error "UTF-8 double byte char has invalid second byte"
| otherwise -> (combineTwoUTF8 first second):(undoUTF8 rest)
combineTwoUTF8 :: Char -> Char -> Char
combineTwoUTF8 first second = chr(((ord first) .- 0xC0) .* 64
.+ ((ord second) .- 0x80))
decodeThreeUTF8 :: Char -> C.ByteString -> [Char]
decodeThreeUTF8 first bytes
| hasFewerThan 2 bytes = error "UTF-8 triple byte char is missing bytes at eof"
| otherwise = let second = C.head bytes
bytes' = C.tail bytes
third = C.head bytes'
rest = C.tail bytes'
in case () of
_ | second < '\x80' || '\xBF' < second -> error "UTF-8 triple byte char has invalid second byte"
| third < '\x80' || '\xBF' < third -> error "UTF-8 triple byte char has invalid third byte"
| otherwise -> (combineThreeUTF8 first second third):(undoUTF8 rest)
combineThreeUTF8 :: Char -> Char -> Char -> Char
combineThreeUTF8 first second third = chr(((ord first) .- 0xE0) .* 4096
.+ ((ord second) .- 0x80) .* 64
.+ ((ord third) .- 0x80))
decodeFourUTF8 :: Char -> C.ByteString -> [Char]
decodeFourUTF8 first bytes
| hasFewerThan 3 bytes = error "UTF-8 quad byte char is missing bytes at eof"
| otherwise = let second = C.head bytes
bytes' = C.tail bytes
third = C.head bytes'
bytes'' = C.tail bytes'
fourth = C.head bytes''
rest = C.tail bytes''
in case () of
_ | second < '\x80' || '\xBF' < second -> error "UTF-8 quad byte char has invalid second byte"
| third < '\x80' || '\xBF' < third -> error "UTF-8 quad byte char has invalid third byte"
| third < '\x80' || '\xBF' < third -> error "UTF-8 quad byte char has invalid fourth byte"
| otherwise -> (combineFourUTF8 first second third fourth):(undoUTF8 rest)
combineFourUTF8 :: Char -> Char -> Char -> Char -> Char
combineFourUTF8 first second third fourth = chr(((ord first) .- 0xF0) .* 262144
.+ ((ord second) .- 0x80) .* 4096
.+ ((ord third) .- 0x80) .* 64
.+ ((ord fourth) .- 0x80))
data Code = Bom
| Text
| Meta
| Break
| Continue
| LineFeed
| LineFold
| Indicator
| White
| Indent
| DocumentStart
| DocumentEnd
| BeginEscape
| EndEscape
| BeginComment
| EndComment
| BeginDirective
| EndDirective
| BeginTag
| EndTag
| BeginHandle
| EndHandle
| BeginAnchor
| EndAnchor
| BeginProperties
| EndProperties
| BeginAlias
| EndAlias
| BeginScalar
| EndScalar
| BeginSequence
| EndSequence
| BeginMapping
| EndMapping
| BeginPair
| EndPair
| BeginNode
| EndNode
| BeginDocument
| EndDocument
| BeginStream
| EndStream
| Error
| Unparsed
| Test
| Detected
deriving Eq
instance Show Code where
show code = case code of
Bom -> "U"
Text -> "T"
Meta -> "t"
Break -> "B"
Continue -> "b"
LineFeed -> "L"
LineFold -> "l"
Indicator -> "I"
White -> "w"
Indent -> "i"
DocumentStart -> "K"
DocumentEnd -> "k"
BeginEscape -> "E"
EndEscape -> "e"
BeginComment -> "C"
EndComment -> "c"
BeginDirective -> "D"
EndDirective -> "d"
BeginTag -> "G"
EndTag -> "g"
BeginHandle -> "H"
EndHandle -> "h"
BeginAnchor -> "A"
EndAnchor -> "a"
BeginProperties -> "P"
EndProperties -> "p"
BeginAlias -> "R"
EndAlias -> "r"
BeginScalar -> "S"
EndScalar -> "s"
BeginSequence -> "Q"
EndSequence -> "q"
BeginMapping -> "M"
EndMapping -> "m"
BeginNode -> "N"
EndNode -> "n"
BeginPair -> "X"
EndPair -> "x"
BeginDocument -> "O"
EndDocument -> "o"
BeginStream -> "Y"
EndStream -> "y"
Error -> "!"
Unparsed -> "-"
Test -> "?"
Detected -> "$"
data Token = Token {
tCode :: Code,
tText :: String
}
instance Show Token where
show token = (show $ token|>tCode) ++ (escapeString $ token|>tText) ++ "\n"
escapeString :: String -> String
escapeString [] = []
escapeString (first:rest)
| ' ' <= first && first /= '\\' && first <= '~' = first:(escapeString rest)
| first <= '\xFF' = "\\x" ++ (toHex 2 $ ord first) ++ (escapeString rest)
| '\xFF' < first && first <= '\xFFFF' = "\\u" ++ (toHex 4 $ ord first) ++ (escapeString rest)
| otherwise = "\\U" ++ (toHex 8 $ ord first) ++ (escapeString rest)
toHex :: Int -> Int -> String
toHex digits int
| digits > 1 = (toHex (digits .- 1) (int `div` 16)) ++ [intToDigit $ int `mod` 16]
| digits == 1 = [intToDigit int]
showTokens :: [Token] -> String
showTokens tokens = foldr (\ token text -> (show token) ++ text) "" tokens
data Parser result = Parser (State -> Reply result)
data Result result = Failed String
| Result result
| More (Parser result)
instance (Show result) => Show (Result result) where
show result = case result of
Failed message -> "Failed " ++ message
Result result -> "Result " ++ (show result)
More _ -> "More"
data Reply result = Reply {
rResult :: !(Result result),
rTokens :: !(D.DList Token),
rCommit :: !(Maybe String),
rState :: !State
}
instance (Show result) => Show (Reply result) where
show reply = "Result: " ++ (show $ reply|>rResult)
++ ", Tokens: " ++ (show $ D.toList $ reply|>rTokens)
++ ", Commit: " ++ (show $ reply|>rCommit)
++ ", State: { " ++ (show $ reply|>rState) ++ "}"
type Pattern = Parser ()
data State = State {
sName :: !String,
sEncoding :: !Encoding,
sDecision :: !String,
sLimit :: !Int,
sForbidden :: !(Maybe Pattern),
sIsPeek :: !Bool,
sChars :: ![Char],
sOffset :: !Int,
sLine :: !Int,
sColumn :: !Int,
sCode :: !Code,
sLast :: !Char,
sInput :: ![Char]
}
instance Show State where
show state = "Name: " ++ (show $ state|>sName)
++ ", Encoding: " ++ (show $ state|>sEncoding)
++ ", Decision: " ++ (show $ state|>sDecision)
++ ", Limit: " ++ (show $ state|>sLimit)
++ ", IsPeek: " ++ (show $ state|>sIsPeek)
++ ", Chars: >>>" ++ (reverse $ state|>sChars) ++ "<<<"
++ ", Offset: " ++ (show $ state|>sOffset)
++ ", Line: " ++ (show $ state|>sLine)
++ ", Column: " ++ (show $ state|>sColumn)
++ ", Code: " ++ (show $ state|>sCode)
++ ", Last: " ++ (show $ state|>sLast)
initialState :: String -> C.ByteString -> State
initialState name input = let (encoding, decoded) = decode input
in State { sName = name,
sEncoding = encoding,
sDecision = "",
sLimit = 1,
sForbidden = Nothing,
sIsPeek = False,
sChars = [],
sOffset = 0,
sLine = 1,
sColumn = 0,
sCode = Test,
sLast = ' ',
sInput = decoded }
setDecision :: String -> State -> State
setDecision decision state = state { sDecision = decision }
setLimit :: Int -> State -> State
setLimit limit state = state { sLimit = limit }
setForbidden :: Maybe Pattern -> State -> State
setForbidden forbidden state = state { sForbidden = forbidden }
setCode :: Code -> State -> State
setCode code state = state { sCode = code }
class Match parameter result | parameter -> result where
match :: parameter -> Parser result
instance Match (Parser result) result where
match = id
instance Match Char () where
match code = nextIf (== code)
instance Match (Char, Char) () where
match (low, high) = nextIf $ \ code -> low <= code && code <= high
instance Match String () where
match = foldr (&) empty
returnReply :: State -> result -> Reply result
returnReply state result = Reply { rResult = Result result,
rTokens = D.empty,
rCommit = Nothing,
rState = state }
tokenReply state token = Reply { rResult = Result (),
rTokens = D.singleton token,
rCommit = Nothing,
rState = state { sChars = [] } }
failReply :: State -> String -> Reply result
failReply state message = Reply { rResult = Failed $ state|>sName
++ ": line " ++ (show $ state|>sLine)
++ ": column " ++ (show $ state|>sColumn)
++ ": " ++ message,
rTokens = D.empty,
rCommit = Nothing,
rState = state }
unexpectedReply :: State -> Reply result
unexpectedReply state = case state|>sInput of
(char:_) -> failReply state $ "Unexpected '" ++ [char] ++ "'"
[] -> failReply state "Unexpected end of input"
instance Monad Parser where
return result = Parser $ \ state -> returnReply state result
left >>= right = bindParser left right
where bindParser (Parser left) right = Parser $ \ state ->
let reply = left state
in case reply|>rResult of
Failed message -> reply { rResult = Failed message }
Result value -> reply { rResult = More $ right value }
More parser -> reply { rResult = More $ bindParser parser right }
fail message = Parser $ \ state -> failReply state message
infix 3 ^
infix 3 %
infix 3 <%
infix 3 !
infix 3 ?!
infixl 3
infixr 2 &
infixr 1 /
infix 0 ?
infix 0 *
infix 0 +
infix 0 <?
infix 0 >?
infix 0 <!
infix 0 >!
(%) :: (Match match result) => match -> Int -> Pattern
parser % n
| n <= 0 = empty
| n > 0 = parser & parser % n .- 1
(<%) :: (Match match result) => match -> Int -> Pattern
parser <% n
| n < 1 = fail "Fewer than 0 repetitions"
| n == 1 = reject parser Nothing
| n > 1 = "<%" ^ ( parser ! "<%" & parser <% n .- 1 / empty )
(^) :: (Match match result) => String -> match -> Parser result
decision ^ parser = choice decision (match parser)
(!) :: (Match match result) => match -> String -> Pattern
parser ! decision = parser & commit decision
(?!) :: (Match match result) => match -> String -> Pattern
parser ?! decision = peek parser & commit decision
(<?) :: (Match match result) => match -> Parser result
(<?) lookbehind = prev lookbehind
(>?) :: (Match match result) => match -> Parser result
(>?) lookahead = peek lookahead
(<!) :: (Match match result) => match -> Pattern
(<!) lookbehind = prev $ reject lookbehind Nothing
(>!) :: (Match match result) => match -> Pattern
(>!) lookahead = reject lookahead Nothing
() :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1
parser rejected = reject rejected Nothing & parser
(&) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result2
before & after = (match before) >> (match after)
(/) :: (Match match1 result, Match match2 result) => match1 -> match2 -> Parser result
first / second = Parser $ \ state ->
let Parser parser = decide (match first) (match second)
in parser state
(?) :: (Match match result) => match -> Pattern
(?) optional = "?" ^ (optional & empty / empty)
(*) :: (Match match result) => match -> Pattern
(*) parser = "*" ^ zomParser
where zomParser = (nonEmpty parser ! "*" & zomParser) / empty
(+) :: (Match match result) => match -> Pattern
(+) parser = nonEmpty parser & (parser *)
decide :: Parser result -> Parser result -> Parser result
decide left right = Parser $ \ state ->
let Parser parser = decideParser state D.empty left right
in parser state
where decideParser point tokens (Parser left) right = Parser $ \state ->
let reply = left state
tokens' reply = D.append tokens $ reply|>rTokens
in case (reply|>rResult, reply|>rCommit) of
(Failed _, _) -> Reply { rState = point,
rTokens = D.empty,
rResult = More right,
rCommit = Nothing }
(Result _, _) -> reply { rTokens = tokens' reply }
(More left', Just _) -> reply { rTokens = tokens' reply,
rResult = More left' }
(More left', Nothing) -> let Parser parser = decideParser point (tokens' reply) left' right
in parser $ reply|>rState
choice :: String -> Parser result -> Parser result
choice decision parser = Parser $ \ state ->
let Parser parser' = choiceParser (state|>sDecision) decision parser
in parser' state { sDecision = decision }
where choiceParser parentDecision makingDecision (Parser parser) = Parser $ \ state ->
let reply = parser state
commit' = case reply|>rCommit of
Nothing -> Nothing
Just decision | decision == makingDecision -> Nothing
| otherwise -> reply|>rCommit
reply' = case reply|>rResult of
More parser' -> reply { rCommit = commit',
rResult = More $ choiceParser parentDecision makingDecision parser' }
_ -> reply { rCommit = commit',
rState = (reply|>rState) { sDecision = parentDecision } }
in reply'
prev :: (Match match result) => match -> Parser result
prev parser = Parser $ \ state ->
prevParser state (match parser) state { sIsPeek = True, sInput = state|>sLast : state|>sInput }
where prevParser point (Parser parser) state =
let reply = parser state
in case reply|>rResult of
Failed message -> failReply point message
Result value -> returnReply point value
More parser' -> prevParser point parser' $ reply|>rState
peek :: (Match match result) => match -> Parser result
peek parser = Parser $ \ state ->
peekParser state (match parser) state { sIsPeek = True }
where peekParser point (Parser parser) state =
let reply = parser state
in case reply|>rResult of
Failed message -> failReply point message
Result value -> returnReply point value
More parser' -> peekParser point parser' $ reply|>rState
reject :: (Match match result) => match -> Maybe String -> Pattern
reject parser name = Parser $ \ state ->
rejectParser state name (match parser) state { sIsPeek = True }
where rejectParser point name (Parser parser) state =
let reply = parser state
in case reply|>rResult of
Failed message -> returnReply point ()
Result value -> case name of
Nothing -> unexpectedReply point
Just text -> failReply point $ "Unexpected " ++ text
More parser' -> rejectParser point name parser' $ reply|>rState
nonEmpty :: (Match match result) => match -> Parser result
nonEmpty parser = Parser $ \ state ->
let Parser parser' = nonEmptyParser (state|>sOffset) (match parser)
in parser' state
where nonEmptyParser offset (Parser parser) = Parser $ \ state ->
let reply = parser state
state' = reply|>rState
in case reply|>rResult of
Failed message -> reply
Result value -> if state'|>sOffset > offset
then reply
else failReply state' "Matched empty pattern"
More parser' -> reply { rResult = More $ nonEmptyParser offset parser' }
empty :: Pattern
empty = return ()
eof :: Pattern
eof = Parser $ \ state ->
if state|>sInput == []
then returnReply state ()
else failReply state "Expected end of input"
sol :: Pattern
sol = Parser $ \ state ->
if state|>sColumn == 0
then returnReply state ()
else failReply state "Expected start of line"
commit :: String -> Pattern
commit decision = Parser $ \ state ->
Reply { rState = state,
rTokens = D.empty,
rResult = Result (),
rCommit = Just decision }
nextLine :: Pattern
nextLine = Parser $ \ state ->
returnReply state { sLine = state|>sLine .+ 1,
sColumn = 0 }
()
with :: (value -> State -> State) -> (State -> value) -> value -> Parser result -> Parser result
with setField getField value parser = Parser $ \ state ->
let value' = getField state
Parser parser' = value' `seq` withParser value' parser
in parser' $ setField value state
where withParser parentValue (Parser parser) = Parser $ \ state ->
let reply = parser state
in case reply|>rResult of
Failed _ -> reply { rState = setField parentValue $ reply|>rState }
Result _ -> reply { rState = setField parentValue $ reply|>rState }
More parser' -> reply { rResult = More $ withParser parentValue parser' }
forbidding :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1
forbidding parser forbidden = with setForbidden sForbidden (Just $ forbidden & empty) (match parser)
limitedTo :: (Match match result) => match -> Int -> Parser result
limitedTo parser limit = with setLimit sLimit limit (match parser)
nextIf :: (Char -> Bool) -> Pattern
nextIf test = Parser $ \ state ->
case state|>sForbidden of
Nothing -> limitedNextIf state
Just parser -> let Parser parser' = reject parser $ Just "forbidden pattern"
reply = parser' state { sForbidden = Nothing }
in case reply|>rResult of
Failed _ -> reply
Result _ -> limitedNextIf state
where limitedNextIf state =
case state|>sLimit of
1 -> consumeNextIf state
0 -> failReply state "Lookahead limit reached"
limit -> consumeNextIf state { sLimit = state|>sLimit .- 1 }
consumeNextIf state =
case state|>sInput of
(char:rest) | test char -> let chars = if state|>sIsPeek
then []
else char:(state|>sChars)
state' = state { sInput = rest,
sLast = char,
sChars = chars,
sOffset = state|>sOffset .+ 1,
sColumn = state|>sColumn .+ 1 }
in returnReply state' ()
| otherwise -> unexpectedReply state
[] -> unexpectedReply state
finishToken :: Pattern
finishToken = Parser $ \ state ->
if state|>sIsPeek
then returnReply state ()
else case state|>sChars of
[] -> returnReply state ()
chars@(_:_) -> tokenReply state Token { tCode = state|>sCode,
tText = reverse chars }
wrap :: (Match match result) => match -> Parser result
wrap parser = do result <- match parser
finishToken
eof
return result
consume :: (Match match result) => match -> Parser result
consume parser = do result <- match parser
finishToken
clearInput
return result
where clearInput = Parser $ \ state -> returnReply state { sInput = [] } ()
token :: (Match match result) => Code -> match -> Pattern
token code parser = finishToken & with setCode sCode code (parser & finishToken)
fake :: Code -> String -> Pattern
fake code text = Parser $ \ state ->
if state|>sIsPeek
then returnReply state ()
else tokenReply state Token { tCode = code,
tText = text }
meta :: (Match match result) => match -> Pattern
meta parser = token Meta parser
indicator :: (Match match result) => match -> Pattern
indicator parser = token Indicator $ parser
text :: (Match match result) => match -> Pattern
text parser = token Text parser
nest :: Code -> Pattern
nest code = finishToken & nestParser code
where nestParser code = Parser $ \ state ->
if state|>sIsPeek
then returnReply state ()
else tokenReply state Token { tCode = code,
tText = "" }
data Context = BlockOut
| BlockIn
| FlowOut
| FlowIn
| FlowKey
instance Show Context where
show context = case context of
BlockOut -> "block-out"
BlockIn -> "block-in"
FlowOut -> "flow-out"
FlowIn -> "flow-in"
FlowKey -> "flow-key"
instance Read Context where
readsPrec _ text = [ ((r word), tail) | (word, tail) <- lex text ]
where r word = case word of
"block_out" -> BlockOut
"block_in" -> BlockIn
"flow_out" -> FlowOut
"flow_in" -> FlowIn
"flow_key" -> FlowKey
_ -> error $ "unknown context: " ++ word
data Chomp = Strip
| Clip
| Keep
instance Show Chomp where
show chomp = case chomp of
Strip -> "strip"
Clip -> "clip"
Keep -> "keep"
instance Read Chomp where
readsPrec _ text = [ ((r word), tail) | (word, tail) <- lex text ]
where r word = case word of
"strip" -> Strip
"clip" -> Clip
"keep" -> Keep
_ -> error $ "unknown chomp: " ++ word
type Tokenizer = String -> C.ByteString -> Bool -> [Token]
patternTokenizer :: Pattern -> Tokenizer
patternTokenizer pattern name input withFollowing =
D.toList $ patternParser (wrap pattern) (initialState name input)
where patternParser (Parser parser) state =
let reply = parser state
tokens = commitBugs reply
in case reply|>rResult of
Failed message -> errorTokens tokens message withFollowing $ reply|>rState|>sInput
Result _ -> tokens
More parser' -> D.append tokens $ patternParser parser' $ reply|>rState
parserTokenizer :: (Show result, Match match result) => String -> match -> Tokenizer
parserTokenizer what parser name input withFollowing =
D.toList $ parserParser (wrap parser) (initialState name input)
where parserParser (Parser parser) state =
let reply = parser state
tokens = commitBugs reply
in case reply|>rResult of
Failed message -> errorTokens tokens message withFollowing $ reply|>rState|>sInput
Result value -> D.append tokens $ D.singleton Token { tCode = Detected,
tText = what ++ "=" ++ (show value) }
More parser' -> D.append tokens $ parserParser parser' $ reply|>rState
errorTokens tokens message withFollowing following =
let tokens' = D.append tokens $ D.singleton Token { tCode = Error, tText = message }
in if withFollowing && following /= ""
then D.append tokens' $ D.singleton Token { tCode = Unparsed, tText = following }
else tokens'
commitBugs :: Reply result -> D.DList Token
commitBugs reply =
let tokens = reply|>rTokens
in case reply|>rCommit of
Nothing -> tokens
Just commit -> D.append tokens $ D.singleton Token { tCode = Error,
tText = "Commit to '" ++ commit ++ "' was made outside it" }
yaml :: Tokenizer
yaml = patternTokenizer l_yaml_stream
#ifdef REAL_CPP
#define STR(X) #X
#else
#define STR(X) "X"
#endif
#define PAT(PATTERN) pat STR(PATTERN) PATTERN
#define PAR(PARSER) par STR(PARSER) PARSER
#define PAC(PARSER) pac STR(PARSER) PARSER
pName :: String -> String
pName name = regexSub questionRegex "?"
$ regexSub minusRegex "-"
$ regexSub plusRegex "+" name
where regexSub regex value text = subRegex regex text value
questionRegex = mkRegex "'"
minusRegex = mkRegex "_"
plusRegex = mkRegex "__"
tokenizers :: Map.Map String Tokenizer
tokenizers = PAR(c_chomping_indicator) "t"
$ PAC(detect_inline_indentation) "m"
$ PAT(b_as_line_feed)
$ PAT(b_as_space)
$ PAT(b_carriage_return)
$ PAT(b_char)
$ PAT(b_line_feed)
$ PAT(b_non_content)
$ PAT(c_alias)
$ PAT(c_anchor)
$ PAT(c_byte_order_mark)
$ PAT(c_collect_entry)
$ PAT(c_comment)
$ PAT(c_directive)
$ PAT(c_document_end)
$ PAT(c_document_start)
$ PAT(c_double_quote)
$ PAT(c_escape)
$ PAT(c_flow_indicator)
$ PAT(c_folded)
$ PAT(c_forbidden)
$ PAT(c_indicator)
$ PAT(c_json)
$ PAT(c_literal)
$ PAT(c_mapping_end)
$ PAT(c_mapping_key)
$ PAT(c_mapping_start)
$ PAT(c_mapping_value)
$ PAT(c_named_tag_handle)
$ PAT(c_nb_comment_text)
$ PAT(c_non_specific_tag)
$ PAT(c_ns_alias_node)
$ PAT(c_ns_anchor_property)
$ PAT(c_ns_esc_char)
$ PAT(c_ns_local_tag_prefix)
$ PAT(c_ns_shorthand_tag)
$ PAT(c_ns_tag_property)
$ PAT(c_primary_tag_handle)
$ PAT(c_printable)
$ PAT(c_quoted_quote)
$ PAT(c_reserved)
$ PAT(c_secondary_tag_handle)
$ PAT(c_sequence_end)
$ PAT(c_sequence_entry)
$ PAT(c_sequence_start)
$ PAT(c_s_implicit_json_key)
$ PAT(c_single_quote)
$ PAT(c_tag)
$ PAT(c_tag_handle)
$ PAT(c_verbatim_tag)
$ PAT(e_node)
$ PAT(e_scalar)
$ PAT(l_comment)
$ PAT(l_directive)
$ PAT(l_document_prefix)
$ PAT(l_documents)
$ PAT(l_document_suffix)
$ PAT(l_explicit_document)
$ PAT(l_following_document)
$ PAT(l_implicit_document)
$ PAT(l_leading_document)
$ PAT(l_yaml_stream)
$ PAT(nb_char)
$ PAT(nb_double_char)
$ PAT(nb_double_one_line)
$ PAT(nb_json)
$ PAT(nb_ns_double_in_line)
$ PAT(nb_ns_single_in_line)
$ PAT(nb_single_char)
$ PAT(nb_single_one_line)
$ PAT(ns_anchor_char)
$ PAT(ns_anchor_name)
$ PAT(ns_ascii_letter)
$ PAT(ns_char)
$ PAT(ns_dec_digit)
$ PAT(ns_directive_name)
$ PAT(ns_directive_parameter)
$ PAT(ns_double_char)
$ PAT(ns_esc_16_bit)
$ PAT(ns_esc_32_bit)
$ PAT(ns_esc_8_bit)
$ PAT(ns_esc_backslash)
$ PAT(ns_esc_backspace)
$ PAT(ns_esc_bell)
$ PAT(ns_esc_carriage_return)
$ PAT(ns_esc_double_quote)
$ PAT(ns_esc_escape)
$ PAT(ns_esc_form_feed)
$ PAT(ns_esc_horizontal_tab)
$ PAT(ns_esc_line_feed)
$ PAT(ns_esc_line_separator)
$ PAT(ns_esc_next_line)
$ PAT(ns_esc_non_breaking_space)
$ PAT(ns_esc_null)
$ PAT(ns_esc_paragraph_separator)
$ PAT(ns_esc_slash)
$ PAT(ns_esc_space)
$ PAT(ns_esc_vertical_tab)
$ PAT(ns_global_tag_prefix)
$ PAT(ns_hex_digit)
$ PAT(ns_plain_safe_in)
$ PAT(ns_plain_safe_out)
$ PAT(ns_reserved_directive)
$ PAT(ns_s_block_map_implicit_key)
$ PAT(ns_s_implicit_yaml_key)
$ PAT(ns_single_char)
$ PAT(ns_tag_char)
$ PAT(ns_tag_directive)
$ PAT(ns_tag_prefix)
$ PAT(ns_uri_char)
$ PAT(ns_word_char)
$ PAT(ns_yaml_directive)
$ PAT(ns_yaml_version)
$ PAT(s_b_comment)
$ PAT(s_l_comments)
$ PAT(s_separate_in_line)
$ PAT(s_space)
$ PAT(s_tab)
$ PAT(s_white)
$ Map.empty
where pat name pattern = Map.insert (pName name) $ patternTokenizer (match pattern)
par name parser what = Map.insert (pName name) $ parserTokenizer what (match parser)
pac name parser what = Map.insert (pName name) $ parserTokenizer what (consume parser)
tokenizer :: String -> (Maybe Tokenizer)
tokenizer name = Map.lookup name tokenizers
tokenizersWithN :: Map.Map String (Int -> Tokenizer)
tokenizersWithN = PAR(c_b_block_header) "(m,t)"
$ PAC(detect_collection_indentation) "m"
$ PAC(detect_scalar_indentation) "m"
$ PAR(c_indentation_indicator) "m"
$ PAR(count_spaces) "m"
$ PAT(b_l_spaced)
$ PAT(b_nb_literal_next)
$ PAT(c_l_block_map_explicit_entry)
$ PAT(c_l_block_map_explicit_key)
$ PAT(c_l_block_map_implicit_value)
$ PAT(c_l_block_seq_entry)
$ PAT(c_l__folded)
$ PAT(c_l__literal)
$ PAT(l_block_map_explicit_value)
$ PAT(l__block_mapping)
$ PAT(l__block_sequence)
$ PAT(l_keep_empty)
$ PAT(l_nb_diff_lines)
$ PAT(l_nb_folded_lines)
$ PAT(l_nb_literal_text)
$ PAT(l_nb_same_lines)
$ PAT(l_nb_spaced_lines)
$ PAT(l_strip_empty)
$ PAT(l_trail_comments)
$ PAT(nb_double_multi_line)
$ PAT(nb_single_multi_line)
$ PAT(ns_l_block_map_entry)
$ PAT(ns_l_block_map_implicit_entry)
$ PAT(ns_l_in_line_mapping)
$ PAT(ns_l_in_line_sequence)
$ PAT(s_block_line_prefix)
$ PAT(s_flow_line_prefix)
$ PAT(s_indent)
$ PAT(s_indent_le)
$ PAT(s_indent_lt)
$ PAT(s_l__flow_in_block)
$ PAT(s_nb_folded_text)
$ PAT(s_nb_spaced_text)
$ PAT(s_ns_double_next_line)
$ PAT(s_ns_single_next_line)
$ PAT(s_s_double_break)
$ PAT(s_s_double_escaped)
$ PAT(s_separate_lines)
$ PAT(s_s_flow_folded)
$ Map.empty
where pat name pattern = Map.insert (pName name) (\ n -> patternTokenizer (match $ pattern n))
par name parser what = Map.insert (pName name) (\ n -> parserTokenizer what (match $ parser n))
pac name parser what = Map.insert (pName name) (\ n -> parserTokenizer what (consume $ parser n))
tokenizerWithN :: String -> Int -> Maybe Tokenizer
tokenizerWithN name n =
case Map.lookup name tokenizersWithN of
Just tokenizer -> Just $ tokenizer n
Nothing -> Nothing
tokenizersWithC :: Map.Map String (Context -> Tokenizer)
tokenizersWithC = PAT(nb_ns_plain_in_line)
$ PAT(nb_plain_char)
$ PAT(ns_plain_char)
$ PAT(ns_plain_first)
$ PAT(ns_plain_one_line)
$ PAT(ns_plain_safe)
$ Map.empty
where pat name pattern = Map.insert (pName name) (\ c -> patternTokenizer (match $ pattern c))
tokenizerWithC :: String -> Context -> Maybe Tokenizer
tokenizerWithC name c =
case Map.lookup name tokenizersWithC of
Just tokenizer -> Just $ tokenizer c
Nothing -> Nothing
tokenizersWithT :: Map.Map String (Chomp -> Tokenizer)
tokenizersWithT = PAT(b_chomped_last)
$ Map.empty
where pat name pattern = Map.insert (pName name) (\ t -> patternTokenizer (match $ pattern t))
tokenizerWithT :: String -> Chomp -> Maybe Tokenizer
tokenizerWithT name t =
case Map.lookup name tokenizersWithT of
Just tokenizer -> Just $ tokenizer t
Nothing -> Nothing
tokenizersWithNC :: Map.Map String (Int -> Context -> Tokenizer)
tokenizersWithNC = PAT(b_l_folded)
$ PAT(b_l_trimmed)
$ PAT(c_double_quoted)
$ PAT(c_flow_json_content)
$ PAT(c_flow_json_node)
$ PAT(c_flow_mapping)
$ PAT(c_flow_sequence)
$ PAT(c_ns_flow_map_adjacent_value)
$ PAT(c_ns_flow_map_empty_key_entry)
$ PAT(c_ns_flow_map_json_key_entry)
$ PAT(c_ns_flow_map_separate_value)
$ PAT(c_ns_flow_pair_json_key_entry)
$ PAT(c_ns_properties)
$ PAT(c_single_quoted)
$ PAT(l_empty)
$ PAT(nb_double_text)
$ PAT(nb_single_text)
$ PAT(ns_flow_content)
$ PAT(ns_flow_map_entry)
$ PAT(ns_flow_map_explicit_entry)
$ PAT(ns_flow_map_implicit_entry)
$ PAT(ns_flow_map_yaml_key_entry)
$ PAT(ns_flow_node)
$ PAT(ns_flow_pair)
$ PAT(ns_flow_pair_entry)
$ PAT(ns_flow_pair_yaml_key_entry)
$ PAT(ns_flow_seq_entry)
$ PAT(ns_flow_yaml_content)
$ PAT(ns_flow_yaml_node)
$ PAT(ns_plain)
$ PAT(ns_plain_multi_line)
$ PAT(ns_s_flow_map_entries)
$ PAT(ns_s_flow_seq_entries)
$ PAT(s_l__block_collection)
$ PAT(s_l__block_in_block)
$ PAT(s_l__block_indented)
$ PAT(s_l__block_node)
$ PAT(s_l__block_scalar)
$ PAT(s_line_prefix)
$ PAT(s_ns_plain_next_line)
$ PAT(s_separate)
$ Map.empty
where pat name pattern = Map.insert (pName name) (\ n c -> patternTokenizer (match $ pattern n c))
tokenizerWithNC :: String -> Int -> Context -> Maybe Tokenizer
tokenizerWithNC name n c =
case Map.lookup name tokenizersWithNC of
Just tokenizer -> Just $ tokenizer n c
Nothing -> Nothing
tokenizersWithNT :: Map.Map String (Int -> Chomp -> Tokenizer)
tokenizersWithNT = PAT(l_chomped_empty)
$ PAT(l_folded_content)
$ PAT(l_literal_content)
$ Map.empty
where pat name pattern = Map.insert (pName name) (\ n t -> patternTokenizer (match $ pattern n t))
tokenizerWithNT :: String -> Int -> Chomp -> Maybe Tokenizer
tokenizerWithNT name n t =
case Map.lookup name tokenizersWithNT of
Just tokenizer -> Just $ tokenizer n t
Nothing -> Nothing
tokenizerNames :: [String]
tokenizerNames = (Map.keys tokenizers)
++ (Map.keys tokenizersWithN)
++ (Map.keys tokenizersWithC)
++ (Map.keys tokenizersWithT)
++ (Map.keys tokenizersWithNC)
++ (Map.keys tokenizersWithNT)
detect_utf_encoding = Parser $ \ state -> let text = case state|>sEncoding of
UTF8 -> "TF8"
UTF16LE -> "TF16LE"
UTF16BE -> "TF16BE"
UTF32LE -> "TF32LE"
UTF32BE -> "TF32BE"
Parser parser = fake Bom text
in parser state { sColumn = state|>sColumn .- 1 }
na :: Int
na = error "Accessing non-applicable indentation"
asInteger :: Parser Int
asInteger = Parser $ \ state -> returnReply state $ ord (state|>sLast) .- 48
result :: result -> Parser result
result = return
#include "Reference.bnf"