YamlReference-0.3: YAML reference implementationContentsIndex
Text.Yaml.Reference
Portabilityportable
Stabilityalpha
Maintaineryaml-oren@ben-kiki.org
Contents
Generic operators
Numeric operators
Record field access
UTF decoding
UTF-16 decoding
UTF-8 decoding
Result tokens
Parsing framework
Parsing state
Setters
Implicit parsers
Reply constructors
Parsing Monad
Parsing operators
Basic parsers
State manipulation pseudo-parsers
Consuming input characters
Producing tokens
Production parameters
Tokenizers
Productions
BNF compatibility helpers
Spec productions
Description

Implementation of the YAML syntax as defined in http://www.yaml.org. Actually this file contains the parsing framework and includes (using CPP) the actual productions from Reference.bnf.

The parsing framework is fully streaming (generates output tokens "immediately"). The memory leak that existed in previous version has been plugged.

Synopsis
(|>) :: record -> (record -> value) -> value
data Encoding
= UTF8
| UTF16LE
| UTF16BE
decode :: ByteString -> (Encoding, [Char])
detectEncoding :: [Char] -> Encoding
undoEncoding :: Encoding -> ByteString -> [Char]
combinePairs :: [Char] -> [Char]
combineLead :: Char -> [Char] -> [Char]
surrogateOffset :: Int
combineSurrogates :: Char -> Char -> Char
hasFewerThan :: Int -> ByteString -> Bool
undoUTF16LE :: ByteString -> [Char]
undoUTF16BE :: ByteString -> [Char]
undoUTF8 :: ByteString -> [Char]
decodeTwoUTF8 :: Char -> ByteString -> [Char]
combineTwoUTF8 :: Char -> Char -> Char
decodeThreeUTF8 :: Char -> ByteString -> [Char]
combineThreeUTF8 :: Char -> Char -> Char -> Char
decodeFourUTF8 :: Char -> ByteString -> [Char]
combineFourUTF8 :: Char -> Char -> Char -> Char -> Char
data Code
= Bom
| Text
| Meta
| Break
| Continue
| LineFeed
| LineFold
| Indicator
| White
| Indent
| DocumentStart
| DocumentEnd
| BeginEscape
| EndEscape
| BeginComment
| EndComment
| BeginDirective
| EndDirective
| BeginTag
| EndTag
| BeginHandle
| EndHandle
| BeginAnchor
| EndAnchor
| BeginProperties
| EndProperties
| BeginAlias
| EndAlias
| BeginScalar
| EndScalar
| BeginSequence
| EndSequence
| BeginMapping
| EndMapping
| BeginPair
| EndPair
| BeginNode
| EndNode
| BeginDocument
| EndDocument
| BeginStream
| EndStream
| Error
| Test
| Detected
data Token = Token {
tCode :: Code
tText :: String
}
escapeString :: String -> String
toHex :: Int -> Int -> String
showTokens :: [Token] -> String
data Parser result = Parser (State -> Reply result)
data Result result
= Failed String
| Result result
| More (Parser result)
data Reply result = Reply {
rResult :: !(Result result)
rTokens :: !(DList Token)
rCommit :: !(Maybe String)
rState :: !State
}
type Pattern = Parser ()
data State = State {
sName :: !String
sEncoding :: !Encoding
sDecision :: !String
sLimit :: !Int
sForbidden :: !(Maybe Pattern)
sIsPeek :: !Bool
sChars :: ![Char]
sOffset :: !Int
sLine :: !Int
sColumn :: !Int
sCode :: !Code
sLast :: !Char
sInput :: ![Char]
}
initialState :: String -> ByteString -> State
setDecision :: String -> State -> State
setLimit :: Int -> State -> State
setForbidden :: Maybe Pattern -> State -> State
setCode :: Code -> State -> State
class Match parameter result | parameter -> result where
match :: parameter -> Parser result
returnReply :: State -> result -> Reply result
failReply :: State -> String -> Reply result
unexpectedReply :: State -> Reply result
(%) :: Match match result => match -> Int -> Pattern
(<%) :: Match match result => match -> Int -> Pattern
(^) :: Match match result => String -> match -> Parser result
(!) :: Match match result => match -> String -> Pattern
(?!) :: Match match result => match -> String -> Pattern
(-) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1
(&) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result2
(/) :: (Match match1 result, Match match2 result) => match1 -> match2 -> Parser result
(?) :: Match match result => match -> Pattern
(*) :: Match match result => match -> Pattern
(+) :: Match match result => match -> Pattern
decide :: Parser result -> Parser result -> Parser result
choice :: String -> Parser result -> Parser result
peek :: Match match result => match -> Parser result
reject :: Match match result => match -> Maybe String -> Pattern
nonEmpty :: Match match result => match -> Parser result
empty :: Pattern
eof :: Pattern
sol :: Pattern
commit :: String -> Pattern
nextLine :: Pattern
with :: (value -> State -> State) -> (State -> value) -> value -> Parser result -> Parser result
forbidding :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1
limitedTo :: Match match result => match -> Int -> Parser result
nextIf :: (Char -> Bool) -> Pattern
finishToken :: Pattern
wrap :: Match match result => match -> Parser result
consume :: Match match result => match -> Parser result
token :: Match match result => Code -> match -> Pattern
fake :: Code -> String -> Pattern
meta :: Match match result => match -> Pattern
indicator :: Match match result => match -> Pattern
text :: Match match result => match -> Pattern
nest :: Code -> Pattern
data Context
= BlockOut
| BlockIn
| FlowOut
| FlowIn
| FlowKey
data Style
= Plain
| Double
| Single
| Literal
| Folded
data Chomp
= Strip
| Clip
| Keep
type Tokenizer = String -> ByteString -> [Token]
patternTokenizer :: Pattern -> Tokenizer
parserTokenizer :: (Show result, Match match result) => String -> match -> Tokenizer
commitBugs :: Reply result -> DList Token
yaml :: Tokenizer
pName :: String -> String
tokenizers :: Map String Tokenizer
tokenizer :: String -> Maybe Tokenizer
tokenizersWithN :: Map String (Int -> Tokenizer)
tokenizerWithN :: String -> Int -> Maybe Tokenizer
tokenizersWithC :: Map String (Context -> Tokenizer)
tokenizerWithC :: String -> Context -> Maybe Tokenizer
tokenizersWithS :: Map String (Style -> Tokenizer)
tokenizerWithS :: String -> Style -> Maybe Tokenizer
tokenizersWithT :: Map String (Chomp -> Tokenizer)
tokenizerWithT :: String -> Chomp -> Maybe Tokenizer
tokenizersWithNC :: Map String (Int -> Context -> Tokenizer)
tokenizerWithNC :: String -> Int -> Context -> Maybe Tokenizer
tokenizersWithNS :: Map String (Int -> Style -> Tokenizer)
tokenizerWithNS :: String -> Int -> Style -> Maybe Tokenizer
tokenizersWithNT :: Map String (Int -> Chomp -> Tokenizer)
tokenizerWithNT :: String -> Int -> Chomp -> Maybe Tokenizer
tokenizerNames :: [String]
na :: Int
asInteger :: Parser Int
result :: result -> Parser result
Generic operators
Numeric operators
Record field access
(|>) :: record -> (record -> value) -> value
record |> field is the same as field record, but is more readable.
UTF decoding
data Encoding
Recognized Unicode encodings. UTF-32 isn't required by YAML parsers.
Constructors
UTF8UTF-8 encoding (or ASCII)
UTF16LEUTF-16 little endian
UTF16BEUTF-16 big endian
show/hide Instances
decode :: ByteString -> (Encoding, [Char])
decode bytes automatically detects the Encoding used and converts the bytes to Unicode characters.
detectEncoding :: [Char] -> Encoding
detectEncoding text examines the first few chars (bytes) of the text to deduce the Unicode encoding used according to the YAML spec.
undoEncoding :: Encoding -> ByteString -> [Char]
undoEncoding encoding bytes converts a bytes stream to Unicode characters according to the encoding.
UTF-16 decoding
combinePairs :: [Char] -> [Char]
combinePairs chars converts each pair of UTF-16 surrogate characters to a single Unicode character.
combineLead :: Char -> [Char] -> [Char]
combineLead lead rest combines the lead surrogate with the head of the rest of the input chars, assumed to be a trail surrogate, and continues combining surrogate pairs.
surrogateOffset :: Int
surrogateOffset is copied from the Unicode FAQs.
combineSurrogates :: Char -> Char -> Char
combineSurrogates lead trail combines two UTF-16 surrogates into a single Unicode character.
hasFewerThan :: Int -> ByteString -> Bool
hasFewerThan bytes n checks whether there are fewer than n bytes left to read.
undoUTF16LE :: ByteString -> [Char]
undoUTF18LE bytes decoded a UTF-16-LE bytes stream to Unicode chars.
undoUTF16BE :: ByteString -> [Char]
undoUTF18BE bytes decoded a UTF-16-BE bytes stream to Unicode chars.
UTF-8 decoding
undoUTF8 :: ByteString -> [Char]
undoUTF8 bytes decoded a UTF-8 bytes stream to Unicode chars.
decodeTwoUTF8 :: Char -> ByteString -> [Char]
decodeTwoUTF8 first bytes decodes a two-byte UTF-8 character, where the first byte is already available and the second is the head of the bytes, and then continues to undo the UTF-8 encoding.
combineTwoUTF8 :: Char -> Char -> Char
combineTwoUTF8 first second combines the first and second bytes of a two-byte UTF-8 char into a single Unicode char.
decodeThreeUTF8 :: Char -> ByteString -> [Char]
decodeThreeUTF8 first bytes decodes a three-byte UTF-8 character, where the first byte is already available and the second and third are the head of the bytes, and then continues to undo the UTF-8 encoding.
combineThreeUTF8 :: Char -> Char -> Char -> Char
combineThreeUTF8 first second combines the first, second and third bytes of a three-byte UTF-8 char into a single Unicode char.
decodeFourUTF8 :: Char -> ByteString -> [Char]
decodeFourUTF8 first bytes decodes a four-byte UTF-8 character, where the first byte is already available and the second, third and fourth are the head of the bytes, and then continues to undo the UTF-8 encoding.
combineFourUTF8 :: Char -> Char -> Char -> Char -> Char
combineFourUTF8 first second combines the first, second and third bytes of a three-byte UTF-8 char into a single Unicode char.
Result tokens
data Code
Token codes.
Constructors
BomBOM, contains "TF8", "TF16LE" or "TF16BE".
TextContent text characters.
MetaNon-content (meta) text characters.
BreakLine break preserved in content.
ContinueSeparation line break.
LineFeedLine break normalized to content line feed.
LineFoldLine break folded to content space.
IndicatorCharacter indicating structure.
WhiteSeparation white space.
IndentIndentation spaces.
DocumentStartDocument start marker.
DocumentEndDocument end marker.
BeginEscapeBegins escape sequence.
EndEscapeEnds escape sequence.
BeginCommentBegins comment.
EndCommentEnds comment.
BeginDirectiveBegins directive.
EndDirectiveEnds directive.
BeginTagBegins tag.
EndTagEnds tag.
BeginHandleBegins tag handle.
EndHandleEnds tag handle.
BeginAnchorBegins anchor.
EndAnchorEnds anchor.
BeginPropertiesBegins node properties.
EndPropertiesEnds node properties.
BeginAliasBegins alias.
EndAliasEnds alias.
BeginScalarBegins scalar content.
EndScalarEnds scalar content.
BeginSequenceBegins sequence content.
EndSequenceEnds sequence content.
BeginMappingBegins mapping content.
EndMappingEnds mapping content.
BeginPairBegins mapping key:value pair.
EndPairEnds mapping key:value pair.
BeginNodeBegins complete node.
EndNodeEnds complete node.
BeginDocumentBegins document.
EndDocumentEnds document.
BeginStreamBegins YAML stream.
EndStreamEnds YAML stream.
ErrorParsing error at this point. For testing:.
TestTest characters otherwise unassigned.
DetectedDetected parameter.
show/hide Instances
data Token
Parsed token.
Constructors
Token
tCode :: CodeSpecific token Code.
tText :: StringContained input chars, if any.
show/hide Instances
escapeString :: String -> String
escapeString string escapes all the non-ASCII characters in the string, as well as escaping the "\" character, using the "\xXX", "\uXXXX" and "\UXXXXXXXX" escape sequences.
toHex :: Int -> Int -> String
toHex digits int converts the int to the specified number of hexadecimal digits.
showTokens :: [Token] -> String
showTokens tokens converts a list of tokens to a multi-line YEAST text.
Parsing framework
data Parser result
A Parser is basically a function computing a Reply.
Constructors
Parser (State -> Reply result)
show/hide Instances
Monad Parser
Match (Parser result) result
data Result result
The Result of each invocation is either an error, the actual result, or a continuation for computing the actual result.
Constructors
Failed StringParsing aborted with a failure.
Result resultParsing completed with a result.
More (Parser result)Parsing is ongoing with a continuation.
show/hide Instances
Show result => Show (Result result)
data Reply result
Each invication of a Parser yields a Reply. The Result is only one part of the Reply.
Constructors
Reply
rResult :: !(Result result)Parsing result.
rTokens :: !(DList Token)Tokens generated by the parser.
rCommit :: !(Maybe String)Commitment to a decision point.
rState :: !StateThe updated parser state.
show/hide Instances
Show result => Show (Reply result)
type Pattern = Parser ()
Parsing state
data State
The internal parser state. We don't bother with parameterising it with a "UserState", we just bundle the generic and specific fields together (not that it is that easy to draw the line - is sLine generic or specific?).
Constructors
State
sName :: !StringThe input name for error messages.
sEncoding :: !EncodingThe input UTF encoding.
sDecision :: !StringCurrent decision name.
sLimit :: !IntLookahead characters limit.
sForbidden :: !(Maybe Pattern)Pattern we must not enter into.
sIsPeek :: !BoolDisables token generation.
sChars :: ![Char](Reversed) characters collected for a token.
sOffset :: !IntOffset in characters in the input.
sLine :: !IntBuilds on YAML's line break definition.
sColumn :: !IntActually character number - we hate tabs.
sCode :: !CodeOf token we are collecting chars for.
sLast :: !CharLast matched character.
sInput :: ![Char]The decoded input characters.
show/hide Instances
initialState :: String -> ByteString -> State
initialState name input returns an initial State for parsing the input (with name for error messages).
Setters
setDecision :: String -> State -> State
setDecision name state sets the sDecision field to decision.
setLimit :: Int -> State -> State
setLimit limit state sets the sLimit field to limit.
setForbidden :: Maybe Pattern -> State -> State
setForbidden forbidden state sets the sForbidden field to forbidden.
setCode :: Code -> State -> State
setCode code state sets the sCode field to code.
Implicit parsers
class Match parameter result | parameter -> result where
Match parameter result specifies that we can convert the parameter to a Parser returning the result.
Methods
match :: parameter -> Parser result
show/hide Instances
Match Char ()
Match String ()
Match (Char, Char) ()
Match (Parser result) result
Reply constructors
returnReply :: State -> result -> Reply result
returnReply state result prepares a Reply with the specified state and result.
failReply :: State -> String -> Reply result
failReply state message prepares a Reply with the specified state and error message.
unexpectedReply :: State -> Reply result
unexpectedReply state returns a failReply for an unexpected character.
Parsing Monad
Parsing operators
(%) :: Match match result => match -> Int -> Pattern
parser % n repeats parser exactly n times.
(<%) :: Match match result => match -> Int -> Pattern
parser <% n matches fewer than n occurrences of parser.
(^) :: Match match result => String -> match -> Parser result
decision ^ (option / option / ...) provides a decision name to the choice about to be made, to allow to commit to it.
(!) :: Match match result => match -> String -> Pattern
parser ! decision commits to decision after successfully matching the parser.
(?!) :: Match match result => match -> String -> Pattern
parser ?! decision commits to decision if the current position matches parser, without consuming any characters.
(-) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1
parser - rejected matches parser, except if rejected matches at this point.
(&) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result2
before & after parses before and, if it succeeds, parses after. This basically invokes the monad's >>= (bind) method.
(/) :: (Match match1 result, Match match2 result) => match1 -> match2 -> Parser result
first / second tries to parse first, and failing that parses second, unless first has committed in which case is fails immediately.
(?) :: Match match result => match -> Pattern
(optional ?) tries to match parser, otherwise does nothing.
(*) :: Match match result => match -> Pattern
(parser *) matches zero or more occurrences of repeat, as long as each one actually consumes input characters.
(+) :: Match match result => match -> Pattern
(parser +) matches one or more occurrences of parser, as long as each one actually consumed input characters.
Basic parsers
decide :: Parser result -> Parser result -> Parser result
decide first second tries to parse first, and failing that parses second, unless first has committed in which case is fails immediately.
choice :: String -> Parser result -> Parser result
choice decision parser provides a decision name to the choice about to be made in parser, to allow to commit to it.
peek :: Match match result => match -> Parser result
peek parser succeeds if parser matches at this point, but does not consume any input.
reject :: Match match result => match -> Maybe String -> Pattern
reject parser name fails if parser matches at this point, and does nothing otherwise. If name is provided, it is used in the error message, otherwise the messages uses the current character.
nonEmpty :: Match match result => match -> Parser result
nonEmpty parser succeeds if parser matches some non-empty input characters at this point.
empty :: Pattern
empty always matches without consuming any input.
eof :: Pattern
eof matches the end of the input.
sol :: Pattern
sol matches the start of a line.
State manipulation pseudo-parsers
commit :: String -> Pattern
commit decision commits the parser to all the decisions up to the most recent parent decision. This makes all tokens generated in this parsing path immediately available to the caller.
nextLine :: Pattern
nextLine increments sLine counter and resets sColumn.
with :: (value -> State -> State) -> (State -> value) -> value -> Parser result -> Parser result
with setField getField value parser invokes the specified parser with the value of the specified field set to value for the duration of the invocation, using the setField and getField functions to manipulate it.
forbidding :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1
parser `forbidding` pattern parses the specified parser ensuring that it does not contain anything matching the forbidden parser.
limitedTo :: Match match result => match -> Int -> Parser result
parser `limitedTo` limit parses the specified parser ensuring that it does not consume more than the limit input chars.
Consuming input characters
nextIf :: (Char -> Bool) -> Pattern
nextIf test fails if the current position matches the State forbidden pattern or if the State lookahead limit is reached. Otherwise it consumes (and buffers) the next input char if it satisfies test.
Producing tokens
finishToken :: Pattern
finishToken places all collected text into a new token and begins a new one, or does nothing if there are no collected characters.
wrap :: Match match result => match -> Parser result
wrap parser invokes the parser, ensures any unclaimed input characters are wrapped into a token (only happens when testing productions), ensures no input is left unparsed, and returns the parser's result.
consume :: Match match result => match -> Parser result
consume parser invokes the parser and then consumes all remaining unparsed input characters.
token :: Match match result => Code -> match -> Pattern
token code parser places all text matched by parser into a Token with the specified code (unless it is empty). Note it collects the text even if there is an error.
fake :: Code -> String -> Pattern
fake code text creates a token with the specified code and "fake" text characters, instead of whatever characters are collected so far.
meta :: Match match result => match -> Pattern
meta parser collects the text matched by the specified parser into a | Meta token.
indicator :: Match match result => match -> Pattern
indicator code collects the text matched by the specified parser into an Indicator token.
text :: Match match result => match -> Pattern
text parser collects the text matched by the specified parser into a Text token.
nest :: Code -> Pattern
nest code returns an empty token with the specified begin/end code to signal nesting.
Production parameters
data Context
Production context.
Constructors
BlockOutOutside block mapping.
BlockInInside block mapping.
FlowOutOutside flow collection.
FlowInInside flow collection.
FlowKeyInside flow key.
show/hide Instances
data Style
Scalar style.
Constructors
PlainPlain scalar.
DoubleDouble quoted.
SingleSingle quoted.
LiteralLiteral block.
FoldedFolded block.
show/hide Instances
data Chomp
Chomp method.
Constructors
StripRemove all trailing line breaks.
ClipKeep first trailing line break.
KeepKeep all trailing line breaks.
show/hide Instances
Tokenizers
type Tokenizer = String -> ByteString -> [Token]
Tokenizer converts a (named) input text into a list of Token. Errors are reported as tokens with the Error Code.
patternTokenizer :: Pattern -> Tokenizer
patternTokenizer pattern converts the pattern to a simple Tokenizer.
parserTokenizer :: (Show result, Match match result) => String -> match -> Tokenizer
parserTokenizer what parser converts the parser returning what to a simple Tokenizer (only used for tests). The result is reported as a token with the Detected Code The result is reported as a token with the Detected Code.
commitBugs :: Reply result -> DList Token
commitBugs reply inserts an error token if a commit was made outside a named choice. This should never happen outside tests.
yaml :: Tokenizer
yaml name input converts the Unicode input (called name in error messages) to a list of Token according to the YAML spec. This is it!
pName :: String -> String
pName name converts a parser name to the "proper" spec name.
tokenizers :: Map String Tokenizer
tokenizers returns a mapping from a production name to a production tokenizer.
tokenizer :: String -> Maybe Tokenizer
tokenizer name converts the production with the specified name to a simple Tokenizer, or Nothing if it isn't known.
tokenizersWithN :: Map String (Int -> Tokenizer)
tokenizersWithN returns a mapping from a production name to a production tokenizer (that takes an n argument).
tokenizerWithN :: String -> Int -> Maybe Tokenizer
tokenizerWithN name n converts the production (that requires an n argument) with the specified name to a simple Tokenizer, or Nothing if it isn't known.
tokenizersWithC :: Map String (Context -> Tokenizer)
tokenizersWithC returns a mapping from a production name to a production tokenizer (that takes a c argument).
tokenizerWithC :: String -> Context -> Maybe Tokenizer
tokenizerWithC name c converts the production (that requires a c argument) with the specified name to a simple Tokenizer, or Nothing if it isn't known.
tokenizersWithS :: Map String (Style -> Tokenizer)
tokenizersWithS returns a mapping from a production name to a production tokenizer (that takes a s argument).
tokenizerWithS :: String -> Style -> Maybe Tokenizer
tokenizerWithS name s converts the production (that requires an s argument) with the specified name to a simple Tokenizer, or Nothing if it isn't known.
tokenizersWithT :: Map String (Chomp -> Tokenizer)
tokenizersWithT returns a mapping from a production name to a production tokenizer (that takes a t argument).
tokenizerWithT :: String -> Chomp -> Maybe Tokenizer
tokenizerWithT name t converts the production (that requires an t argument) with the specified name to a simple Tokenizer, or Nothing if it isn't known.
tokenizersWithNC :: Map String (Int -> Context -> Tokenizer)
tokenizersWithNC returns a mapping from a production name to a production tokenizer (that requires n and c arguments).
tokenizerWithNC :: String -> Int -> Context -> Maybe Tokenizer
tokenizerWithNC name n c converts the production (that requires n and c arguments) with the specified name to a simple Tokenizer, or Nothing if it isn't known.
tokenizersWithNS :: Map String (Int -> Style -> Tokenizer)
tokenizersWithNS returns a mapping from a production name to a production tokenizer (that requires n and s arguments).
tokenizerWithNS :: String -> Int -> Style -> Maybe Tokenizer
tokenizerWithNS name n s converts the production (that requires n and s arguments) with the specified name to a simple Tokenizer, or Nothing if it isn't known.
tokenizersWithNT :: Map String (Int -> Chomp -> Tokenizer)
tokenizersWithNT returns a mapping from a production name to a production tokenizer (that requires n and t arguments).
tokenizerWithNT :: String -> Int -> Chomp -> Maybe Tokenizer
tokenizerWithNT name n t converts the production (that requires n and t arguments) with the specified name to a simple Tokenizer, or Nothing if it isn't known.
tokenizerNames :: [String]
tokenizerNames returns the list of all productions (tokenizers).
Productions
BNF compatibility helpers
na :: Int
na is the "non-applicable" indentation value. We use Haskell's laziness to verify it really is never used.
asInteger :: Parser Int
asInteger returns the last consumed character, which is assumed to be a decimal digit, as an integer.
result :: result -> Parser result
result value is the same as return value except that we give the Haskell type deduction the additional boost it needs to figure out this is wrapped in a Parser.
Spec productions
Produced by Haddock version 0.8