Text.Yaml.Reference

YamlReference-0.3: YAML reference implementation

Text.Yaml.Reference

Portability	portable
Stability	alpha
Maintainer	yaml-oren@ben-kiki.org

Description

Implementation of the YAML syntax as defined in http://www.yaml.org. Actually this file contains the parsing framework and includes (using CPP) the actual productions from Reference.bnf.

The parsing framework is fully streaming (generates output tokens "immediately"). The memory leak that existed in previous version has been plugged.

Synopsis

(|>) :: record -> (record -> value) -> value

data Encoding

= UTF8

| UTF16LE

| UTF16BE

decode :: ByteString -> (Encoding, [Char])

detectEncoding :: [Char] -> Encoding

undoEncoding :: Encoding -> ByteString -> [Char]

combinePairs :: [Char] -> [Char]

combineLead :: Char -> [Char] -> [Char]

surrogateOffset :: Int

combineSurrogates :: Char -> Char -> Char

hasFewerThan :: Int -> ByteString -> Bool

undoUTF16LE :: ByteString -> [Char]

undoUTF16BE :: ByteString -> [Char]

undoUTF8 :: ByteString -> [Char]

decodeTwoUTF8 :: Char -> ByteString -> [Char]

combineTwoUTF8 :: Char -> Char -> Char

decodeThreeUTF8 :: Char -> ByteString -> [Char]

combineThreeUTF8 :: Char -> Char -> Char -> Char

decodeFourUTF8 :: Char -> ByteString -> [Char]

combineFourUTF8 :: Char -> Char -> Char -> Char -> Char

data Code

= Bom

| Text

| Meta

| Break

| White

| Indent

| EndTag

| EndPair

| EndNode

| Error

| Test

| Detected

data Token = Token {

tCode :: Code

tText :: String

}

escapeString :: String -> String

toHex :: Int -> Int -> String

showTokens :: [Token] -> String

data Parser result = Parser (State -> Reply result)

data Result result

= Failed String

| Result result

| More (Parser result)

data Reply result = Reply {

rResult :: !(Result result)

rTokens :: !(DList Token)

rCommit :: !(Maybe String)

rState :: !State

}

type Pattern = Parser ()

data State = State {

sName :: !String

sEncoding :: !Encoding

sDecision :: !String

sLimit :: !Int

sForbidden :: !(Maybe Pattern)

sIsPeek :: !Bool

sChars :: ![Char]

sOffset :: !Int

sLine :: !Int

sColumn :: !Int

sCode :: !Code

sLast :: !Char

sInput :: ![Char]

}

initialState :: String -> ByteString -> State

setDecision :: String -> State -> State

setLimit :: Int -> State -> State

setForbidden :: Maybe Pattern -> State -> State

setCode :: Code -> State -> State

class Match parameter result | parameter -> result where

match :: parameter -> Parser result

returnReply :: State -> result -> Reply result

failReply :: State -> String -> Reply result

unexpectedReply :: State -> Reply result

(%) :: Match match result => match -> Int -> Pattern

(<%) :: Match match result => match -> Int -> Pattern

(^) :: Match match result => String -> match -> Parser result

(!) :: Match match result => match -> String -> Pattern

(?!) :: Match match result => match -> String -> Pattern

(-) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1

(&) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result2

(/) :: (Match match1 result, Match match2 result) => match1 -> match2 -> Parser result

(?) :: Match match result => match -> Pattern

(*) :: Match match result => match -> Pattern

(+) :: Match match result => match -> Pattern

decide :: Parser result -> Parser result -> Parser result

choice :: String -> Parser result -> Parser result

peek :: Match match result => match -> Parser result

reject :: Match match result => match -> Maybe String -> Pattern

nonEmpty :: Match match result => match -> Parser result

empty :: Pattern

eof :: Pattern

sol :: Pattern

commit :: String -> Pattern

nextLine :: Pattern

with :: (value -> State -> State) -> (State -> value) -> value -> Parser result -> Parser result

forbidding :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1

limitedTo :: Match match result => match -> Int -> Parser result

nextIf :: (Char -> Bool) -> Pattern

finishToken :: Pattern

wrap :: Match match result => match -> Parser result

consume :: Match match result => match -> Parser result

token :: Match match result => Code -> match -> Pattern

fake :: Code -> String -> Pattern

meta :: Match match result => match -> Pattern

indicator :: Match match result => match -> Pattern

text :: Match match result => match -> Pattern

nest :: Code -> Pattern

data Context

= BlockOut

| BlockIn

| FlowOut

| FlowIn

| FlowKey

data Style

= Plain

| Double

| Single

| Literal

| Folded

data Chomp

= Strip

| Clip

| Keep

type Tokenizer = String -> ByteString -> [Token]

patternTokenizer :: Pattern -> Tokenizer

parserTokenizer :: (Show result, Match match result) => String -> match -> Tokenizer

commitBugs :: Reply result -> DList Token

yaml :: Tokenizer

pName :: String -> String

tokenizers :: Map String Tokenizer

tokenizer :: String -> Maybe Tokenizer

tokenizersWithN :: Map String (Int -> Tokenizer)

tokenizerWithN :: String -> Int -> Maybe Tokenizer

tokenizersWithC :: Map String (Context -> Tokenizer)

tokenizerWithC :: String -> Context -> Maybe Tokenizer

tokenizersWithS :: Map String (Style -> Tokenizer)

tokenizerWithS :: String -> Style -> Maybe Tokenizer

tokenizersWithT :: Map String (Chomp -> Tokenizer)

tokenizerWithT :: String -> Chomp -> Maybe Tokenizer

tokenizersWithNC :: Map String (Int -> Context -> Tokenizer)

tokenizerWithNC :: String -> Int -> Context -> Maybe Tokenizer

tokenizersWithNS :: Map String (Int -> Style -> Tokenizer)

tokenizerWithNS :: String -> Int -> Style -> Maybe Tokenizer

tokenizersWithNT :: Map String (Int -> Chomp -> Tokenizer)

tokenizerWithNT :: String -> Int -> Chomp -> Maybe Tokenizer

tokenizerNames :: [String]

na :: Int

asInteger :: Parser Int

result :: result -> Parser result

Generic operators

Numeric operators

Record field access

(|>) :: record -> (record -> value) -> value

record |> field is the same as field record, but is more readable.

UTF decoding

data Encoding

Recognized Unicode encodings. UTF-32 isn't required by YAML parsers.

Constructors

UTF8	UTF-8 encoding (or ASCII)
UTF16LE	UTF-16 little endian
UTF16BE	UTF-16 big endian

Instances

Show Encoding

decode :: ByteString -> (Encoding, [Char])

decode bytes automatically detects the Encoding used and converts the bytes to Unicode characters.

detectEncoding :: [Char] -> Encoding

detectEncoding text examines the first few chars (bytes) of the text to deduce the Unicode encoding used according to the YAML spec.

undoEncoding :: Encoding -> ByteString -> [Char]

undoEncoding encoding bytes converts a bytes stream to Unicode characters according to the encoding.

UTF-16 decoding

combinePairs :: [Char] -> [Char]

combinePairs chars converts each pair of UTF-16 surrogate characters to a single Unicode character.

combineLead :: Char -> [Char] -> [Char]

combineLead lead rest combines the lead surrogate with the head of the rest of the input chars, assumed to be a trail surrogate, and continues combining surrogate pairs.

surrogateOffset :: Int

surrogateOffset is copied from the Unicode FAQs.

combineSurrogates :: Char -> Char -> Char

combineSurrogates lead trail combines two UTF-16 surrogates into a single Unicode character.

hasFewerThan :: Int -> ByteString -> Bool

hasFewerThan bytes n checks whether there are fewer than n bytes left to read.

undoUTF16LE :: ByteString -> [Char]

undoUTF18LE bytes decoded a UTF-16-LE bytes stream to Unicode chars.

undoUTF16BE :: ByteString -> [Char]

undoUTF18BE bytes decoded a UTF-16-BE bytes stream to Unicode chars.

UTF-8 decoding

undoUTF8 :: ByteString -> [Char]

undoUTF8 bytes decoded a UTF-8 bytes stream to Unicode chars.

decodeTwoUTF8 :: Char -> ByteString -> [Char]

decodeTwoUTF8 first bytes decodes a two-byte UTF-8 character, where the first byte is already available and the second is the head of the bytes, and then continues to undo the UTF-8 encoding.

combineTwoUTF8 :: Char -> Char -> Char

combineTwoUTF8 first second combines the first and second bytes of a two-byte UTF-8 char into a single Unicode char.

decodeThreeUTF8 :: Char -> ByteString -> [Char]

decodeThreeUTF8 first bytes decodes a three-byte UTF-8 character, where the first byte is already available and the second and third are the head of the bytes, and then continues to undo the UTF-8 encoding.

combineThreeUTF8 :: Char -> Char -> Char -> Char

combineThreeUTF8 first second combines the first, second and third bytes of a three-byte UTF-8 char into a single Unicode char.

decodeFourUTF8 :: Char -> ByteString -> [Char]

decodeFourUTF8 first bytes decodes a four-byte UTF-8 character, where the first byte is already available and the second, third and fourth are the head of the bytes, and then continues to undo the UTF-8 encoding.

combineFourUTF8 :: Char -> Char -> Char -> Char -> Char

combineFourUTF8 first second combines the first, second and third bytes of a three-byte UTF-8 char into a single Unicode char.

Result tokens

data Code

Token codes.

Constructors

Bom	BOM, contains "`TF8`", "`TF16LE`" or "`TF16BE`".
Text	Content text characters.
Meta	Non-content (meta) text characters.
Break	Line break preserved in content.
Continue	Separation line break.
LineFeed	Line break normalized to content line feed.
LineFold	Line break folded to content space.
Indicator	Character indicating structure.
White	Separation white space.
Indent	Indentation spaces.
DocumentStart	Document start marker.
DocumentEnd	Document end marker.
BeginEscape	Begins escape sequence.
EndEscape	Ends escape sequence.
BeginComment	Begins comment.
EndComment	Ends comment.
BeginDirective	Begins directive.
EndDirective	Ends directive.
BeginTag	Begins tag.
EndTag	Ends tag.
BeginHandle	Begins tag handle.
EndHandle	Ends tag handle.
BeginAnchor	Begins anchor.
EndAnchor	Ends anchor.
BeginProperties	Begins node properties.
EndProperties	Ends node properties.
BeginAlias	Begins alias.
EndAlias	Ends alias.
BeginScalar	Begins scalar content.
EndScalar	Ends scalar content.
BeginSequence	Begins sequence content.
EndSequence	Ends sequence content.
BeginMapping	Begins mapping content.
EndMapping	Ends mapping content.
BeginPair	Begins mapping key:value pair.
EndPair	Ends mapping key:value pair.
BeginNode	Begins complete node.
EndNode	Ends complete node.
BeginDocument	Begins document.
EndDocument	Ends document.
BeginStream	Begins YAML stream.
EndStream	Ends YAML stream.
Error	Parsing error at this point. For testing:.
Test	Test characters otherwise unassigned.
Detected	Detected parameter.

Instances

Eq Code

Show Code

data Token

Parsed token.

Constructors

Token

tCode :: Code	Specific token `Code`.
tText :: String	Contained input chars, if any.

Instances

Show Token

escapeString :: String -> String

escapeString string escapes all the non-ASCII characters in the string, as well as escaping the "\" character, using the "\xXX", "\uXXXX" and "\UXXXXXXXX" escape sequences.

toHex :: Int -> Int -> String

toHex digits int converts the int to the specified number of hexadecimal digits.

showTokens :: [Token] -> String

showTokens tokens converts a list of tokens to a multi-line YEAST text.

Parsing framework

data Parser result

A Parser is basically a function computing a Reply.

Constructors

Parser (State -> Reply result)

Instances

Monad Parser

Match (Parser result) result

data Result result

The Result of each invocation is either an error, the actual result, or a continuation for computing the actual result.

Constructors

Failed String	Parsing aborted with a failure.
Result result	Parsing completed with a result.
More (Parser result)	Parsing is ongoing with a continuation.

Instances

Show result => Show (Result result)

data Reply result

Each invication of a Parser yields a Reply. The Result is only one part of the Reply.

Constructors

Reply

rResult :: !(Result result)	Parsing result.
rTokens :: !(DList Token)	Tokens generated by the parser.
rCommit :: !(Maybe String)	Commitment to a decision point.
rState :: !State	The updated parser state.

Instances

Show result => Show (Reply result)

type Pattern = Parser ()

Parsing state

data State

The internal parser state. We don't bother with parameterising it with a "UserState", we just bundle the generic and specific fields together (not that it is that easy to draw the line - is sLine generic or specific?).

Constructors

State

sName :: !String	The input name for error messages.
sEncoding :: !Encoding	The input UTF encoding.
sDecision :: !String	Current decision name.
sLimit :: !Int	Lookahead characters limit.
sForbidden :: !(Maybe Pattern)	Pattern we must not enter into.
sIsPeek :: !Bool	Disables token generation.
sChars :: ![Char]	(Reversed) characters collected for a token.
sOffset :: !Int	Offset in characters in the input.
sLine :: !Int	Builds on YAML's line break definition.
sColumn :: !Int	Actually character number - we hate tabs.
sCode :: !Code	Of token we are collecting chars for.
sLast :: !Char	Last matched character.
sInput :: ![Char]	The decoded input characters.

Instances

Show State

initialState :: String -> ByteString -> State

initialState name input returns an initial State for parsing the input (with name for error messages).

Setters

setDecision :: String -> State -> State

setDecision name state sets the sDecision field to decision.

setLimit :: Int -> State -> State

setLimit limit state sets the sLimit field to limit.

setForbidden :: Maybe Pattern -> State -> State

setForbidden forbidden state sets the sForbidden field to forbidden.

setCode :: Code -> State -> State

setCode code state sets the sCode field to code.

Implicit parsers

class Match parameter result | parameter -> result where

Match parameter result specifies that we can convert the parameter to a Parser returning the result.

Methods

match :: parameter -> Parser result

Instances

Match Char ()

Match String ()

Match (Char, Char) ()

Match (Parser result) result

Reply constructors

returnReply :: State -> result -> Reply result

returnReply state result prepares a Reply with the specified state and result.

failReply :: State -> String -> Reply result

failReply state message prepares a Reply with the specified state and error message.

unexpectedReply :: State -> Reply result

unexpectedReply state returns a failReply for an unexpected character.

Parsing Monad

Parsing operators

(%) :: Match match result => match -> Int -> Pattern

parser % n repeats parser exactly n times.

(<%) :: Match match result => match -> Int -> Pattern

parser <% n matches fewer than n occurrences of parser.

(^) :: Match match result => String -> match -> Parser result

decision ^ (option / option / ...) provides a decision name to the choice about to be made, to allow to commit to it.

(!) :: Match match result => match -> String -> Pattern

parser ! decision commits to decision after successfully matching the parser.

(?!) :: Match match result => match -> String -> Pattern

parser ?! decision commits to decision if the current position matches parser, without consuming any characters.

(-) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1

parser - rejected matches parser, except if rejected matches at this point.

(&) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result2

before & after parses before and, if it succeeds, parses after. This basically invokes the monad's >>= (bind) method.

(/) :: (Match match1 result, Match match2 result) => match1 -> match2 -> Parser result

first / second tries to parse first, and failing that parses second, unless first has committed in which case is fails immediately.

(?) :: Match match result => match -> Pattern

(optional ?) tries to match parser, otherwise does nothing.

(*) :: Match match result => match -> Pattern

(parser *) matches zero or more occurrences of repeat, as long as each one actually consumes input characters.

(+) :: Match match result => match -> Pattern

(parser +) matches one or more occurrences of parser, as long as each one actually consumed input characters.

Basic parsers

decide :: Parser result -> Parser result -> Parser result

decide first second tries to parse first, and failing that parses second, unless first has committed in which case is fails immediately.

choice :: String -> Parser result -> Parser result

choice decision parser provides a decision name to the choice about to be made in parser, to allow to commit to it.

peek :: Match match result => match -> Parser result

peek parser succeeds if parser matches at this point, but does not consume any input.

reject :: Match match result => match -> Maybe String -> Pattern

reject parser name fails if parser matches at this point, and does nothing otherwise. If name is provided, it is used in the error message, otherwise the messages uses the current character.

nonEmpty :: Match match result => match -> Parser result

nonEmpty parser succeeds if parser matches some non-empty input characters at this point.

empty :: Pattern

empty always matches without consuming any input.

eof :: Pattern

eof matches the end of the input.

sol :: Pattern

sol matches the start of a line.

State manipulation pseudo-parsers

commit :: String -> Pattern

commit decision commits the parser to all the decisions up to the most recent parent decision. This makes all tokens generated in this parsing path immediately available to the caller.

nextLine :: Pattern

nextLine increments sLine counter and resets sColumn.

with :: (value -> State -> State) -> (State -> value) -> value -> Parser result -> Parser result

with setField getField value parser invokes the specified parser with the value of the specified field set to value for the duration of the invocation, using the setField and getField functions to manipulate it.

forbidding :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1

parser `forbidding` pattern parses the specified parser ensuring that it does not contain anything matching the forbidden parser.

limitedTo :: Match match result => match -> Int -> Parser result

parser `limitedTo` limit parses the specified parser ensuring that it does not consume more than the limit input chars.

Consuming input characters

nextIf :: (Char -> Bool) -> Pattern

nextIf test fails if the current position matches the State forbidden pattern or if the State lookahead limit is reached. Otherwise it consumes (and buffers) the next input char if it satisfies test.

Producing tokens

finishToken :: Pattern

finishToken places all collected text into a new token and begins a new one, or does nothing if there are no collected characters.

wrap :: Match match result => match -> Parser result

wrap parser invokes the parser, ensures any unclaimed input characters are wrapped into a token (only happens when testing productions), ensures no input is left unparsed, and returns the parser's result.

consume :: Match match result => match -> Parser result

consume parser invokes the parser and then consumes all remaining unparsed input characters.

token :: Match match result => Code -> match -> Pattern

token code parser places all text matched by parser into a Token with the specified code (unless it is empty). Note it collects the text even if there is an error.

fake :: Code -> String -> Pattern

fake code text creates a token with the specified code and "fake" text characters, instead of whatever characters are collected so far.

meta :: Match match result => match -> Pattern

meta parser collects the text matched by the specified parser into a | Meta token.

indicator :: Match match result => match -> Pattern

indicator code collects the text matched by the specified parser into an Indicator token.

text :: Match match result => match -> Pattern

text parser collects the text matched by the specified parser into a Text token.

nest :: Code -> Pattern

nest code returns an empty token with the specified begin/end code to signal nesting.

Production parameters

data Context

Production context.

Constructors

BlockOut	Outside block mapping.
BlockIn	Inside block mapping.
FlowOut	Outside flow collection.
FlowIn	Inside flow collection.
FlowKey	Inside flow key.

Instances

Read Context

Show Context

data Style

Scalar style.

Constructors

Plain	Plain scalar.
Double	Double quoted.
Single	Single quoted.
Literal	Literal block.
Folded	Folded block.

Instances

Read Style

Show Style

data Chomp

Chomp method.

Constructors

Strip	Remove all trailing line breaks.
Clip	Keep first trailing line break.
Keep	Keep all trailing line breaks.

Instances

Read Chomp

Show Chomp

Tokenizers

type Tokenizer = String -> ByteString -> [Token]

Tokenizer converts a (named) input text into a list of Token. Errors are reported as tokens with the Error Code.

patternTokenizer :: Pattern -> Tokenizer

patternTokenizer pattern converts the pattern to a simple Tokenizer.

parserTokenizer :: (Show result, Match match result) => String -> match -> Tokenizer

parserTokenizer what parser converts the parser returning what to a simple Tokenizer (only used for tests). The result is reported as a token with the Detected Code The result is reported as a token with the Detected Code.

commitBugs :: Reply result -> DList Token

commitBugs reply inserts an error token if a commit was made outside a named choice. This should never happen outside tests.

yaml :: Tokenizer

yaml name input converts the Unicode input (called name in error messages) to a list of Token according to the YAML spec. This is it!

pName :: String -> String

pName name converts a parser name to the "proper" spec name.

tokenizers :: Map String Tokenizer

tokenizers returns a mapping from a production name to a production tokenizer.

tokenizer :: String -> Maybe Tokenizer

tokenizer name converts the production with the specified name to a simple Tokenizer, or Nothing if it isn't known.

tokenizersWithN :: Map String (Int -> Tokenizer)

tokenizersWithN returns a mapping from a production name to a production tokenizer (that takes an n argument).

tokenizerWithN :: String -> Int -> Maybe Tokenizer

tokenizerWithN name n converts the production (that requires an n argument) with the specified name to a simple Tokenizer, or Nothing if it isn't known.

tokenizersWithC :: Map String (Context -> Tokenizer)

tokenizersWithC returns a mapping from a production name to a production tokenizer (that takes a c argument).

tokenizerWithC :: String -> Context -> Maybe Tokenizer

tokenizerWithC name c converts the production (that requires a c argument) with the specified name to a simple Tokenizer, or Nothing if it isn't known.

tokenizersWithS :: Map String (Style -> Tokenizer)

tokenizersWithS returns a mapping from a production name to a production tokenizer (that takes a s argument).

tokenizerWithS :: String -> Style -> Maybe Tokenizer

tokenizerWithS name s converts the production (that requires an s argument) with the specified name to a simple Tokenizer, or Nothing if it isn't known.

tokenizersWithT :: Map String (Chomp -> Tokenizer)

tokenizersWithT returns a mapping from a production name to a production tokenizer (that takes a t argument).

tokenizerWithT :: String -> Chomp -> Maybe Tokenizer

tokenizerWithT name t converts the production (that requires an t argument) with the specified name to a simple Tokenizer, or Nothing if it isn't known.

tokenizersWithNC :: Map String (Int -> Context -> Tokenizer)

tokenizersWithNC returns a mapping from a production name to a production tokenizer (that requires n and c arguments).

tokenizerWithNC :: String -> Int -> Context -> Maybe Tokenizer

tokenizerWithNC name n c converts the production (that requires n and c arguments) with the specified name to a simple Tokenizer, or Nothing if it isn't known.

tokenizersWithNS :: Map String (Int -> Style -> Tokenizer)

tokenizersWithNS returns a mapping from a production name to a production tokenizer (that requires n and s arguments).

tokenizerWithNS :: String -> Int -> Style -> Maybe Tokenizer

tokenizerWithNS name n s converts the production (that requires n and s arguments) with the specified name to a simple Tokenizer, or Nothing if it isn't known.

tokenizersWithNT :: Map String (Int -> Chomp -> Tokenizer)

tokenizersWithNT returns a mapping from a production name to a production tokenizer (that requires n and t arguments).

tokenizerWithNT :: String -> Int -> Chomp -> Maybe Tokenizer

tokenizerWithNT name n t converts the production (that requires n and t arguments) with the specified name to a simple Tokenizer, or Nothing if it isn't known.

tokenizerNames :: [String]

tokenizerNames returns the list of all productions (tokenizers).

Productions

BNF compatibility helpers

na :: Int

na is the "non-applicable" indentation value. We use Haskell's laziness to verify it really is never used.

asInteger :: Parser Int

asInteger returns the last consumed character, which is assumed to be a decimal digit, as an integer.

result :: result -> Parser result

result value is the same as return value except that we give the Haskell type deduction the additional boost it needs to figure out this is wrapped in a Parser.

Spec productions

Produced by Haddock version 0.8