-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Parser.XmlCharParser Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable UTF-8 character parser and simple XML token parsers -} -- ------------------------------------------------------------ module Text.XML.HXT.Parser.XmlCharParser ( XParser , SimpleXParser , XPState(..) , withNormNewline , withoutNormNewline , xmlChar -- xml char parsers , xmlNameChar , xmlNameStartChar , xmlNCNameChar , xmlNCNameStartChar , xmlLetter , xmlSpaceChar , xmlCRLFChar ) where import Data.Char.Properties.XMLCharProps (isXmlCharCR, isXmlLetter, isXmlNCNameChar, isXmlNCNameStartChar, isXmlNameChar, isXmlNameStartChar, isXmlSpaceCharCR) import Data.String.Unicode import Text.ParserCombinators.Parsec -- ------------------------------------------------------------ type XParser s a = GenParser Char (XPState s) a type SimpleXParser a = XParser () a data XPState s = XPState { xps_normalizeNewline :: ! Bool , xps_userState :: s } withNormNewline :: a -> XPState a withNormNewline x = XPState True x withoutNormNewline :: a -> XPState a withoutNormNewline x = XPState False x -- ------------------------------------------------------------ -- -- Char (2.2) -- -- | -- parse a single Unicode character xmlChar :: XParser s Unicode xmlChar = ( satisfy isXmlCharCR <|> xmlCRLFChar ) "legal XML character" {-# INLINE xmlChar #-} -- | -- parse a XML name character xmlNameChar :: XParser s Unicode xmlNameChar = satisfy isXmlNameChar "legal XML name character" {-# INLINE xmlNameChar #-} -- | -- parse a XML name start character xmlNameStartChar :: XParser s Unicode xmlNameStartChar = satisfy isXmlNameStartChar "legal XML name start character" {-# INLINE xmlNameStartChar #-} -- | -- parse a XML NCName character xmlNCNameChar :: XParser s Unicode xmlNCNameChar = satisfy isXmlNCNameChar "legal XML NCName character" {-# INLINE xmlNCNameChar #-} -- | -- parse a XML NCName start character xmlNCNameStartChar :: XParser s Unicode xmlNCNameStartChar = satisfy isXmlNCNameStartChar "legal XML NCName start character" {-# INLINE xmlNCNameStartChar #-} -- | -- parse a XML letter character xmlLetter :: XParser s Unicode xmlLetter = satisfy isXmlLetter "legal XML letter" {-# INLINE xmlLetter #-} -- | -- White Space (2.3) -- -- end of line handling (2.11) will be done before or with 'xmlCRLFChar' parser xmlSpaceChar :: XParser s Char xmlSpaceChar = ( satisfy isXmlSpaceCharCR <|> xmlCRLFChar ) "white space" {-# INLINE xmlSpaceChar #-} -- | -- White Space Normalization -- -- end of line handling (2.11) -- \#x0D and \#x0D\#x0A are mapped to \#x0A xmlCRLFChar :: XParser s Char xmlCRLFChar = ( do _ <- char '\r' s <- getState if xps_normalizeNewline s then option '\n' (char '\n') else return '\r' ) "newline" -- ------------------------------------------------------------