-- | Functions for identifying and manipulating character codes.
module Zenacy.HTML.Internal.Char
  ( ctow
  , chrWord8
  , chrUTF8
  , chrSurrogate
  , chrScalar
  , chrNonCharacter
  , chrASCIIDigit
  , chrASCIIUpperHexDigit
  , chrASCIILowerHexDigit
  , chrASCIIHexDigit
  , chrASCIIUpperAlpha
  , chrASCIILowerAlpha
  , chrASCIIAlpha
  , chrASCIIAlphanumeric
  , chrWhitespace
  , chrC0Control
  , chrControl
  , chrToUpper
  , chrToLower
  , chrAmpersand
  , chrEOF
  , chrExclamation
  , chrGreater
  , chrLess
  , chrQuestion
  , chrSolidus
  , chrTab
  , chrLF
  , chrFF
  , chrCR
  , chrSpace
  , chrEqual
  , chrQuote
  , chrApostrophe
  , chrGrave
  , chrNumberSign
  , chrHyphen
  , chrBracketRight
  , chrSemicolon
  , chrUpperX
  , chrLowerX
  ) where

import qualified Data.ByteString as S
  ( unpack
  )
import Data.Char
  ( chr
  , ord
  )
import qualified Data.Text as Text
  ( singleton
  )
import qualified Data.Text.Encoding as Text
  ( encodeUtf8
  )
import Data.Word8

-- | Converts a character to a 'Word8'.
--
-- The code point is narrowed with 'fromIntegral', so a character above
-- U+00FF wraps modulo 256 instead of being rejected. Use 'chrWord8' on the
-- code point first when that matters.
ctow :: Char -> Word8
ctow = fromIntegral . ord

-- | Determines if a character code is in the range of a 'Word8'
-- (0 to 0xFF inclusive).
chrWord8 :: Int -> Bool
chrWord8 x = x >= 0 && x <= 0xFF

-- | Encodes a Unicode code point as its UTF-8 byte sequence.
--
-- The function is total: a value outside 0..0x10FFFF, for which 'chr' would
-- raise an error, is encoded as U+FFFD (the replacement character). This
-- matches what 'Text.singleton' is documented to do for surrogate code
-- points, which cannot be stored in a @Text@.
chrUTF8 :: Int -> [Word8]
chrUTF8 x = S.unpack . Text.encodeUtf8 . Text.singleton $ c
  where
    -- Guard the partial 'chr' so callers never see an exception.
    c | x >= 0 && x <= 0x10FFFF = chr x
      | otherwise = '\xFFFD'

-- | Determines if a character code is a surrogate (0xD800 to 0xDFFF).
chrSurrogate :: Int -> Bool
chrSurrogate x = x >= 0xD800 && x <= 0xDFFF

-- | Determines if a character code is a scalar, i.e. not a surrogate.
--
-- Only surrogates are excluded; the argument is not checked against the
-- code point range 0..0x10FFFF.
chrScalar :: Int -> Bool
chrScalar = not . chrSurrogate

-- | Determines if a code is not a character code (a Unicode noncharacter):
-- 0xFDD0 to 0xFDEF, or one of the two final code points of any plane.
chrNonCharacter :: Int -> Bool
chrNonCharacter x =
  (x >= 0xFDD0 && x <= 0xFDEF) || x `elem` planeEnds
  where
    -- The code points ending in FFFE or FFFF for planes 0 through 16.
    planeEnds =
      [ 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF
      , 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF
      , 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF
      , 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF
      , 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF
      , 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF
      , 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF
      , 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF
      , 0x10FFFE, 0x10FFFF
      ]

-- | Determines if a character is an ASCII digit.
chrASCIIDigit :: Word8 -> Bool
chrASCIIDigit x = x >= 0x30 && x <= 0x39

-- | Determines if a character is an ASCII uppercase hex digit
-- (a digit, or 0x41 to 0x46).
chrASCIIUpperHexDigit :: Word8 -> Bool
chrASCIIUpperHexDigit x = chrASCIIDigit x || (x >= 0x41 && x <= 0x46)

-- | Determines if a character is an ASCII lowercase hex digit
-- (a digit, or 0x61 to 0x66).
chrASCIILowerHexDigit :: Word8 -> Bool
chrASCIILowerHexDigit x = chrASCIIDigit x || (x >= 0x61 && x <= 0x66)

-- | Determines if a character is an ASCII hex digit (any case).
chrASCIIHexDigit :: Word8 -> Bool
chrASCIIHexDigit x = chrASCIIUpperHexDigit x || chrASCIILowerHexDigit x

-- | Determines if a character is an ASCII uppercase alpha character
-- (0x41 to 0x5A).
chrASCIIUpperAlpha :: Word8 -> Bool
chrASCIIUpperAlpha x = x >= 0x41 && x <= 0x5A

-- | Determines if a character is an ASCII lowercase alpha character
-- (0x61 to 0x7A).
chrASCIILowerAlpha :: Word8 -> Bool
chrASCIILowerAlpha x = x >= 0x61 && x <= 0x7A

-- | Determines if a character is an ASCII alpha character (any case).
chrASCIIAlpha :: Word8 -> Bool
chrASCIIAlpha x = chrASCIIUpperAlpha x || chrASCIILowerAlpha x

-- | Determines if a character is an ASCII alphanumeric character (any case).
chrASCIIAlphanumeric :: Word8 -> Bool
chrASCIIAlphanumeric x = chrASCIIDigit x || chrASCIIAlpha x

-- | Determines if a character is a whitespace character: tab, line feed,
-- form feed, carriage return or space.
chrWhitespace :: Word8 -> Bool
chrWhitespace x = x `elem` [chrTab, chrLF, chrFF, chrCR, chrSpace]

-- | Determines if a character is a C0 control character (0x00 to 0x1F).
chrC0Control :: Word8 -> Bool
chrC0Control x = x <= 0x1F -- a Word8 is never below 0x00

-- | Determines if a character is a control character: a C0 control, or a
-- value in the range 0x7F to 0x9F.
chrControl :: Word8 -> Bool
chrControl x = chrC0Control x || (x >= 0x7F && x <= 0x9F)

-- | Converts an ASCII lowercase letter to uppercase.
--
-- Every other value, including all bytes from 0x80 upwards, is returned
-- unchanged, so the bytes of a multi-byte UTF-8 sequence are never altered.
chrToUpper :: Word8 -> Word8
chrToUpper x
  | chrASCIILowerAlpha x = x - 0x20
  | otherwise = x

-- | Converts an ASCII uppercase letter to lowercase.
--
-- Every other value, including all bytes from 0x80 upwards, is returned
-- unchanged, so the bytes of a multi-byte UTF-8 sequence are never altered.
chrToLower :: Word8 -> Word8
chrToLower x
  | chrASCIIUpperAlpha x = x + 0x20
  | otherwise = x

-- | Character code for ampersand.
chrAmpersand :: Word8
chrAmpersand = _ampersand

-- | Character code for EOF.
chrEOF :: Word8
chrEOF = _nul

-- | Byte value of the exclamation mark.
chrExclamation :: Word8
chrExclamation = _exclam

-- | Byte value of the greater-than sign.
chrGreater :: Word8
chrGreater = _greater

-- | Byte value of the less-than sign.
chrLess :: Word8
chrLess = _less

-- | Byte value of the question mark.
chrQuestion :: Word8
chrQuestion = _question

-- | Byte value of the solidus (forward slash).
chrSolidus :: Word8
chrSolidus = _slash

-- | Byte value of the horizontal tab.
chrTab :: Word8
chrTab = _tab

-- | Byte value of the line feed.
chrLF :: Word8
chrLF = _lf

-- | Byte value of the form feed (bound to '_np').
chrFF :: Word8
chrFF = _np

-- | Byte value of the carriage return.
chrCR :: Word8
chrCR = _cr

-- | Byte value of the space character.
chrSpace :: Word8
chrSpace = _space

-- | Byte value of the equals sign.
chrEqual :: Word8
chrEqual = _equal

-- | Byte value of the double quotation mark.
chrQuote :: Word8
chrQuote = _quotedbl

-- | Byte value of the apostrophe (single quotation mark).
chrApostrophe :: Word8
chrApostrophe = _quotesingle

-- | Byte value of the grave accent (backtick).
chrGrave :: Word8
chrGrave = _grave

-- | Byte value of the number sign.
chrNumberSign :: Word8
chrNumberSign = _numbersign

-- | Byte value of the hyphen.
chrHyphen :: Word8
chrHyphen = _hyphen

-- | Byte value of the right square bracket.
chrBracketRight :: Word8
chrBracketRight = _bracketright

-- | Byte value of the semicolon.
chrSemicolon :: Word8
chrSemicolon = _semicolon

-- | Byte value of the uppercase letter X (0x58).
chrUpperX :: Word8
chrUpperX = 0x58

-- | Byte value of the lowercase letter x (0x78).
chrLowerX :: Word8
chrLowerX = 0x78