-- | Functions for identifying and manipulating character codes.
module Zenacy.HTML.Internal.Char
  ( ctow
  , chrWord8
  , chrUTF8
  , chrSurrogate
  , chrScalar
  , chrNonCharacter
  , chrASCIIDigit
  , chrASCIIUpperHexDigit
  , chrASCIILowerHexDigit
  , chrASCIIHexDigit
  , chrASCIIUpperAlpha
  , chrASCIILowerAlpha
  , chrASCIIAlpha
  , chrASCIIAlphanumeric
  , chrWhitespace
  , chrC0Control
  , chrControl
  , chrToUpper
  , chrToLower
  , chrAmpersand
  , chrEOF
  , chrExclamation
  , chrGreater
  , chrLess
  , chrQuestion
  , chrSolidus
  , chrTab
  , chrLF
  , chrFF
  , chrCR
  , chrSpace
  , chrEqual
  , chrQuote
  , chrApostrophe
  , chrGrave
  , chrNumberSign
  , chrHyphen
  , chrBracketRight
  , chrSemicolon
  , chrUpperX
  , chrLowerX
  ) where

import qualified Data.ByteString as S
  ( unpack
  )
import Data.Char
  ( chr
  , ord
  )
import qualified Data.Text as Text
  ( singleton
  )
import qualified Data.Text.Encoding as Text
  ( encodeUtf8
  )
import Data.Word8

-- | Converts a character to a 'Word8' by narrowing its code point.
--
-- The conversion goes through 'fromIntegral', so only the low eight bits of
-- the code point are kept: characters above U+00FF wrap around instead of
-- failing. Callers should only pass characters known to fit in one byte.
ctow :: Char -> Word8
ctow = fromIntegral . ord

-- | Determines if a character code is in the range of a 'Word8',
-- that is between 0x00 and 0xFF inclusive.
chrWord8 :: Int -> Bool
chrWord8 x = 0 <= x && x <= 0xFF

-- | Encodes a Unicode code point as its UTF-8 byte sequence.
--
-- The code point is turned into a one-character 'Text.Text' and then
-- UTF-8 encoded, so the result holds between one and four bytes.
--
-- Codes outside the Unicode code space (negative or above 0x10FFFF) are
-- encoded as U+FFFD REPLACEMENT CHARACTER instead of being handed to 'chr',
-- which is partial and would raise a runtime error for them.
chrUTF8 :: Int -> [Word8]
chrUTF8 x
  | x < 0 || x > 0x10FFFF = encode '\xFFFD'
  | otherwise = encode (chr x)
  where
    encode = S.unpack . Text.encodeUtf8 . Text.singleton

-- | Determines if a character code is a surrogate, that is lies in the
-- range 0xD800 to 0xDFFF inclusive.
chrSurrogate :: Int -> Bool
chrSurrogate x = 0xD800 <= x && x <= 0xDFFF

-- | Determines if a character code is a scalar value, that is not a
-- surrogate.
--
-- Only the surrogate range is excluded. No check is made that the code lies
-- inside the Unicode code space, so negative values and values above
-- 0x10FFFF are also reported as scalars.
chrScalar :: Int -> Bool
chrScalar = not . chrSurrogate

-- | Determines if a code is a Unicode noncharacter.
--
-- Noncharacters are the contiguous range U+FDD0 to U+FDEF plus the last two
-- code points (those ending in FFFE and FFFF) of each of the 17 planes,
-- from U+FFFE up to U+10FFFF.
chrNonCharacter :: Int -> Bool
chrNonCharacter x = inArabicBlock || atPlaneEnd
  where
    inArabicBlock = x >= 0xFDD0 && x <= 0xFDEF
    -- The explicit bounds are required: 'mod' on its own would also accept
    -- negative values and codes past plane 16.
    atPlaneEnd = x >= 0 && x <= 0x10FFFF && x `mod` 0x10000 >= 0xFFFE

-- | Determines if a character is an ASCII digit (0x30 to 0x39 inclusive).
chrASCIIDigit :: Word8 -> Bool
chrASCIIDigit x = x >= 0x30 && x <= 0x39

-- | Determines if a character is an ASCII uppercase hex digit: either an
-- ASCII digit or a byte in the range 0x41 to 0x46 inclusive.
chrASCIIUpperHexDigit :: Word8 -> Bool
chrASCIIUpperHexDigit x = chrASCIIDigit x || (x >= 0x41 && x <= 0x46)

-- | Determines if a character is an ASCII lowercase hex digit: either an
-- ASCII digit or a byte in the range 0x61 to 0x66 inclusive.
chrASCIILowerHexDigit :: Word8 -> Bool
chrASCIILowerHexDigit x = chrASCIIDigit x || (x >= 0x61 && x <= 0x66)

-- | Determines if a character is an ASCII hex digit in either case, that is
-- accepted by 'chrASCIIUpperHexDigit' or 'chrASCIILowerHexDigit'.
chrASCIIHexDigit :: Word8 -> Bool
chrASCIIHexDigit x = chrASCIIUpperHexDigit x || chrASCIILowerHexDigit x

-- | Determines if a character is an ASCII uppercase alpha character
-- (0x41 to 0x5A inclusive).
chrASCIIUpperAlpha :: Word8 -> Bool
chrASCIIUpperAlpha x = x >= 0x41 && x <= 0x5A

-- | Determines if a character is an ASCII lowercase alpha character
-- (0x61 to 0x7A inclusive).
chrASCIILowerAlpha :: Word8 -> Bool
chrASCIILowerAlpha x = x >= 0x61 && x <= 0x7A

-- | Determines if a character is an ASCII alpha character in either case,
-- that is accepted by 'chrASCIIUpperAlpha' or 'chrASCIILowerAlpha'.
chrASCIIAlpha :: Word8 -> Bool
chrASCIIAlpha x = chrASCIIUpperAlpha x || chrASCIILowerAlpha x

-- | Determines if a character is an ASCII alphanumeric character, that is
-- accepted by 'chrASCIIDigit' or 'chrASCIIAlpha'.
chrASCIIAlphanumeric :: Word8 -> Bool
chrASCIIAlphanumeric x = chrASCIIDigit x || chrASCIIAlpha x

-- | Determines if a character is a whitespace character: one of tab,
-- line feed, form feed, carriage return, or space.
chrWhitespace :: Word8 -> Bool
chrWhitespace x = x `elem` [chrTab, chrLF, chrFF, chrCR, chrSpace]

-- | Determines if a character is a C0 control character
-- (0x00 to 0x1F inclusive).
--
-- Only the upper bound is tested: 'Word8' is unsigned, so every value
-- already satisfies the lower bound of 0x00.
chrC0Control :: Word8 -> Bool
chrC0Control x = x <= 0x1F

-- | Determines if a character is a control character: either a C0 control
-- (see 'chrC0Control') or a byte in the range 0x7F to 0x9F inclusive.
chrControl :: Word8 -> Bool
chrControl x = chrC0Control x || (x >= 0x7F && x <= 0x9F)

-- | Converts an ASCII lowercase letter (0x61 to 0x7A) to uppercase.
--
-- Every other byte is returned unchanged. The mapping is deliberately
-- ASCII-only: the @toUpper@ from "Data.Word8" treats bytes as Latin-1 and
-- also rewrites most bytes in the range 0xE0 to 0xFE, which would corrupt
-- the lead bytes of multi-byte UTF-8 sequences.
chrToUpper :: Word8 -> Word8
chrToUpper x
  | x >= 0x61 && x <= 0x7A = x - 0x20
  | otherwise = x

-- | Converts an ASCII uppercase letter (0x41 to 0x5A) to lowercase.
--
-- Every other byte is returned unchanged. The mapping is deliberately
-- ASCII-only: the @toLower@ from "Data.Word8" treats bytes as Latin-1 and
-- also rewrites most bytes in the range 0xC0 to 0xDE, which would corrupt
-- the lead bytes of multi-byte UTF-8 sequences.
chrToLower :: Word8 -> Word8
chrToLower x
  | x >= 0x41 && x <= 0x5A = x + 0x20
  | otherwise = x

-- | Character code for ampersand (@&@).
chrAmpersand :: Word8
chrAmpersand = _ampersand

-- | Character code used to represent EOF.
--
-- This is the '_nul' constant, so EOF shares its code with the NUL byte.
chrEOF :: Word8
chrEOF = _nul

-- | Character code for exclamation mark (@!@).
chrExclamation :: Word8
chrExclamation = _exclam

-- | Character code for greater-than sign (@>@).
chrGreater :: Word8
chrGreater = _greater

-- | Character code for less-than sign (@<@).
chrLess :: Word8
chrLess = _less

-- | Character code for question mark (@?@).
chrQuestion :: Word8
chrQuestion = _question

-- | Character code for solidus (forward slash).
chrSolidus :: Word8
chrSolidus = _slash

-- | Character code for horizontal tab.
chrTab :: Word8
chrTab = _tab

-- | Character code for line feed (LF).
chrLF :: Word8
chrLF = _lf

-- | Character code for form feed (FF), taken from the '_np' constant.
chrFF :: Word8
chrFF = _np

-- | Character code for carriage return (CR).
chrCR :: Word8
chrCR = _cr

-- | Character code for the space character.
chrSpace :: Word8
chrSpace = _space

-- | Character code for equals sign (@=@).
chrEqual :: Word8
chrEqual = _equal

-- | Character code for double quotation mark.
chrQuote :: Word8
chrQuote = _quotedbl

-- | Character code for apostrophe (single quotation mark).
chrApostrophe :: Word8
chrApostrophe = _quotesingle

-- | Character code for grave accent (backtick).
chrGrave :: Word8
chrGrave = _grave

-- | Character code for number sign (@#@).
chrNumberSign :: Word8
chrNumberSign = _numbersign

-- | Character code for hyphen (@-@).
chrHyphen :: Word8
chrHyphen = _hyphen

-- | Character code for right square bracket (@]@).
chrBracketRight :: Word8
chrBracketRight = _bracketright

-- | Character code for semicolon (@;@).
chrSemicolon :: Word8
chrSemicolon = _semicolon

-- | Character code for uppercase @X@ (0x58).
chrUpperX :: Word8
chrUpperX = 0x58

-- | Character code for lowercase @x@ (0x78).
chrLowerX :: Word8
chrLowerX = 0x78