{-# HLINT ignore "Use String" #-}

{-|
Description:    Iconic names and categories for various characters.

Copyright:      (c) 2020-2021 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      experimental
Portability:    portable
-}
module Web.Willow.Common.Encoding.Character
    ( replacementChar
      -- * ASCII Character Ranges
    , asciiWhitespace
    , isAsciiAlpha
    , isAsciiAlphaNum
    , isAsciiWhitespace
    , toAsciiLower
    , toAsciiUpper
    ) where


import qualified Data.Char as C


-- | __Infra:__
--      @[ASCII whitespace]
--      (https://infra.spec.whatwg.org/#ascii-whitespace)@
-- 
-- The ASCII characters defined as whitespace in the HTML standard.  Unlike
-- Haskell's 'Data.Char.isSpace' and anything following that example, does
-- /not/ include @\\x11@ (VT).
asciiWhitespace :: [Char]
asciiWhitespace = "\t\n\f\r "


-- | __Infra:__
--      @[ASCII alpha]
--      (https://infra.spec.whatwg.org/#ascii-alpha)@
-- 
-- Test whether the character is an alphabetic character in the ASCII range
-- (@[A-Za-z]@).
isAsciiAlpha :: Char -> Bool
isAsciiAlpha c = C.isAsciiUpper c || C.isAsciiLower c

-- | __Infra:__
--      @[ASCII alphanumeric]
--      (https://infra.spec.whatwg.org/#ascii-alphanumeric)@
-- 
-- Test whether the character is either an alphabetic character or a digit in
-- the ASCII range (@[A-Za-z0-9]@).
isAsciiAlphaNum :: Char -> Bool
isAsciiAlphaNum c = isAsciiAlpha c || C.isDigit  c

-- | __Infra:__
--      @[ASCII whitespace]
--      (https://infra.spec.whatwg.org/#ascii-whitespace)@
-- 
-- Test whether the character fits the spec's definition of 'asciiWhitespace'.
isAsciiWhitespace :: Char -> Bool
isAsciiWhitespace = flip elem asciiWhitespace

-- | Convert an uppercase, alphabetic, ASCII character to its lowercase form.
-- This has the same semantics within the ASCII range as 'C.toLower', but
-- leaves any non-ASCII characters unchanged.
-- 
-- >>> toAsciiLower 'A'
-- 'a'
-- 
-- >>> toAsciiLower 'Á'
-- 'Á'
toAsciiLower :: Char -> Char
toAsciiLower c
    | C.isAsciiUpper c = C.toLower c
    | otherwise = c

-- | Convert a lowercase, alphabetic, ASCII character to its uppercase form.
-- This has the same semantics within the ASCII range as 'C.toUpper', but
-- leaves any non-ASCII characters unchanged.
-- 
-- >>> toAsciiUpper 'a'
-- 'A'
-- 
-- >>> toAsciiUpper 'á'
-- 'á'
toAsciiUpper :: Char -> Char
toAsciiUpper c
    | C.isAsciiLower c = C.toUpper c
    | otherwise = c


-- | The Unicode character @\\xFFFD@, safely (but unrecoverably) representing
-- an illegal, invalid, or otherwise unknown character.
replacementChar :: Char
replacementChar = '\xFFFD'