{-# LANGUAGE DerivingVia #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE Trustworthy #-} -- | -- Module: Text.Ascii.Char -- Copyright: (C) 2021 Koz Ross -- License: Apache 2.0 -- Maintainer: Koz Ross -- Stability: stable -- Portability: GHC only -- -- An implementation of ASCII characters, as bytes restricted to the range 0 - -- 127 inclusive. -- -- /See also:/ [Wikipedia entry for ASCII](https://en.wikipedia.org/wiki/ASCII) module Text.Ascii.Char ( -- * ASCII characters -- ** Type AsciiChar (AsByte, AsChar), -- ** Construction char, fromChar, fromByte, -- ** Transformation upcase, downcase, -- * Categorization AsciiType (Control, Printable), charType, AsciiCategory (Other, Punctuation, Letter, Number, Symbol), categorize, categorizeGeneral, AsciiCase (Upper, Lower), caseOf, -- * Optics charWise, byteWise, ) where import Control.DeepSeq (NFData) import Control.Monad (guard) import Data.Char (GeneralCategory, chr, generalCategory, isAscii, ord) import Data.Functor (($>)) import Data.Hashable (Hashable) import Data.Word (Word8) import Optics.Prism (Prism', prism') import Text.Ascii.Internal (AsciiChar (AsciiChar), toByte, pattern AsByte, pattern AsChar) import Text.Ascii.QQ (char) -- $setup -- >>> :set -XQuasiQuotes -- >>> import Text.Ascii.Char -- | Try and turn a 'Char' into the equivalent 'AsciiChar'. Will return -- 'Nothing' if given a 'Char' that has no ASCII equivalent. -- -- >>> fromChar '0' -- Just '0x30' -- >>> fromChar '😺' -- Nothing -- -- @since 1.0.0 fromChar :: Char -> Maybe AsciiChar fromChar c = if isAscii c then pure . AsciiChar . fromIntegral . ord $ c else Nothing -- | Try to give the 'AsciiChar' corresponding to the given byte. Will return -- 'Nothing' if given a byte that doesn't correspond to an ASCII character. -- -- >>> fromByte 50 -- Just '0x32' -- >>> fromByte 128 -- Nothing -- -- @since 1.0.0 fromByte :: Word8 -> Maybe AsciiChar fromByte w8 = if isAscii . chr . fromIntegral $ w8 then pure . AsciiChar $ w8 else Nothing -- | Give the 'AsciiChar' corresponding to the uppercase version of the -- argument. Will give 'Nothing' if given an 'AsciiChar' which has no uppercase -- version, or is uppercase already. -- -- >>> upcase [char| 'a' |] -- Just '0x41' -- >>> upcase [char| '0' |] -- Nothing -- -- @since 1.0.0 upcase :: AsciiChar -> Maybe AsciiChar upcase c@(AsciiChar w8) = caseOf c >>= (\cs -> guard (cs == Lower) $> AsciiChar (w8 - 32)) -- | Give the 'AsciiChar' corresponding to the lowercase version of the -- argument. Will give 'Nothing' if given an 'AsciiChar' which has no lowercase -- version, or is lowercase already. -- -- >>> downcase [char| 'C' |] -- Just '0x63' -- >>> downcase [char| '\\' |] -- Nothing -- -- @since 1.0.0 downcase :: AsciiChar -> Maybe AsciiChar downcase c@(AsciiChar w8) = caseOf c >>= (\cs -> guard (cs == Upper) $> AsciiChar (w8 + 32)) -- Categorization -- | A categorization of ASCII characters based on whether they're meant to be -- displayed ('Printable') or for control ('Control'). -- -- @since 1.0.0 newtype AsciiType = AsciiType Word8 deriving (Eq, Ord, Hashable, NFData) via Word8 -- | @since 1.0.0 instance Show AsciiType where {-# INLINEABLE show #-} show = \case Control -> "Control" Printable -> "Printable" -- | @since 1.0.0 instance Bounded AsciiType where minBound = Control maxBound = Printable -- | A control character is any of the first 32 bytes (0-31), plus @DEL@ (127). -- -- @since 1.0.0 pattern Control :: AsciiType pattern Control <- AsciiType 0 where Control = AsciiType 0 -- | All ASCII characters whose byte is above 31 (and not 127) are printable -- characters. -- -- @since 1.0.0 pattern Printable :: AsciiType pattern Printable <- AsciiType 1 where Printable = AsciiType 1 {-# COMPLETE Control, Printable #-} -- | Classify an 'AsciiChar' according to whether it's a control character or a -- printable character. -- -- >>> charType [char| '\0' |] -- Control -- >>> charType [char| 'w' |] -- Printable -- -- @since 1.0.0 charType :: AsciiChar -> AsciiType charType (AsciiChar w8) | w8 == 127 = Control | w8 < 32 = Control | otherwise = Printable -- | A categorization of ASCII characters based on their usage. Based (loosely) -- on Unicode categories. -- -- @since 1.0.0 newtype AsciiCategory = AsciiCategory Word8 deriving (Eq, Ord, Hashable, NFData) via Word8 -- | @since 1.0.0 instance Show AsciiCategory where {-# INLINEABLE show #-} show = \case Other -> "Other" Symbol -> "Symbol" Number -> "Number" Letter -> "Letter" Punctuation -> "Punctuation" -- | @since 1.0.0 instance Bounded AsciiCategory where minBound = Other maxBound = Symbol -- | Something which doesn't fit into any of the other categories. -- -- @since 1.0.0 pattern Other :: AsciiCategory pattern Other <- AsciiCategory 0 where Other = AsciiCategory 0 -- | A punctuation character. -- -- @since 1.0.0 pattern Punctuation :: AsciiCategory pattern Punctuation <- AsciiCategory 1 where Punctuation = AsciiCategory 1 -- | A letter, either uppercase or lowercase. -- -- @since 1.0.0 pattern Letter :: AsciiCategory pattern Letter <- AsciiCategory 2 where Letter = AsciiCategory 2 -- | A numerical digit. -- -- @since 1.0.0 pattern Number :: AsciiCategory pattern Number <- AsciiCategory 3 where Number = AsciiCategory 3 -- | A symbol whose role isn't (normally) punctuation. -- -- @since 1.0.0 pattern Symbol :: AsciiCategory pattern Symbol <- AsciiCategory 4 where Symbol = AsciiCategory 4 {-# COMPLETE Other, Punctuation, Letter, Number, Symbol #-} -- | Classify an 'AsciiChar' based on its category. -- -- >>> categorize [char| ',' |] -- Punctuation -- >>> categorize [char| '~' |] -- Symbol -- >>> categorize [char| 'w' |] -- Letter -- >>> categorize [char| '2' |] -- Number -- >>> categorize [char| '\0' |] -- Other -- -- @since 1.0.0 categorize :: AsciiChar -> AsciiCategory categorize c@(AsciiChar w8) | charType c == Control = Other | w8 == 0x20 = Punctuation | w8 >= 0x21 && w8 <= 0x23 = Punctuation | w8 == 0x24 = Symbol | w8 >= 0x25 && w8 <= 0x2a = Punctuation | w8 == 0x2b = Symbol | w8 >= 0x2c && w8 <= 0x2f = Punctuation | w8 >= 0x30 && w8 <= 0x39 = Number | w8 >= 0x3a && w8 <= 0x3b = Punctuation | w8 >= 0x3c && w8 <= 0x3e = Symbol | w8 >= 0x3f && w8 <= 0x40 = Punctuation | w8 >= 0x41 && w8 <= 0x5a = Letter | w8 >= 0x5b && w8 <= 0x5d = Punctuation | w8 == 0x5e = Symbol | w8 == 0x5f = Punctuation | w8 == 0x60 = Symbol | w8 >= 0x61 && w8 <= 0x7a = Letter | w8 == 0x7b = Punctuation | w8 == 0x7c = Symbol | w8 == 0x7d = Punctuation | otherwise = Symbol -- This only leaves ~. - Koz -- | Compatibility method for the 'GeneralCategory' provided by 'Data.Char'. -- -- >>> categorizeGeneral [char| ',' |] -- OtherPunctuation -- >>> categorizeGeneral [char| '~' |] -- MathSymbol -- >>> categorizeGeneral [char| 'w' |] -- LowercaseLetter -- >>> categorizeGeneral [char| '2' |] -- DecimalNumber -- >>> categorizeGeneral [char| '\0' |] -- Control -- -- @since 1.0.0 categorizeGeneral :: AsciiChar -> GeneralCategory categorizeGeneral (AsciiChar w8) = generalCategory . chr . fromIntegral $ w8 -- | The case of an ASCII character (if it has one). -- -- @since 1.0.0 newtype AsciiCase = AsciiCase Word8 deriving (Eq, Ord, Hashable, NFData) via Word8 -- | @since 1.0.0 instance Show AsciiCase where {-# INLINEABLE show #-} show = \case Upper -> "Upper" Lower -> "Lower" -- | @since 1.0.0 instance Bounded AsciiCase where minBound = Upper maxBound = Lower -- | Indicator of an uppercase character. -- -- @since 1.0.0 pattern Upper :: AsciiCase pattern Upper <- AsciiCase 0 where Upper = AsciiCase 0 -- | Indicator of a lowercase character. -- -- @since 1.0.0 pattern Lower :: AsciiCase pattern Lower <- AsciiCase 1 where Lower = AsciiCase 1 {-# COMPLETE Upper, Lower #-} -- | Determine the case of an 'AsciiChar'. Returns 'Nothing' if the character -- doesn't have a case. -- -- >>> caseOf [char| 'w' |] -- Just Lower -- >>> caseOf [char| 'W' |] -- Just Upper -- >>> caseOf [char| '~' |] -- Nothing -- -- @since 1.0.0 caseOf :: AsciiChar -> Maybe AsciiCase caseOf c@(AsciiChar w8) | categorize c /= Letter = Nothing | w8 <= 0x5a = Just Upper | otherwise = Just Lower -- Optics -- | A representation of the relationship between 'Char' and 'AsciiChar'. -- -- @since 1.0.0 charWise :: Prism' Char AsciiChar charWise = prism' (chr . fromIntegral . toByte) fromChar -- | A representation of the relationship between ASCII characters and bytes. -- -- @since 1.0.0 byteWise :: Prism' Word8 AsciiChar byteWise = prism' toByte fromByte