{-# LANGUAGE UnicodeSyntax #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE MultiParamTypeClasses #-} -- | Latin-1 utility functions. module Text.Latin1 ( -- * Latin-1 checks IsLatin1(..) , isLatin1 , Latin1 , asciiIsLatin1 , maybeLatin1 , latin1 -- * Character properties , isControl , isPrintable , isWhiteSpace , isLower , isUpper , toLower , toUpper , isAlpha , isAlphaNum -- * Byte properties , isControl8 , isPrintable8 , isWhiteSpace8 , isLower8 , isUpper8 , toLower8 , toUpper8 , isAlpha8 , isAlphaNum8 ) where import Data.Checked import Data.Char (ord, chr) import Data.Word (Word8) import qualified Data.Text as TS import qualified Data.Text.Lazy as TL import Text.Ascii (Ascii) import qualified Text.Ascii as A data IsLatin1 = IsLatin1 instance Property IsLatin1 Char where holds _ = (< 256) . ord {-# INLINE holds #-} instance Property IsLatin1 α ⇒ Property IsLatin1 [α] where holds _ = all isLatin1 {-# INLINE holds #-} instance Property IsLatin1 TS.Text where holds _ = TS.all isLatin1 {-# INLINE holds #-} instance Property IsLatin1 TL.Text where holds _ = TL.all isLatin1 {-# INLINE holds #-} isLatin1 ∷ Property IsLatin1 v ⇒ v → Bool isLatin1 = holds IsLatin1 {-# INLINE isLatin1 #-} type Latin1 α = Checked IsLatin1 α -- | ASCII values are Latin-1 values. asciiIsLatin1 ∷ Ascii α → Latin1 α asciiIsLatin1 = trustMe . checked {-# INLINE asciiIsLatin1 #-} -- | Map a character to its Latin-1 encoding if possible, otherwise -- return 'Nothing'. maybeLatin1 ∷ Char → Maybe Word8 maybeLatin1 c | isLatin1 c = Just $ latin1 c | otherwise = Nothing {-# INLINABLE maybeLatin1 #-} -- | Encode a Latin-1 character. No checks is performed. latin1 ∷ Char → Word8 latin1 = fromIntegral . ord {-# INLINE latin1 #-} -- | Test if a character is a Latin-1 control character. isControl ∷ Char → Bool isControl c = w < 32 || (w >= 127 && w <= 159) where w = ord c {-# INLINE isControl #-} -- | Test if a character is a Latin-1 printable character. isPrintable ∷ Char → Bool isPrintable c = A.isPrintable c || (w >= 160 && w < 256) where w = ord c {-# INLINE isPrintable #-} -- | Test if a character is a Latin-1 whitespace character. isWhiteSpace ∷ Char → Bool isWhiteSpace c = A.isWhiteSpace c || w == 133 || w == 160 where w = ord c {-# INLINE isWhiteSpace #-} -- | Test if a character is a Latin-1 lower-case letter. isLower ∷ Char → Bool isLower c = A.isLower c || (w >= 223 && w < 256 && w /= 247) where w = ord c {-# INLINE isLower #-} -- | Test if a character is a Latin-1 upper-case letter. isUpper ∷ Char → Bool isUpper c = A.isUpper c || (w >= 192 && w <= 222 && w /= 215) where w = ord c {-# INLINE isUpper #-} -- | Map lower-case Latin-1 letters to the corresponding upper-case letters, -- leaving other characters as is. toLower ∷ Char → Char toLower c | isUpper c = chr (ord c + 32) | otherwise = c {-# INLINABLE toLower #-} -- | Map upper-case Latin-1 letters to the corresponding lower-case letters, -- leaving other characters as is. toUpper ∷ Char → Char toUpper c | w ← ord c , A.isLower c || (w >= 224 && w <= 254 && w /= 247) = chr (ord c - 32) | otherwise = c {-# INLINABLE toUpper #-} -- | Test if a character is a Latin-1 letter. isAlpha ∷ Char → Bool isAlpha c = isUpper c || isLower c {-# INLINABLE isAlpha #-} -- | Test if a character is either a Latin-1 letter or a decimal digit. isAlphaNum ∷ Char → Bool isAlphaNum c = A.isDigit c || isAlpha c {-# INLINABLE isAlphaNum #-} -- | Test if a byte is the encoding of a Latin-1 control character. isControl8 ∷ Word8 → Bool isControl8 w = w < 32 || (w >= 127 && w <= 159) {-# INLINE isControl8 #-} -- | Test if a byte is the encoding of a Latin-1 printable character. isPrintable8 ∷ Word8 → Bool isPrintable8 w = A.isPrintable8 w || w >= 160 {-# INLINE isPrintable8 #-} -- | Test if a byte is the encoding of a Latin-1 whitespace character. isWhiteSpace8 ∷ Word8 → Bool isWhiteSpace8 w = A.isWhiteSpace8 w || w == 133 || w == 160 {-# INLINE isWhiteSpace8 #-} -- | Test if a byte is the encoding of a Latin-1 lower-case letter. isLower8 ∷ Word8 → Bool isLower8 w = A.isLower8 w || (w >= 223 && w /= 247) {-# INLINE isLower8 #-} -- | Test if a byte is the encoding of a Latin-1 upper-case letter. isUpper8 ∷ Word8 → Bool isUpper8 w = A.isUpper8 w || (w >= 192 && w <= 222 && w /= 215) {-# INLINE isUpper8 #-} -- | Map the encodings of lower-case Latin-1 letters to the encodings of -- the corresponding upper-case letters, leaving other bytes as is. toLower8 ∷ Word8 → Word8 toLower8 w | isUpper8 w = w + 32 | otherwise = w {-# INLINABLE toLower8 #-} -- | Map the encodings of upper-case Latin-1 letters to the encodings of -- the corresponding lower-case letters, leaving other bytes as is. toUpper8 ∷ Word8 → Word8 toUpper8 w | A.isLower8 w || (w >= 224 && w /= 247 && w /= 255) = w - 32 | otherwise = w {-# INLINABLE toUpper8 #-} -- | Test if a byte is the encoding of a Latin-1 letter. isAlpha8 ∷ Word8 → Bool isAlpha8 w = isUpper8 w || isLower8 w {-# INLINABLE isAlpha8 #-} -- | Test if a byte is the encoding of either a Latin-1 letter -- or a decimal digit. isAlphaNum8 ∷ Word8 → Bool isAlphaNum8 w = A.isDigit8 w || isAlpha8 w {-# INLINABLE isAlphaNum8 #-}