{-# LANGUAGE UnicodeSyntax #-} {-# LANGUAGE CPP #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} -- | Latin-1 utility functions. module Text.Latin1 ( -- * Latin-1 checks IsLatin1(..) , isLatin1 , Latin1 , asciiIsLatin1 , maybeLatin1 , latin1 -- * Character properties , isControl , isPrintable , isWhiteSpace , isLower , isUpper , toLower , toUpper , isAlpha , isAlphaNum -- * Byte properties , isControl8 , isPrintable8 , isWhiteSpace8 , isLower8 , isUpper8 , toLower8 , toUpper8 , isAlpha8 , isAlphaNum8 ) where import Data.Checked import Data.Function (on) import Data.Char (ord, chr) import Data.String (IsString(..)) import Data.Word (Word8) import qualified Data.Text as TS import qualified Data.Text.Lazy as TL import Text.Ascii (Ascii) import qualified Text.Ascii as A import Data.Monoid (Monoid(..)) import Data.CaseInsensitive (FoldCase(..)) import Data.Hashable (Hashable(..)) data IsLatin1 = IsLatin1 instance Property IsLatin1 Char where holds _ = (< 256) . ord {-# INLINE holds #-} instance Property IsLatin1 α ⇒ Property IsLatin1 [α] where holds _ = all isLatin1 {-# INLINE holds #-} instance Property IsLatin1 TS.Text where holds _ = TS.all isLatin1 {-# INLINE holds #-} instance Property IsLatin1 TL.Text where holds _ = TL.all isLatin1 {-# INLINE holds #-} isLatin1 ∷ Property IsLatin1 v ⇒ v → Bool isLatin1 = holds IsLatin1 {-# INLINE isLatin1 #-} type Latin1 α = Checked IsLatin1 α instance Eq α ⇒ Eq (Latin1 α) where (==) = (==) `on` checked {-# INLINE (==) #-} instance Ord α ⇒ Ord (Latin1 α) where compare = compare `on` checked {-# INLINE compare #-} instance Show α ⇒ Show (Latin1 α) where showsPrec p = showsPrec p . checked instance Monoid α ⇒ Monoid (Latin1 α) where mempty = trustMe mempty {-# INLINE mempty #-} mappend x y = trustMe $ mappend (checked x) (checked y) {-# INLINE mappend #-} instance IsString α ⇒ IsString (Latin1 α) where fromString s | isLatin1 s = trustMe $ fromString s | otherwise = error $ "Not a Latin-1 string: " ++ show s {-# INLINE fromString #-} instance Hashable α ⇒ Hashable (Latin1 α) where #if MIN_VERSION_hashable(1,2,0) hashWithSalt s = hashWithSalt s . checked {-# INLINE hashWithSalt #-} #else hash = hash . checked {-# INLINE hash #-} #endif instance FoldCase (Latin1 Char) where foldCase = trustMap toLower {-# INLINE foldCase #-} instance FoldCase (Latin1 α) ⇒ FoldCase (Latin1 [α]) where foldCase = trustMap $ map $ checked . foldCase . trustThat IsLatin1 {-# INLINE foldCase #-} instance FoldCase (Latin1 TS.Text) where foldCase = trustMap $ TS.map toLower {-# INLINE foldCase #-} instance FoldCase (Latin1 TL.Text) where foldCase = trustMap $ TL.map toLower {-# INLINE foldCase #-} -- | ASCII values are Latin-1 values. asciiIsLatin1 ∷ Ascii α → Latin1 α asciiIsLatin1 = trustMe . checked {-# INLINE asciiIsLatin1 #-} -- | Map a character to its Latin-1 encoding if possible, otherwise -- return 'Nothing'. maybeLatin1 ∷ Char → Maybe Word8 maybeLatin1 c | isLatin1 c = Just $ latin1 c | otherwise = Nothing {-# INLINABLE maybeLatin1 #-} -- | Encode a Latin-1 character. No checks are performed. latin1 ∷ Char → Word8 latin1 = fromIntegral . ord {-# INLINE latin1 #-} -- | Test if a character is a Latin-1 control character. isControl ∷ Char → Bool isControl c = w < 32 || (w >= 127 && w <= 159) where w = ord c {-# INLINE isControl #-} -- | Test if a character is a Latin-1 printable character. isPrintable ∷ Char → Bool isPrintable c = A.isPrintable c || (w >= 160 && w < 256) where w = ord c {-# INLINE isPrintable #-} -- | Test if a character is a Latin-1 whitespace character. isWhiteSpace ∷ Char → Bool isWhiteSpace c = A.isWhiteSpace c || w == 133 || w == 160 where w = ord c {-# INLINE isWhiteSpace #-} -- | Test if a character is a Latin-1 lower-case letter. isLower ∷ Char → Bool isLower c = A.isLower c || (w >= 223 && w < 256 && w /= 247) where w = ord c {-# INLINE isLower #-} -- | Test if a character is a Latin-1 upper-case letter. isUpper ∷ Char → Bool isUpper c = A.isUpper c || (w >= 192 && w <= 222 && w /= 215) where w = ord c {-# INLINE isUpper #-} -- | Map lower-case Latin-1 letters to the corresponding upper-case letters, -- leaving other characters as is. toLower ∷ Char → Char toLower c | isUpper c = chr (ord c + 32) | otherwise = c {-# INLINABLE toLower #-} -- | Map upper-case Latin-1 letters to the corresponding lower-case letters, -- leaving other characters as is. toUpper ∷ Char → Char toUpper c | w ← ord c , A.isLower c || (w >= 224 && w <= 254 && w /= 247) = chr (ord c - 32) | otherwise = c {-# INLINABLE toUpper #-} -- | Test if a character is a Latin-1 letter. isAlpha ∷ Char → Bool isAlpha c = isUpper c || isLower c {-# INLINABLE isAlpha #-} -- | Test if a character is either a Latin-1 letter or a decimal digit. isAlphaNum ∷ Char → Bool isAlphaNum c = A.isDecDigit c || isAlpha c {-# INLINABLE isAlphaNum #-} -- | Test if a byte is the encoding of a Latin-1 control character. isControl8 ∷ Word8 → Bool isControl8 w = w < 32 || (w >= 127 && w <= 159) {-# INLINE isControl8 #-} -- | Test if a byte is the encoding of a Latin-1 printable character. isPrintable8 ∷ Word8 → Bool isPrintable8 w = A.isPrintable8 w || w >= 160 {-# INLINE isPrintable8 #-} -- | Test if a byte is the encoding of a Latin-1 whitespace character. isWhiteSpace8 ∷ Word8 → Bool isWhiteSpace8 w = A.isWhiteSpace8 w || w == 133 || w == 160 {-# INLINE isWhiteSpace8 #-} -- | Test if a byte is the encoding of a Latin-1 lower-case letter. isLower8 ∷ Word8 → Bool isLower8 w = A.isLower8 w || (w >= 223 && w /= 247) {-# INLINE isLower8 #-} -- | Test if a byte is the encoding of a Latin-1 upper-case letter. isUpper8 ∷ Word8 → Bool isUpper8 w = A.isUpper8 w || (w >= 192 && w <= 222 && w /= 215) {-# INLINE isUpper8 #-} -- | Map the encodings of lower-case Latin-1 letters to the encodings of -- the corresponding upper-case letters, leaving other bytes as is. toLower8 ∷ Word8 → Word8 toLower8 w | isUpper8 w = w + 32 | otherwise = w {-# INLINABLE toLower8 #-} -- | Map the encodings of upper-case Latin-1 letters to the encodings of -- the corresponding lower-case letters, leaving other bytes as is. toUpper8 ∷ Word8 → Word8 toUpper8 w | A.isLower8 w || (w >= 224 && w /= 247 && w /= 255) = w - 32 | otherwise = w {-# INLINABLE toUpper8 #-} -- | Test if a byte is the encoding of a Latin-1 letter. isAlpha8 ∷ Word8 → Bool isAlpha8 w = isUpper8 w || isLower8 w {-# INLINABLE isAlpha8 #-} -- | Test if a byte is the encoding of either a Latin-1 letter -- or a decimal digit. isAlphaNum8 ∷ Word8 → Bool isAlphaNum8 w = A.isDecDigit8 w || isAlpha8 w {-# INLINABLE isAlphaNum8 #-}