{-# Language TemplateHaskell #-} {-| Module : Data.Text.ICU.Normalized.NFC Description: : Convert all Text to Unicode NFC normalized Strict Text. Copyright : ©2016 License : GPL-3 Maintainer : Evan Cofsky Stability : experimental Portability : POSIX This will perform the NFC pass on any Text to ensure it's in a Normalized form to prevent subtle text-based bugs. Provides 'Ord' and 'Eq' based on 'ICU.uca'. Also derives 'Generic' and 'Typeable' for other packages to use. -} module Data.Text.ICU.Normalized.NFC ( NFCText, strict, lazy, lower, upper, utf8, utf8ByteString ) where import qualified Data.Text.ICU as ICU import Data.Text (Text) import qualified Data.Text.Lazy as LT import GHC.Generics (Generic) import Data.Text.Encoding (encodeUtf8, decodeUtf8) import qualified Data.ByteString.Char8 as C8 import Data.String (IsString(..)) import Text.ParserCombinators.ReadP import Text.Read hiding (get) import Prelude.Unicode import Data.Data import Data.Typeable import Control.Lens hiding (strict, lazy) import Data.ByteString (ByteString) -- | An NFC-normalized Data.Text newtype NFCText = NFCText Text deriving (Show, Generic, Data, Typeable) makePrisms ''NFCText -- | Prism between 'NFCText' and strict 'Text' strict ∷ Prism' NFCText Text strict = prism' (NFCText . ICU.normalize ICU.NFC) (preview _NFCText) -- | Prism between 'NFCText' and lazy 'LT.Text' lazy ∷ Prism' NFCText LT.Text lazy = prism' (review strict ∘ LT.toStrict) (\t → LT.fromStrict <$> preview _NFCText t) -- | Prism between 'NFCText' and a utf-8 encoded 'ByteString utf8ByteString ∷ Prism' NFCText ByteString utf8ByteString = prism' (NFCText ∘ decodeUtf8) (Just ∘ encodeUtf8 ∘ view _NFCText) -- | Prism between 'NFCText' and a utf-8 encoded 'String' utf8 :: Prism' NFCText String utf8 = prism' (fromString) (Just ∘ C8.unpack ∘ view utf8ByteString) -- | Convert 'NFCText' to lower case. lower :: Getter NFCText NFCText lower = to (NFCText ∘ ICU.toLower nfcLocale ∘ view _NFCText) -- | Convert 'NFCText' to upper case. upper :: Getter NFCText NFCText upper = to (NFCText ∘ ICU.toUpper nfcLocale ∘ view _NFCText) instance IsString NFCText where fromString = NFCText . decodeUtf8 . C8.pack nfcParser ∷ ReadP NFCText nfcParser = NFCText . fromString <$> many get instance Read NFCText where readPrec = lift nfcParser -- | Convert to FCD form, then compare the texts. instance Ord NFCText where (NFCText a) `compare` (NFCText b) = let cmp = ICU.collate ICU.uca in a `cmp` b -- | If the Ord of the two texts is EQ, then they're equal. instance Eq NFCText where a == b = a `compare` b == EQ -- | For now this is where we are. nfcLocale :: ICU.LocaleName nfcLocale = ICU.Locale "en_US.UTF8"