{-# Language TemplateHaskell #-} {-| Module : Data.Text.ICU.Normalized.NFC Description: : NFC Text implementing the 'IsString' typeclass. Copyright : ©2016 License : GPL-3 Maintainer : Evan Cofsky Stability : experimental Portability : POSIX This will perform the NFC pass on any Text to ensure it's in a Normalized form to prevent subtle text-based bugs. Provides 'Ord' and 'Eq' based on 'ICU.uca'. Also derives 'Generic' and 'Typeable' for other packages to use. -} module Data.Text.ICU.Normalized.NFC ( NFCText, packed, builder, text, normalized ) where import qualified Data.Text.ICU.Normalize as ICU import Data.Text (Text) import GHC.Generics (Generic) import Prelude.Unicode import Data.Data import Control.Lens hiding (strict, lazy) import Data.Text.Lazy (toStrict) import Data.Text.Lazy.Builder (toLazyText) import qualified Data.Text.Strict.Lens as DL import Data.Text.Lens (IsText(..)) import Control.Applicative -- | An NFC-normalized Data.Text newtype NFCText = NFCText {unNFC ∷ Text} deriving (Show, Generic, Data, Typeable) -- | Since we're NFC, we satisfy FCD, and we can just compare -- directly. instance Ord NFCText where (NFCText a) `compare` (NFCText b) = let cmp = ICU.compare [ICU.InputIsFCD] in a `cmp` b -- | If the Ord of the two texts is EQ, then they're equal. instance Eq NFCText where a == b = a `compare` b == EQ -- | 'Iso' between an 'IsText' and an 'NFCText'. normalized ∷ (IsText t) ⇒ Iso' NFCText t normalized = iso (review builder ∘ view builder ∘ unNFC) nnfc instance IsText NFCText where -- | 'Iso' for packing and unpacking 'NFCText' from 'String'. packed = iso (nnfc ∘ view DL.packed) (review DL.packed ∘ unNFC) -- | 'Iso' between a 'Builder' and an 'NFCText' builder = iso (view DL.builder ∘ unNFC) (nnfc ∘ review DL.builder) -- | Normalizes an 'IsText' to 'NFCText' -- -- 1. Do a 'quickCheck'. If this returns definitively, we normalize -- based on its response. -- 2. If not, we call 'isNormalized', and normalize based on its -- response. nnfc ∷ (IsText t) ⇒ t → NFCText nnfc txt = let nt = ICU.normalize ICU.NFC -- Make sure we start with a strict 'Text'. t = toStrict $ toLazyText $ txt ^. builder -- | If 'qc' fails to provide an answer we use 'isNormalized'. isn = Just $ if ICU.isNormalized ICU.NFC t then t else nt t -- | If we get a definitive answer from qc, act on it. quick = ICU.quickCheck ICU.NFC t >>= \q → if q then return t else return $ nt t norm = (quick <|> isn) ^?! _Just in NFCText norm