{-# LANGUAGE DeriveDataTypeable, TypeSynonymInstances, FlexibleInstances, Unsafe #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Unicode.NormalizationInsensitive.Internal
-- Copyright   :  (c) 2011-2013 Bas van Dijk,
--                (c) 2016 Patrick Pelletier
-- License     :  BSD-style (see the file LICENSE)
-- Maintainer  :  Patrick Pelletier <code@funwithsoftware.org>
--
-- Internal module which exports the 'NI' type, constructor,
-- associated instances and the 'Normalizable' class and instances.
--
-----------------------------------------------------------------------------

module Data.Unicode.NormalizationInsensitive.Internal ( NI
                                     , mk
                                     , unsafeMk
                                     , original
                                     , normalized
                                     , map
                                     , Normalizable(normalize)
                                     ) where

--------------------------------------------------------------------------------
-- Imports
--------------------------------------------------------------------------------

-- from base:
import Data.Eq       ( Eq, (==) )
import Data.Function ( on )
import Data.Monoid   ( Monoid, mempty, mappend )
import Data.Ord      ( Ord, compare )
import Data.String   ( IsString, fromString )
import Data.Data     ( Data )
import Data.Typeable ( Typeable )
import Prelude       ( String, (.), fmap )
import Text.Read     ( Read, readPrec )
import Text.Show     ( Show, showsPrec )
import Data.Semigroup ( Semigroup, (<>) )

-- from bytestring:
import qualified Data.ByteString      as B  ( ByteString )
import qualified Data.ByteString.Lazy as BL ( ByteString, fromStrict, toStrict )

-- from text:
import qualified Data.Text                as T  ( Text, pack, unpack )
import qualified Data.Text.Encoding       as T  ( decodeUtf8With, encodeUtf8 )
import qualified Data.Text.Encoding.Error as T  ( lenientDecode )
import qualified Data.Text.Lazy           as TL ( Text, fromStrict, toStrict )

-- from deepseq:
import Control.DeepSeq ( NFData, rnf, deepseq )

-- from hashable:
import Data.Hashable ( Hashable, hashWithSalt )

-- from unicode-transforms:
import qualified Data.Text.Normalize            as T ( normalize )
import Data.Unicode.Types                       ( NormalizationMode(NFC) )

--------------------------------------------------------------------------------
-- Normalization Insensitive Strings
--------------------------------------------------------------------------------

{-| A @NI s@ provides /N/ormalization /I/nsensitive comparison for the string-like type
@s@ (for example: 'String', 'T.Text', 'B.ByteString', etc.).

Note that @NI s@ has an instance for 'IsString' which together with the
@OverloadedStrings@ language extension allows you to write normalization insensitive
string literals as in:

@
\> ("\\12399\\12441" :: NI Text) == ("\\12400" :: NI Text)
True
@

-}
data NI s = NI { NI s -> s
original   :: !s -- ^ Retrieve the original string-like value.
               , NI s -> s
normalized :: !s -- ^ Retrieve the normalized string-like value.
                                  --   (Also see 'normalize').
               }
          deriving (Typeable (NI s)
DataType
Constr
Typeable (NI s)
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> NI s -> c (NI s))
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c (NI s))
-> (NI s -> Constr)
-> (NI s -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c (NI s)))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c (NI s)))
-> ((forall b. Data b => b -> b) -> NI s -> NI s)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> NI s -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> NI s -> r)
-> (forall u. (forall d. Data d => d -> u) -> NI s -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> NI s -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> NI s -> m (NI s))
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> NI s -> m (NI s))
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> NI s -> m (NI s))
-> Data (NI s)
NI s -> DataType
NI s -> Constr
(forall d. Data d => c (t d)) -> Maybe (c (NI s))
(forall b. Data b => b -> b) -> NI s -> NI s
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> NI s -> c (NI s)
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c (NI s)
forall s. Data s => Typeable (NI s)
forall s. Data s => NI s -> DataType
forall s. Data s => NI s -> Constr
forall s. Data s => (forall b. Data b => b -> b) -> NI s -> NI s
forall s u.
Data s =>
Int -> (forall d. Data d => d -> u) -> NI s -> u
forall s u. Data s => (forall d. Data d => d -> u) -> NI s -> [u]
forall s r r'.
Data s =>
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> NI s -> r
forall s r r'.
Data s =>
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> NI s -> r
forall s (m :: * -> *).
(Data s, Monad m) =>
(forall d. Data d => d -> m d) -> NI s -> m (NI s)
forall s (m :: * -> *).
(Data s, MonadPlus m) =>
(forall d. Data d => d -> m d) -> NI s -> m (NI s)
forall s (c :: * -> *).
Data s =>
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c (NI s)
forall s (c :: * -> *).
Data s =>
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> NI s -> c (NI s)
forall s (t :: * -> *) (c :: * -> *).
(Data s, Typeable t) =>
(forall d. Data d => c (t d)) -> Maybe (c (NI s))
forall s (t :: * -> * -> *) (c :: * -> *).
(Data s, Typeable t) =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c (NI s))
forall a.
Typeable a
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u. Int -> (forall d. Data d => d -> u) -> NI s -> u
forall u. (forall d. Data d => d -> u) -> NI s -> [u]
forall r r'.
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> NI s -> r
forall r r'.
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> NI s -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> NI s -> m (NI s)
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> NI s -> m (NI s)
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c (NI s)
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> NI s -> c (NI s)
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c (NI s))
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c (NI s))
$cNI :: Constr
$tNI :: DataType
gmapMo :: (forall d. Data d => d -> m d) -> NI s -> m (NI s)
$cgmapMo :: forall s (m :: * -> *).
(Data s, MonadPlus m) =>
(forall d. Data d => d -> m d) -> NI s -> m (NI s)
gmapMp :: (forall d. Data d => d -> m d) -> NI s -> m (NI s)
$cgmapMp :: forall s (m :: * -> *).
(Data s, MonadPlus m) =>
(forall d. Data d => d -> m d) -> NI s -> m (NI s)
gmapM :: (forall d. Data d => d -> m d) -> NI s -> m (NI s)
$cgmapM :: forall s (m :: * -> *).
(Data s, Monad m) =>
(forall d. Data d => d -> m d) -> NI s -> m (NI s)
gmapQi :: Int -> (forall d. Data d => d -> u) -> NI s -> u
$cgmapQi :: forall s u.
Data s =>
Int -> (forall d. Data d => d -> u) -> NI s -> u
gmapQ :: (forall d. Data d => d -> u) -> NI s -> [u]
$cgmapQ :: forall s u. Data s => (forall d. Data d => d -> u) -> NI s -> [u]
gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> NI s -> r
$cgmapQr :: forall s r r'.
Data s =>
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> NI s -> r
gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> NI s -> r
$cgmapQl :: forall s r r'.
Data s =>
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> NI s -> r
gmapT :: (forall b. Data b => b -> b) -> NI s -> NI s
$cgmapT :: forall s. Data s => (forall b. Data b => b -> b) -> NI s -> NI s
dataCast2 :: (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c (NI s))
$cdataCast2 :: forall s (t :: * -> * -> *) (c :: * -> *).
(Data s, Typeable t) =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c (NI s))
dataCast1 :: (forall d. Data d => c (t d)) -> Maybe (c (NI s))
$cdataCast1 :: forall s (t :: * -> *) (c :: * -> *).
(Data s, Typeable t) =>
(forall d. Data d => c (t d)) -> Maybe (c (NI s))
dataTypeOf :: NI s -> DataType
$cdataTypeOf :: forall s. Data s => NI s -> DataType
toConstr :: NI s -> Constr
$ctoConstr :: forall s. Data s => NI s -> Constr
gunfold :: (forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c (NI s)
$cgunfold :: forall s (c :: * -> *).
Data s =>
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c (NI s)
gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> NI s -> c (NI s)
$cgfoldl :: forall s (c :: * -> *).
Data s =>
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> NI s -> c (NI s)
$cp1Data :: forall s. Data s => Typeable (NI s)
Data, Typeable)

-- | Make the given string-like value normalization insensitive.
mk :: Normalizable s => s -> NI s
mk :: s -> NI s
mk s
s = s -> s -> NI s
forall s. s -> s -> NI s
NI s
s (s -> s
forall s. Normalizable s => s -> s
normalize s
s)

-- | Constructs an 'NI' from an already normalized string-like
-- value. The given string is used both as the 'original' as well as
-- the 'normalized'.
--
-- This function is unsafe since the compiler can't guarantee that the
-- provided string is normalized.
unsafeMk :: s -> NI s
unsafeMk :: s -> NI s
unsafeMk s
s = s -> s -> NI s
forall s. s -> s -> NI s
NI s
s s
s

-- | Transform the original string-like value but keep it normalized.
map :: Normalizable s2 => (s1 -> s2) -> (NI s1 -> NI s2)
map :: (s1 -> s2) -> NI s1 -> NI s2
map s1 -> s2
f = s2 -> NI s2
forall s. Normalizable s => s -> NI s
mk (s2 -> NI s2) -> (NI s1 -> s2) -> NI s1 -> NI s2
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s1 -> s2
f (s1 -> s2) -> (NI s1 -> s1) -> NI s1 -> s2
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NI s1 -> s1
forall s. NI s -> s
original

instance (IsString s, Normalizable s) => IsString (NI s) where
    fromString :: String -> NI s
fromString = s -> NI s
forall s. Normalizable s => s -> NI s
mk (s -> NI s) -> (String -> s) -> String -> NI s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> s
forall a. IsString a => String -> a
fromString

instance (Semigroup s, Normalizable s) => Semigroup (NI s) where
    NI s
o1 s
_ <> :: NI s -> NI s -> NI s
<> NI s
o2 s
_ = s -> s -> NI s
forall s. s -> s -> NI s
NI s
o12 (s -> s
forall s. Normalizable s => s -> s
normalize s
o12)
      where o12 :: s
o12 = s
o1 s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
o2

instance (Monoid s, Normalizable s) => Monoid (NI s) where
    mempty :: NI s
mempty = s -> s -> NI s
forall s. s -> s -> NI s
NI s
forall a. Monoid a => a
mempty s
forall a. Monoid a => a
mempty
    -- The result of concatenating two normalized strings is not
    -- necessarily normalized.  Therefore, concatenate the original
    -- strings and re-normalize.
    -- https://github.com/ppelleti/normalization-insensitive/issues/1
    NI s
o1 s
_ mappend :: NI s -> NI s -> NI s
`mappend` NI s
o2 s
_ = s -> s -> NI s
forall s. s -> s -> NI s
NI s
o12 (s -> s
forall s. Normalizable s => s -> s
normalize s
o12)
      where o12 :: s
o12 = s
o1 s -> s -> s
forall a. Monoid a => a -> a -> a
`mappend` s
o2

instance Eq s => Eq (NI s) where
    == :: NI s -> NI s -> Bool
(==) = s -> s -> Bool
forall a. Eq a => a -> a -> Bool
(==) (s -> s -> Bool) -> (NI s -> s) -> NI s -> NI s -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` NI s -> s
forall s. NI s -> s
normalized

instance Ord s => Ord (NI s) where
    compare :: NI s -> NI s -> Ordering
compare = s -> s -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (s -> s -> Ordering) -> (NI s -> s) -> NI s -> NI s -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` NI s -> s
forall s. NI s -> s
normalized

instance (Read s, Normalizable s) => Read (NI s) where
    readPrec :: ReadPrec (NI s)
readPrec = (s -> NI s) -> ReadPrec s -> ReadPrec (NI s)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap s -> NI s
forall s. Normalizable s => s -> NI s
mk ReadPrec s
forall a. Read a => ReadPrec a
readPrec

instance Show s => Show (NI s) where
    showsPrec :: Int -> NI s -> ShowS
showsPrec Int
prec = Int -> s -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
prec (s -> ShowS) -> (NI s -> s) -> NI s -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NI s -> s
forall s. NI s -> s
original

instance Hashable s => Hashable (NI s) where
    hashWithSalt :: Int -> NI s -> Int
hashWithSalt Int
salt = Int -> s -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
salt (s -> Int) -> (NI s -> s) -> NI s -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NI s -> s
forall s. NI s -> s
normalized

instance NFData s => NFData (NI s) where
    rnf :: NI s -> ()
rnf (NI s
o s
f) = s
o s -> s -> s
forall a b. NFData a => a -> b -> b
`deepseq` s
f s -> () -> ()
forall a b. NFData a => a -> b -> b
`deepseq` ()

--------------------------------------------------------------------------------
-- Normalization
--------------------------------------------------------------------------------

mode :: NormalizationMode
mode :: NormalizationMode
mode = NormalizationMode
NFC

-- | Class of string-like types that support normalization.
class Normalizable s where
    normalize :: s -> s

-- | Note that @normalize@ on @'B.ByteString's@ assumes UTF-8 encoded strings!
instance Normalizable B.ByteString where
    normalize :: ByteString -> ByteString
normalize = Text -> ByteString
T.encodeUtf8 (Text -> ByteString)
-> (ByteString -> Text) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
forall s. Normalizable s => s -> s
normalize (Text -> Text) -> (ByteString -> Text) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. OnDecodeError -> ByteString -> Text
T.decodeUtf8With OnDecodeError
T.lenientDecode

-- | Note that @normalize@ on @'BL.ByteString's@ assumes UTF-8 encoded strings!
instance Normalizable BL.ByteString where
    normalize :: ByteString -> ByteString
normalize = ByteString -> ByteString
BL.fromStrict (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
forall s. Normalizable s => s -> s
normalize (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BL.toStrict

instance Normalizable String where
    normalize :: ShowS
normalize = Text -> String
T.unpack (Text -> String) -> (String -> Text) -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NormalizationMode -> Text -> Text
T.normalize NormalizationMode
mode (Text -> Text) -> (String -> Text) -> String -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack

instance Normalizable T.Text where
    normalize :: Text -> Text
normalize = NormalizationMode -> Text -> Text
T.normalize NormalizationMode
mode

instance Normalizable TL.Text where
    normalize :: Text -> Text
normalize = Text -> Text
TL.fromStrict (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NormalizationMode -> Text -> Text
T.normalize NormalizationMode
mode (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
TL.toStrict

instance Normalizable (NI s) where
    normalize :: NI s -> NI s
normalize (NI s
_ s
l) = s -> s -> NI s
forall s. s -> s -> NI s
NI s
l s
l