{-# LANGUAGE CPP                #-}
#if __GLASGOW_HASKELL__ < 7100
{-# LANGUAGE DeriveDataTypeable #-}
#endif
-- |
-- Module      : Data.Unicode.Types
-- Copyright   : (c) 2016 Harendra Kumar
--
-- License     : BSD-3-Clause
-- Maintainer  : harendra.kumar@gmail.com
-- Stability   : experimental
-- Portability : GHC
--
-- Character set normalization functions for Unicode.  The documentation and
-- API in this module is largely borrowed from @text-icu@.

module Data.Unicode.Types
    (
      NormalizationMode(..)
    ) where

import           Data.Typeable (Typeable)

-- |
-- Normalization transforms Unicode text into an equivalent
-- composed or decomposed form, allowing for easier sorting and
-- searching of text. Standard normalization forms are described in
-- <https://unicode.org/reports/tr15/>,
-- Unicode Standard Annex #15: Unicode Normalization Forms.
--
-- Characters with accents or other adornments can be encoded in
-- several different ways in Unicode.  For example, take the character A-acute.
-- In Unicode, this can be encoded as a single character (the
-- \"composed\" form):
--
-- @
--      00C1    LATIN CAPITAL LETTER A WITH ACUTE
-- @
--
-- or as two separate characters (the \"decomposed\" form):
--
-- @
--      0041    LATIN CAPITAL LETTER A
--      0301    COMBINING ACUTE ACCENT
-- @
--
-- To a user of your program, however, both of these sequences should
-- be treated as the same \"user-level\" character \"A with acute
-- accent\".  When you are searching or comparing text, you must
-- ensure that these two sequences are treated equivalently.  In
-- addition, you must handle characters with more than one accent.
-- Sometimes the order of a character's combining accents is
-- significant, while in other cases accent sequences in different
-- orders are really equivalent.
--
-- Similarly, the string \"ffi\" can be encoded as three separate letters:
--
-- @
--      0066    LATIN SMALL LETTER F
--      0066    LATIN SMALL LETTER F
--      0069    LATIN SMALL LETTER I
-- @
--
-- or as the single character
--
-- @
--      FB03    LATIN SMALL LIGATURE FFI
-- @
--
-- The \"ffi\" ligature is not a distinct semantic character, and
-- strictly speaking it shouldn't be in Unicode at all, but it was
-- included for compatibility with existing character sets that
-- already provided it.  The Unicode standard identifies such
-- characters by giving them \"compatibility\" decompositions into the
-- corresponding semantic characters.  When sorting and searching, you
-- will often want to use these mappings.
--
-- Normalization helps solve these problems by transforming text into
-- the canonical composed and decomposed forms as shown in the first
-- example above.  In addition, you can have it perform compatibility
-- decompositions so that you can treat compatibility characters the
-- same as their equivalents.  Finally, normalization rearranges accents
-- into the proper canonical order, so that you do not have to worry
-- about accent rearrangement on your own.
--
-- The W3C generally recommends to exchange texts in 'NFC'.  Note also
-- that most legacy character encodings use only precomposed forms and
-- often do not encode any combining marks by themselves. For
-- conversion to such character encodings the Unicode text needs to be
-- normalized to 'NFC'.  For more usage examples, see the Unicode
-- Standard Annex.
--
data NormalizationMode
    = NFD    -- ^ Canonical decomposition.
    | NFKD   -- ^ Compatibility decomposition.
    | NFC    -- ^ Canonical decomposition followed by canonical composition.
    | NFKC   -- ^ Compatibility decomposition followed by canonical composition.
      deriving (NormalizationMode -> NormalizationMode -> Bool
(NormalizationMode -> NormalizationMode -> Bool)
-> (NormalizationMode -> NormalizationMode -> Bool)
-> Eq NormalizationMode
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: NormalizationMode -> NormalizationMode -> Bool
$c/= :: NormalizationMode -> NormalizationMode -> Bool
== :: NormalizationMode -> NormalizationMode -> Bool
$c== :: NormalizationMode -> NormalizationMode -> Bool
Eq, Int -> NormalizationMode -> ShowS
[NormalizationMode] -> ShowS
NormalizationMode -> String
(Int -> NormalizationMode -> ShowS)
-> (NormalizationMode -> String)
-> ([NormalizationMode] -> ShowS)
-> Show NormalizationMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [NormalizationMode] -> ShowS
$cshowList :: [NormalizationMode] -> ShowS
show :: NormalizationMode -> String
$cshow :: NormalizationMode -> String
showsPrec :: Int -> NormalizationMode -> ShowS
$cshowsPrec :: Int -> NormalizationMode -> ShowS
Show, Int -> NormalizationMode
NormalizationMode -> Int
NormalizationMode -> [NormalizationMode]
NormalizationMode -> NormalizationMode
NormalizationMode -> NormalizationMode -> [NormalizationMode]
NormalizationMode
-> NormalizationMode -> NormalizationMode -> [NormalizationMode]
(NormalizationMode -> NormalizationMode)
-> (NormalizationMode -> NormalizationMode)
-> (Int -> NormalizationMode)
-> (NormalizationMode -> Int)
-> (NormalizationMode -> [NormalizationMode])
-> (NormalizationMode -> NormalizationMode -> [NormalizationMode])
-> (NormalizationMode -> NormalizationMode -> [NormalizationMode])
-> (NormalizationMode
    -> NormalizationMode -> NormalizationMode -> [NormalizationMode])
-> Enum NormalizationMode
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: NormalizationMode
-> NormalizationMode -> NormalizationMode -> [NormalizationMode]
$cenumFromThenTo :: NormalizationMode
-> NormalizationMode -> NormalizationMode -> [NormalizationMode]
enumFromTo :: NormalizationMode -> NormalizationMode -> [NormalizationMode]
$cenumFromTo :: NormalizationMode -> NormalizationMode -> [NormalizationMode]
enumFromThen :: NormalizationMode -> NormalizationMode -> [NormalizationMode]
$cenumFromThen :: NormalizationMode -> NormalizationMode -> [NormalizationMode]
enumFrom :: NormalizationMode -> [NormalizationMode]
$cenumFrom :: NormalizationMode -> [NormalizationMode]
fromEnum :: NormalizationMode -> Int
$cfromEnum :: NormalizationMode -> Int
toEnum :: Int -> NormalizationMode
$ctoEnum :: Int -> NormalizationMode
pred :: NormalizationMode -> NormalizationMode
$cpred :: NormalizationMode -> NormalizationMode
succ :: NormalizationMode -> NormalizationMode
$csucc :: NormalizationMode -> NormalizationMode
Enum, Typeable)