-- |
-- Module      : Unicode.Char.Normalization
-- Copyright   : (c) 2020 Composewell Technologies and Contributors
-- License     : Apache-2.0
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
--
-- Low level Unicode database functions to facilitate Unicode normalization.
--
-- For more information on Unicode normalization please refer to the following
-- sections of the [Unicode standard](https://www.unicode.org/versions/latest/):
--
-- * 2 General Structure
--
--     * 2.3 Compatibility Characters
--     * 2.12 Equivalent Sequences
--
-- * 3 Conformance
--
--     * 3.6 Combination
--     * 3.7 Decomposition
--     * 3.11 Normalization Forms
--     * 3.12 Conjoining Jamo Behavior
--
-- * 4 Character Properties
--
--     * 4.3 Combining Classes
--
-- * [Unicode® Standard Annex #15 - Unicode Normalization Forms](https://www.unicode.org/reports/tr15)
-- * [Unicode® Standard Annex #44 - Unicode Character Database](https://www.unicode.org/reports/tr44/)
--

module Unicode.Char.Normalization
    (
    -- * Combining class
      isCombining
    , combiningClass
    , isCombiningStarter

    -- * Composition
    , compose
    , composeStarters

    -- * Decomposition
    -- ** Non-Hangul
    , DecomposeMode(..)
    , isDecomposable
    , decompose

    -- ** Hangul
    , decomposeHangul
    )
where

import Control.Exception (assert)
import Data.Char (ord)
import GHC.Base (unsafeChr)
import Unicode.Internal.Division (quotRem21, quotRem28)
import Unicode.Char.General
    (hangulFirst, jamoLFirst, jamoTCount, jamoTFirst, jamoVCount, jamoVFirst)

import qualified Unicode.Internal.Char.UnicodeData.CombiningClass  as CC
import qualified Unicode.Internal.Char.UnicodeData.Compositions    as C
import qualified Unicode.Internal.Char.UnicodeData.Decomposable    as D
import qualified Unicode.Internal.Char.UnicodeData.DecomposableK   as K
import qualified Unicode.Internal.Char.UnicodeData.Decompositions  as D
import qualified Unicode.Internal.Char.UnicodeData.DecompositionsK as K

-------------------------------------------------------------------------------
-- Compose
-------------------------------------------------------------------------------

-- | Compose a starter character (combining class 0) with a combining character
-- (non-zero combining class). Returns the composed character if the starter
-- combines with the combining character, returns 'Nothing' otherwise.
--
-- This is a thin re-export of the generated composition table lookup.
--
-- @since 0.1.0
{-# INLINE compose #-}
compose :: Char -> Char -> Maybe Char
compose = C.compose

-- | Compose a starter character with another starter character.  Returns the
-- composed character if the two starters combine, returns 'Nothing' otherwise.
--
-- This is a thin re-export of the generated composition table lookup.
--
-- @since 0.1.0
{-# INLINE composeStarters #-}
composeStarters :: Char -> Char -> Maybe Char
composeStarters = C.composeStarters

-- | Return 'True' if a starter character may combine with some preceding
-- starter character.
--
-- Delegates to the generated @isSecondStarter@ predicate of the compositions
-- table.
--
-- @since 0.1.0
{-# INLINE isCombiningStarter #-}
isCombiningStarter :: Char -> Bool
isCombiningStarter = C.isSecondStarter

-------------------------------------------------------------------------------
-- Decompose
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
-- Non Hangul decomposition
-------------------------------------------------------------------------------

-- | Selects which kind of decomposition is performed: canonical or
-- compatibility.
--
-- @since 0.1.0
data DecomposeMode
    = Canonical
    -- ^ Use the canonical decomposition data.
    | Kompat
    -- ^ Use the compatibility decomposition data.

-- | Decompose a non-Hangul character into its canonical or compatibility
-- decomposition, as selected by the 'DecomposeMode' argument.  Note that the
-- resulting characters may further decompose.
--
-- 'Canonical' dispatches to the canonical decomposition table and 'Kompat' to
-- the compatibility decomposition table.
--
-- @since 0.1.0
{-# INLINE decompose #-}
decompose :: DecomposeMode -> Char -> [Char]
decompose Canonical = D.decompose
decompose Kompat = K.decompose

-- | Given a non-Hangul character determine if the character is decomposable
-- in the given 'DecomposeMode'.  Note that in the case of compatibility
-- decompositions a character may decompose into a single compatibility
-- character.
--
-- 'Canonical' dispatches to the canonical predicate and 'Kompat' to the
-- compatibility predicate.
--
-- @since 0.1.0
{-# INLINE isDecomposable #-}
isDecomposable :: DecomposeMode -> Char -> Bool
isDecomposable Canonical = D.isDecomposable
isDecomposable Kompat = K.isDecomposable

-------------------------------------------------------------------------------
-- Hangul decomposition
-------------------------------------------------------------------------------

-- | Decompose a Hangul syllable into its corresponding Jamo characters.
--
-- The result is a triple @(l, v, t)@ whose components are computed as offsets
-- from 'jamoLFirst', 'jamoVFirst' and 'jamoTFirst' respectively.
--
-- The argument is not validated: the caller is expected to pass a Hangul
-- syllable.  The resulting code points are built with 'unsafeChr', which
-- performs no range check.
--
-- @since 0.1.0
{-# INLINE decomposeHangul #-}
decomposeHangul :: Char -> (Char, Char, Char)
decomposeHangul c = (l, v, t)

    where

    -- Zero-based index of the syllable, relative to 'hangulFirst'.
    i :: Int
    i = ord c - hangulFirst

    -- Split off the T index.  'quotRem28' is only valid while 'jamoTCount' is
    -- 28, which the assertion guards.
    !(tn, ti) = assert (jamoTCount == 28) quotRem28 i

    -- Split the remainder into the L and V indices.  'quotRem21' is only valid
    -- while 'jamoVCount' is 21, which the assertion guards.
    !(li, vi) = assert (jamoVCount == 21) quotRem21 tn

    l :: Char
    l = unsafeChr (jamoLFirst + li)

    v :: Char
    v = unsafeChr (jamoVFirst + vi)

    t :: Char
    t = unsafeChr (jamoTFirst + ti)

-------------------------------------------------------------------------------
-- Combining class
-------------------------------------------------------------------------------

-- Determine the combining properties of characters.

-- | Returns the combining class of a character.
--
-- This is a thin re-export of the generated combining class table lookup.
--
-- @since 0.1.0
{-# INLINE combiningClass #-}
combiningClass :: Char -> Int
combiningClass = CC.combiningClass

-- | Returns 'True' if a character is a combining character.
--
-- This is a thin re-export of the generated combining class table predicate.
--
-- @since 0.1.0
{-# INLINE isCombining #-}
isCombining :: Char -> Bool
isCombining = CC.isCombining