-- |
-- Module      : Unicode.Char.General.Names
-- Copyright   : (c) 2022 Composewell Technologies and Contributors
-- License     : Apache-2.0
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
--
-- Unicode character names and name aliases with 'String' API.
-- See Unicode standard 15.0.0, section 4.8.
--
-- There are also two optional APIs:
--
-- * @ByteString@ API, which requires using the package flag @has-bytestring@.
-- * @Text@ API, which requires using the package flag @has-text@.
--
-- @since 0.1.0

module Unicode.Char.General.Names
    ( -- * Unicode version
      unicodeVersion

      -- * Name
    , name
    , nameOrAlias
    , nameOrLabel
    , correctedName

      -- * Name Aliases
    , NameAliases.NameAliasType(..)
    , nameAliases
    , nameAliasesByType
    , nameAliasesWithTypes

      -- * Label
    , label
    ) where

import Control.Applicative ((<|>))
import Control.Monad ((>=>))
import Foreign.C.String (peekCAStringLen)
import GHC.Exts (
    Addr#,
    Char (..),
    Char#,
    Int#,
    dataToTag#,
    indexCharOffAddr#,
    isTrue#,
    ord#,
    plusAddr#,
    quotRemInt#,
    (+#),
    (-#),
    (<#),
 )
import System.IO.Unsafe (unsafeDupablePerformIO)

import Unicode.Internal.Bits.Names (unpackNBytes#)
import qualified Unicode.Internal.Char.Label as Label
import Unicode.Internal.Char.Names.Version (unicodeVersion)
import qualified Unicode.Internal.Char.UnicodeData.DerivedName as DerivedName
import qualified Unicode.Internal.Char.UnicodeData.NameAliases as NameAliases

-- | Name of a character, if defined.
--
-- @since 0.1.0
name :: Char -> Maybe String
name :: Char -> Maybe String
name (C# Char#
c#) = case Char# -> (# Addr#, Int# #)
DerivedName.name Char#
c# of
    (# Addr#
name#, Int#
len# #) -> case Int#
len# of
        Int#
DerivedName.NoName -> Maybe String
forall a. Maybe a
Nothing
        Int#
DerivedName.CjkCompatibilityIdeograph -> String -> Maybe String
forall a. a -> Maybe a
Just String
n
            where
            !hex :: String
hex = Char# -> String
showHex Char#
c#
            !n :: String
n = Char
'C'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'J'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'K'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
' 'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'C'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'O'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'M'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'P'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'A'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'T'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'I'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'B'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'I'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'L'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'I'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'T'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'Y'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
' 'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'I'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'D'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'E'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'O'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'G'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'R'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'A'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'P'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'H'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'-'Char -> String -> String
forall a. a -> [a] -> [a]
:String
hex
        Int#
DerivedName.CjkUnifiedIdeograph -> String -> Maybe String
forall a. a -> Maybe a
Just String
n
            where
            !hex :: String
hex = Char# -> String
showHex Char#
c#
            !n :: String
n = Char
'C'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'J'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'K'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
' 'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'U'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'N'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'I'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'F'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'I'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'E'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'D'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
' 'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'I'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'D'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'E'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'O'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'G'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'R'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'A'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'P'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'H'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'-'Char -> String -> String
forall a. a -> [a] -> [a]
:String
hex
        Int#
DerivedName.TangutIdeograph -> String -> Maybe String
forall a. a -> Maybe a
Just String
n
            where
            !hex :: String
hex = Char# -> String
showHex Char#
c#
            !n :: String
n = Char
'T'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'A'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'N'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'G'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'U'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'T'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
' 'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'I'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'D'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'E'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'O'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'G'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'R'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'A'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'P'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'H'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'-'Char -> String -> String
forall a. a -> [a] -> [a]
:String
hex
        Int#
_
            | Int# -> Bool
isTrue# (Int#
len# Int# -> Int# -> Int#
<# (# #) -> Int#
DerivedName.HangulSyllable) -> let !n :: String
n = Addr# -> String -> Int# -> String
unpack Addr#
name# [] Int#
len# in String -> Maybe String
forall a. a -> Maybe a
Just String
n
            | Bool
otherwise ->
                let !rest :: String
rest = Addr# -> String -> Int# -> String
unpack Addr#
name# [] (Int#
len# Int# -> Int# -> Int#
-# (# #) -> Int#
DerivedName.HangulSyllable)
                    !n :: String
n = Char
'H'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'A'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'N'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'G'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'U'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'L'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
' 'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'S'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'Y'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'L'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'L'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'A'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'B'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'L'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
'E'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
' 'Char -> String -> String
forall a. a -> [a] -> [a]
:String
rest
                in String -> Maybe String
forall a. a -> Maybe a
Just String
n

{-# INLINE unpack #-}
unpack :: Addr# -> String -> Int# -> String
unpack :: Addr# -> String -> Int# -> String
unpack !Addr#
addr# !String
acc = \case
    Int#
0# -> String
acc
    !Int#
i -> Addr# -> String -> Int# -> String
unpack Addr#
addr# String
acc' Int#
i'
        where
        !i' :: Int#
i' = Int#
i Int# -> Int# -> Int#
-# Int#
1#
        !c'# :: Char#
c'# = Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr# Int#
i'
        !c' :: Char
c' = Char# -> Char
C# Char#
c'#
        !acc' :: String
acc' = Char
c' Char -> String -> String
forall a. a -> [a] -> [a]
: String
acc

-- [NOTE] We assume c# >= '\x1000' to avoid to check for padding
showHex :: Char# -> String
showHex :: Char# -> String
showHex !Char#
c# = String -> (# Int#, Int# #) -> String
showIt [] (Int# -> Int# -> (# Int#, Int# #)
quotRemInt# (Char# -> Int#
ord# Char#
c#) Int#
16#)
    where
    showIt :: String -> (# Int#, Int# #) -> String
showIt !String
acc (# Int#
q, Int#
r #) = case Int#
q of
        Int#
0# -> String
acc'
        Int#
_  -> String -> (# Int#, Int# #) -> String
showIt String
acc' (Int# -> Int# -> (# Int#, Int# #)
quotRemInt# Int#
q Int#
16#)
        where
        !c :: Char
c = Char# -> Char
C# (Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
"0123456789ABCDEF"# Int#
r)
        !acc' :: String
acc' = Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String
acc

-- | Returns /corrected/ name of a character (see 'NameAliases.Correction'),
-- if defined, otherwise returns its original 'name' if defined.
--
-- @since 0.1.0
{-# INLINE correctedName #-}
correctedName :: Char -> Maybe String
correctedName :: Char -> Maybe String
correctedName c :: Char
c@(C# Char#
c#) = Maybe String
corrected Maybe String -> Maybe String -> Maybe String
forall a. Maybe a -> Maybe a -> Maybe a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Maybe String
name Char
c
    where
    -- Assumption: fromEnum NameAliases.Correction == 0
    !corrected :: Maybe String
corrected = case Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr# Int#
0# of
        Char#
'\xff'# -> Maybe String
forall a. Maybe a
Nothing -- no aliases
        Char#
'\x00'# -> Maybe String
forall a. Maybe a
Nothing -- no correction
        Char#
i#      ->
            let !n :: String
n = Addr# -> String
unpackNBytes'# (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` Char# -> Int#
ord# Char#
i#)
            in String -> Maybe String
forall a. a -> Maybe a
Just String
n
    !addr# :: Addr#
addr# = Char# -> Addr#
NameAliases.nameAliases Char#
c#

-- | Returns a character’s 'name' if defined,
-- otherwise returns its first name alias if defined.
--
-- @since 0.1.0
nameOrAlias :: Char -> Maybe String
nameOrAlias :: Char -> Maybe String
nameOrAlias c :: Char
c@(C# Char#
c#) = Char -> Maybe String
name Char
c Maybe String -> Maybe String -> Maybe String
forall a. Maybe a -> Maybe a -> Maybe a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> case Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr# Int#
0# of
    Char#
'\xff'# -> Maybe String
forall a. Maybe a
Nothing -- no aliases
    Char#
'\x00'# -> let !n :: String
n = Int# -> String
go Int#
1# in String -> Maybe String
forall a. a -> Maybe a
Just String
n
    Char#
_       -> let !n :: String
n = Int# -> String
go Int#
0# in String -> Maybe String
forall a. a -> Maybe a
Just String
n
    where
    !addr# :: Addr#
addr# = Char# -> Addr#
NameAliases.nameAliases Char#
c#
    go :: Int# -> String
go Int#
t# = case Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` Int#
t#) Int#
0#) of
        -- No bound check for t#: there is at least one alias
        Int#
0# -> Int# -> String
go (Int#
t# Int# -> Int# -> Int#
+# Int#
1#)
        Int#
i# -> Addr# -> String
unpackNBytes'# (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` Int#
i#)

-- | Returns a character’s 'name' if defined,
-- otherwise returns its label between angle brackets.
--
-- @since 0.4.0
nameOrLabel :: Char -> String
nameOrLabel :: Char -> String
nameOrLabel Char
c = case Char -> Maybe String
name Char
c of
    Maybe String
Nothing -> Char
'<' Char -> String -> String
forall a. a -> [a] -> [a]
: Char -> String
label Char
c String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
">"
    Just String
n  -> String
n

-- | All name aliases of a character, if defined.
-- The names are listed in the original order of the UCD.
--
-- See 'nameAliasesWithTypes' for the detailed list by alias type.
--
-- @since 0.1.0
{-# INLINE nameAliases #-}
nameAliases :: Char -> [String]
nameAliases :: Char -> [String]
nameAliases (C# Char#
c#) = case Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr0# Int#
0# of
    Char#
'\xff'# -> [] -- no aliases
    Char#
_       -> Addr# -> [String]
go (Addr#
addr0# Addr# -> Int# -> Addr#
`plusAddr#` ((# #) -> Int#
NameAliases.MaxNameAliasType Int# -> Int# -> Int#
+# Int#
1#))
        where
        go :: Addr# -> [String]
go Addr#
addr# = case Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr# Int#
0#) of
            Int#
0# -> case Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#) Int#
0#) of
                -- End of list
                Int#
0# -> []
                -- skip empty entry
                Int#
l# ->
                    let !s :: String
s = Addr# -> Int# -> String
unpackNBytes# (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` Int#
2#) Int#
l#
                    in String
s String -> [String] -> [String]
forall a. a -> [a] -> [a]
: Addr# -> [String]
go (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` (Int#
l# Int# -> Int# -> Int#
+# Int#
2#))
            Int#
l# ->
                let !s :: String
s = Addr# -> Int# -> String
unpackNBytes# (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#) Int#
l#
                in String
s String -> [String] -> [String]
forall a. a -> [a] -> [a]
: Addr# -> [String]
go (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` (Int#
l# Int# -> Int# -> Int#
+# Int#
1#))
    where
    addr0# :: Addr#
addr0# = Char# -> Addr#
NameAliases.nameAliases Char#
c#

-- | Name aliases of a character for a specific name alias type.
--
-- @since 0.1.0
{-# INLINE nameAliasesByType #-}
nameAliasesByType :: NameAliases.NameAliasType -> Char -> [String]
nameAliasesByType :: NameAliasType -> Char -> [String]
nameAliasesByType NameAliasType
t (C# Char#
c#) = case Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr# Int#
0# of
    Char#
'\xff'# -> [] -- no aliases
    Char#
_       -> Addr# -> NameAliasType -> [String]
nameAliasesByType# Addr#
addr# NameAliasType
t
    where
    addr# :: Addr#
addr# = Char# -> Addr#
NameAliases.nameAliases Char#
c#

-- | Detailed character names aliases.
-- The names are listed in the original order of the UCD.
--
-- See 'nameAliases' if the alias type is not required.
--
-- @since 0.1.0
{-# INLINE nameAliasesWithTypes #-}
nameAliasesWithTypes :: Char -> [(NameAliases.NameAliasType, [String])]
nameAliasesWithTypes :: Char -> [(NameAliasType, [String])]
nameAliasesWithTypes (C# Char#
c#) = case Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr# Int#
0# of
    Char#
'\xff'# -> [] -- no aliases
    Char#
'\x00'# -> (NameAliasType
 -> [(NameAliasType, [String])] -> [(NameAliasType, [String])])
-> [(NameAliasType, [String])]
-> [NameAliasType]
-> [(NameAliasType, [String])]
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr NameAliasType
-> [(NameAliasType, [String])] -> [(NameAliasType, [String])]
mk [(NameAliasType, [String])]
forall a. Monoid a => a
mempty [NameAliasType -> NameAliasType
forall a. Enum a => a -> a
succ NameAliasType
forall a. Bounded a => a
minBound..NameAliasType
forall a. Bounded a => a
maxBound]
    Char#
_       -> (NameAliasType
 -> [(NameAliasType, [String])] -> [(NameAliasType, [String])])
-> [(NameAliasType, [String])]
-> [NameAliasType]
-> [(NameAliasType, [String])]
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr NameAliasType
-> [(NameAliasType, [String])] -> [(NameAliasType, [String])]
mk [(NameAliasType, [String])]
forall a. Monoid a => a
mempty [NameAliasType
forall a. Bounded a => a
minBound..NameAliasType
forall a. Bounded a => a
maxBound]
    where
    addr# :: Addr#
addr# = Char# -> Addr#
NameAliases.nameAliases Char#
c#
    mk :: NameAliasType
-> [(NameAliasType, [String])] -> [(NameAliasType, [String])]
mk NameAliasType
t [(NameAliasType, [String])]
acc = case Addr# -> NameAliasType -> [String]
nameAliasesByType# Addr#
addr# NameAliasType
t of
        []  -> [(NameAliasType, [String])]
acc
        ![String]
as -> (NameAliasType
t, [String]
as) (NameAliasType, [String])
-> [(NameAliasType, [String])] -> [(NameAliasType, [String])]
forall a. a -> [a] -> [a]
: [(NameAliasType, [String])]
acc

{-# INLINE unpackNBytes'# #-}
unpackNBytes'# :: Addr# -> String
unpackNBytes'# :: Addr# -> String
unpackNBytes'# Addr#
addr# = Addr# -> Int# -> String
unpackNBytes#
    (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#)
    (Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr# Int#
0#))

{-# INLINE nameAliasesByType# #-}
nameAliasesByType# :: Addr# -> NameAliases.NameAliasType -> [String]
nameAliasesByType# :: Addr# -> NameAliasType -> [String]
nameAliasesByType# Addr#
addr# NameAliasType
t = case Addr# -> Int# -> Char#
indexCharOffAddr# (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` Int#
t#) Int#
0# of
    Char#
'\0'# -> [] -- no aliases for this type
    Char#
i#    -> Addr# -> [String]
unpackCStrings# (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` Char# -> Int#
ord# Char#
i#)
    where t# :: Int#
t# = NameAliasType -> Int#
forall a. a -> Int#
dataToTag# NameAliasType
t

-- | Returns the label of a code point if it has no character name, otherwise
-- returns @\"UNDEFINED\"@.
--
-- See subsection
-- [“Code Point Labels”](https://www.unicode.org/versions/Unicode15.0.0/ch04.pdf#G135248)
-- in section 4.8 “Name” of the Unicode Standard.
--
-- >>> label '\0'
-- "control-0000"
-- >>> label 'a'
-- "UNDEFINED"
-- >>> label '\xffff'
-- "noncharacter-FFFF"
--
-- @since 0.4.0
label :: Char -> String
label :: Char -> String
label = IO String -> String
forall a. IO a -> a
unsafeDupablePerformIO (IO String -> String) -> (Char -> IO String) -> Char -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> IO CStringLen
Label.label (Char -> IO CStringLen)
-> (CStringLen -> IO String) -> Char -> IO String
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> CStringLen -> IO String
peekCAStringLen)

{-# INLINE unpackCStrings# #-}
unpackCStrings# :: Addr# -> [String]
unpackCStrings# :: Addr# -> [String]
unpackCStrings# = Addr# -> [String]
go
    where
    go :: Addr# -> [String]
go Addr#
addr# = case Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr# Int#
0#) of
        Int#
0# -> []
        Int#
l# ->
            let !s :: String
s = Addr# -> Int# -> String
unpackNBytes# (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#) Int#
l#
            in String
s String -> [String] -> [String]
forall a. a -> [a] -> [a]
: Addr# -> [String]
go (Addr#
addr# Addr# -> Int# -> Addr#
`plusAddr#` (Int#
l# Int# -> Int# -> Int#
+# Int#
1#))