module Data.Text.Utf8.Unlower
  (
    unlowerCodePoint

  , printUnlowerings
  ) where

import Control.Monad (forM_)

import qualified Data.Char as Char
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List


-- | Inverse of Char.toLower/Utf8.lowerCodePoint
--
-- Returns all the characters that have the given character as their lower case, for example:
--
--    unlowerCodePoint 'a' == "aA"
--    unlowerCodePoint 'A' == ""
--    unlowerCodePoint '1' == "1"
--    unlowerCodePoint 'i' == "İiI"
--    unlowerCodePoint 'ß' == "ẞß"
--
unlowerCodePoint :: Char -> [Char]
unlowerCodePoint :: Char -> [Char]
unlowerCodePoint =
  \Char
c -> forall b a. b -> (a -> b) -> Maybe a -> b
maybe [Char
c] forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup Char
c HashMap Char [Char]
unlowerings

-- | This map contains all the unlowerings for which the result is not just a singleton with the
-- input character. It's marked NOINLINE to make sure that it only gets constructed once.
unlowerings :: HashMap.HashMap Char [Char]
{-# NOINLINE unlowerings #-}
unlowerings :: HashMap Char [Char]
unlowerings =
  forall k v. (k -> v -> Bool) -> HashMap k v -> HashMap k v
HashMap.filterWithKey forall {a}. Eq a => a -> [a] -> Bool
isNotId forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> HashMap Char [Char] -> HashMap Char [Char]
addUnlowering) forall {a}. HashMap Char [a]
initialMap [forall a. Bounded a => a
minBound..forall a. Bounded a => a
maxBound]
  where
    initialMap :: HashMap Char [a]
initialMap = forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HashMap.fromList forall a b. (a -> b) -> a -> b
$ forall a b. [a] -> [b] -> [(a, b)]
zip [forall a. Bounded a => a
minBound..forall a. Bounded a => a
maxBound] (forall a. a -> [a]
repeat [])
    addUnlowering :: Char -> HashMap Char [Char] -> HashMap Char [Char]
addUnlowering Char
c HashMap Char [Char]
hm =
      forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v
HashMap.insertWith forall a. [a] -> [a] -> [a]
(++) (Char -> Char
Char.toLower Char
c) [Char
c] HashMap Char [Char]
hm
    isNotId :: a -> [a] -> Bool
isNotId a
lc [a]
ucs = [a]
ucs forall a. Eq a => a -> a -> Bool
/= [a
lc]


-- | This function prints all the special cases of unlowerCodePoint where it's not @(pure . id)@:
--
--     SPECIAL: i (105) -> İ (304) i (105) I (73)
--     SPECIAL: k (107) -> K (8490) k (107) K (75)
--     SPECIAL: ß (223) -> ẞ (7838) ß (223)
--     SPECIAL: å (229) -> Å (8491) å (229) Å (197)
--     SPECIAL: dž (454) -> dž (454) Dž (453) DŽ (452)
--     SPECIAL: lj (457) -> lj (457) Lj (456) LJ (455)
--     SPECIAL: nj (460) -> nj (460) Nj (459) NJ (458)
--     SPECIAL: dz (499) -> dz (499) Dz (498) DZ (497)
--     SPECIAL: θ (952) -> ϴ (1012) θ (952) Θ (920)
--     SPECIAL: ω (969) -> Ω (8486) ω (969) Ω (937)
--     [..]
--     Inverse of Char.toUpper: a (97) -> a (97) A (65)
--     Inverse of Char.toUpper: b (98) -> b (98) B (66)
--     Inverse of Char.toUpper: c (99) -> c (99) C (67)
--     [..]
--
printUnlowerings :: IO ()
printUnlowerings :: IO ()
printUnlowerings = do

  let
    showCP :: Char -> String
    showCP :: Char -> [Char]
showCP Char
c = case Char -> Int
Char.ord Char
c of
      Int
co | Int
co forall a. Ord a => a -> a -> Bool
> Int
68000 -> forall a. Show a => a -> [Char]
show Int
co -- Some RTL languages above these code points are annoying to print
      Int
co -> Char
c forall a. a -> [a] -> [a]
: [Char]
" (" forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> [Char]
show Int
co forall a. Semigroup a => a -> a -> a
<> [Char]
")"

    showCPs :: [Char] -> String
    showCPs :: [Char] -> [Char]
showCPs [Char]
cs = forall a. [a] -> [[a]] -> [a]
List.intercalate [Char]
" " (forall a b. (a -> b) -> [a] -> [b]
map Char -> [Char]
showCP [Char]
cs)

    isInverse :: (Char, [Char]) -> Bool
isInverse (Char
lc, [Char]
ucs) = [Char]
ucs forall a. Eq a => a -> a -> Bool
== [Char
lc, Char -> Char
Char.toUpper Char
lc] Bool -> Bool -> Bool
|| [Char]
ucs forall a. Eq a => a -> a -> Bool
== [Char -> Char
Char.toUpper Char
lc, Char
lc]
    isAlreadyUppercase :: (a, [a]) -> Bool
isAlreadyUppercase (a
_, [a]
ucs) = [a]
ucs forall a. Eq a => a -> a -> Bool
== []
    isSpecial :: (Char, [Char]) -> Bool
isSpecial (Char, [Char])
p = Bool -> Bool
not ((Char, [Char]) -> Bool
isInverse (Char, [Char])
p) Bool -> Bool -> Bool
&& Bool -> Bool
not (forall {a} {a}. Eq a => (a, [a]) -> Bool
isAlreadyUppercase (Char, [Char])
p)

    lst :: [(Char, [Char])]
    lst :: [(Char, [Char])]
lst = forall k v. HashMap k v -> [(k, v)]
HashMap.toList HashMap Char [Char]
unlowerings

  forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (forall a. (a -> Bool) -> [a] -> [a]
filter (Char, [Char]) -> Bool
isSpecial [(Char, [Char])]
lst) forall a b. (a -> b) -> a -> b
$ \(Char
lc, [Char]
ucs) -> do
    [Char] -> IO ()
putStrLn forall a b. (a -> b) -> a -> b
$ [Char]
"SPECIAL: " forall a. Semigroup a => a -> a -> a
<> Char -> [Char]
showCP Char
lc forall a. Semigroup a => a -> a -> a
<> [Char]
" -> " forall a. Semigroup a => a -> a -> a
<> [Char] -> [Char]
showCPs [Char]
ucs

  forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (forall a. (a -> Bool) -> [a] -> [a]
filter forall {a} {a}. Eq a => (a, [a]) -> Bool
isAlreadyUppercase [(Char, [Char])]
lst) forall a b. (a -> b) -> a -> b
$ \(Char
lc, [Char]
_) -> do
    [Char] -> IO ()
putStrLn forall a b. (a -> b) -> a -> b
$ [Char]
"Already uppercase (there is no unlowering): " forall a. Semigroup a => a -> a -> a
<> Char -> [Char]
showCP Char
lc

  forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (forall a. (a -> Bool) -> [a] -> [a]
filter (Char, [Char]) -> Bool
isInverse [(Char, [Char])]
lst) forall a b. (a -> b) -> a -> b
$ \(Char
lc, [Char]
ucs) -> do
    [Char] -> IO ()
putStrLn forall a b. (a -> b) -> a -> b
$ [Char]
"Inverse of Char.toUpper: " forall a. Semigroup a => a -> a -> a
<> Char -> [Char]
showCP Char
lc forall a. Semigroup a => a -> a -> a
<> [Char]
" -> " forall a. Semigroup a => a -> a -> a
<> forall a. [a] -> [[a]] -> [a]
List.intercalate [Char]
" " (forall a b. (a -> b) -> [a] -> [b]
map Char -> [Char]
showCP [Char]
ucs)