module Potato.Data.Text.Unicode where

import           Prelude

import           Graphics.Text.Width     (wcwidth)

import           Data.Int
import           Data.Text               (Text)
import qualified Data.Text               as T
import qualified Data.Text.ICU           as ICU
import qualified Potato.Data.Text.Zipper as TZ





-- NOTE this function won't work as expected until you've loaded a termal char width file via vty!
getCharWidth :: Char -> Int8
getCharWidth :: Char -> Int8
getCharWidth = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
TZ.charWidth

removeWideChars :: Text -> Text
removeWideChars :: Text -> Text
removeWideChars = (Char -> Bool) -> Text -> Text
T.filter (\Char
c -> Char -> Int8
getCharWidth Char
c forall a. Ord a => a -> a -> Bool
<= Int8
1)

internal_getCharacterBreaks :: Text -> [ICU.Break ()]
internal_getCharacterBreaks :: Text -> [Break ()]
internal_getCharacterBreaks Text
input = [Break ()]
r where
  breaker :: Breaker ()
breaker = LocaleName -> Breaker ()
ICU.breakCharacter LocaleName
ICU.Current
  r :: [Break ()]
r = forall a. Breaker a -> Text -> [Break a]
ICU.breaks Breaker ()
breaker Text
input

zwidge :: Char
zwidge :: Char
zwidge = Char
'\8205'

-- | True if the Text is a single grapheme cluster, False otherwise
isSingleGraphemeCluster :: Text -> Bool
isSingleGraphemeCluster :: Text -> Bool
isSingleGraphemeCluster Text
input = Bool
r where
  tbreaks :: [Break ()]
tbreaks = Text -> [Break ()]
internal_getCharacterBreaks Text
input
  r :: Bool
r = case [Break ()]
tbreaks of
    -- no characters, not a grapheme cluster
    []     -> Bool
False
    -- only one break, it's a grapheme cluster if it has more than one unicode char in it
    (Break ()
b:[]) -> Text -> Int
T.length (forall a. Break a -> Text
ICU.brkBreak Break ()
b) forall a. Ord a => a -> a -> Bool
> Int
1
    -- more than one character break
    [Break ()]
_      -> Bool
False

-- | True if the last character in the text is a single grapheme cluster, False otherwise
endsInGraphemeCluster :: Text -> Bool
endsInGraphemeCluster :: Text -> Bool
endsInGraphemeCluster Text
input = Bool
r where
  tbreaks' :: [Break ()]
tbreaks' = Text -> [Break ()]
internal_getCharacterBreaks Text
input
  gotoend :: [Break a] -> Bool
gotoend [Break a]
tbreaks = case [Break a]
tbreaks of
    []     -> Bool
False
    (Break a
b:[]) -> Text -> Bool
isSingleGraphemeCluster (forall a. Break a -> Text
ICU.brkBreak Break a
b)
    (Break a
_:[Break a]
bs) -> [Break a] -> Bool
gotoend [Break a]
bs
  r :: Bool
r = forall {a}. [Break a] -> Bool
gotoend [Break ()]
tbreaks'

-- most terminals do not support grapheme clusters right now :( 👎🏻👎🏼👎🏽👎🏾👎🏿
-- | removes grapheme clusters from the text and replaces them with the first character in the cluster
removeGraphemeCluster :: Text -> Text
removeGraphemeCluster :: Text -> Text
removeGraphemeCluster Text
input = Text
r where
  tbreaks :: [Break ()]
tbreaks = Text -> [Break ()]
internal_getCharacterBreaks Text
input
  -- if there is more than one character in the break then it must have been a grapheme cluster
  -- so just use the first character
  fmapfn :: Break a -> Text
fmapfn Break a
b = case Text -> Maybe (Char, Text)
T.uncons (forall a. Break a -> Text
ICU.brkBreak Break a
b) of
    Maybe (Char, Text)
Nothing     -> Text
""
    Just (Char
c, Text
_) -> Char -> Text
T.singleton Char
c
  r :: Text
r = forall a. Monoid a => [a] -> a
mconcat forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. Break a -> Text
fmapfn [Break ()]
tbreaks

-- | True if the input text contains a grapheme cluster
containsGraphemeCluster :: Text -> Bool
containsGraphemeCluster :: Text -> Bool
containsGraphemeCluster Text
input = Text -> Text
removeGraphemeCluster Text
input forall a. Eq a => a -> a -> Bool
/= Text
input



-- 🤖 isn't correct, misses emojis :()
{-
getCharWidth :: Char -> Int8
getCharWidth c
  | isControl c || c == '\t' = 0
  | w == 0x0 || w > 0x10ffff = 1
  | w >= 0x1100 && (w <= 0x115f || w == 0x2329 || w == 0x232a || (w >= 0x2e80 && w <= 0xa4cf && w /= 0x303f) || (w >= 0xac00 && w <= 0xd7a3) || (w >= 0xf900 && w <= 0xfaff) || (w >= 0xfe10 && w <= 0xfe19) || (w >= 0xfe30 && w <= 0xfe6f) || (w >= 0xff00 && w <= 0xff60) || (w >= 0xffe0 && w <= 0xffe6) || (w >= 0x20000 && w <= 0x2fffd) || (w >= 0x30000 && w <= 0x3fffd)) = 2
  | otherwise = 1
  where
    w = ord c
-}