-- | Calculate the width of String and Text, being aware of wide characters.

{-# LANGUAGE CPP #-}

module Text.WideString (
  -- * wide-character-aware layout
  strWidth,
  textWidth,
  charWidth,
  -- * Text Builders which keep track of length
  WideBuilder(..),
  wbUnpack,
  wbToText
  ) where

#if !(MIN_VERSION_base(4,11,0))
import Data.Semigroup (Semigroup(..))
#endif
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB


-- | Helper for constructing Builders while keeping track of text width.
data WideBuilder = WideBuilder
  { WideBuilder -> Builder
wbBuilder :: !TB.Builder
  , WideBuilder -> Int
wbWidth   :: !Int
  }

instance Semigroup WideBuilder where
  WideBuilder Builder
x Int
i <> :: WideBuilder -> WideBuilder -> WideBuilder
<> WideBuilder Builder
y Int
j = Builder -> Int -> WideBuilder
WideBuilder (Builder
x Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
y) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j)

instance Monoid WideBuilder where
  mempty :: WideBuilder
mempty = Builder -> Int -> WideBuilder
WideBuilder Builder
forall a. Monoid a => a
mempty Int
0
#if !(MIN_VERSION_base(4,11,0))
  mappend = (<>)
#endif

-- | Convert a WideBuilder to a strict Text.
wbToText :: WideBuilder -> Text
wbToText :: WideBuilder -> Text
wbToText = Text -> Text
TL.toStrict (Text -> Text) -> (WideBuilder -> Text) -> WideBuilder -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Text
TB.toLazyText (Builder -> Text)
-> (WideBuilder -> Builder) -> WideBuilder -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. WideBuilder -> Builder
wbBuilder

-- | Convert a WideBuilder to a String.
wbUnpack :: WideBuilder -> String
wbUnpack :: WideBuilder -> String
wbUnpack = Text -> String
TL.unpack (Text -> String) -> (WideBuilder -> Text) -> WideBuilder -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Text
TB.toLazyText (Builder -> Text)
-> (WideBuilder -> Builder) -> WideBuilder -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. WideBuilder -> Builder
wbBuilder


-- | Calculate the render width of a string, considering
-- wide characters (counted as double width)
strWidth :: String -> Int
strWidth :: String -> Int
strWidth = (Char -> Int -> Int) -> Int -> String -> Int
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Char
a Int
b -> Char -> Int
charWidth Char
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
b) Int
0

-- | Calculate the render width of a string, considering
-- wide characters (counted as double width)
textWidth :: Text -> Int
textWidth :: Text -> Int
textWidth = (Char -> Int -> Int) -> Int -> Text -> Int
forall a. (Char -> a -> a) -> a -> Text -> a
T.foldr (\Char
a Int
b -> Char -> Int
charWidth Char
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
b) Int
0

-- from Pandoc (copyright John MacFarlane, GPL)
-- see also http://unicode.org/reports/tr11/#Description

-- | Get the designated render width of a character: 0 for a combining
-- character, 1 for a regular character, 2 for a wide character.
-- (Wide characters are rendered as exactly double width in apps and
-- fonts that support it.) (From Pandoc.)
charWidth :: Char -> Int
charWidth :: Char -> Int
charWidth Char
c
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<  Char
'\x0300'                    = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x0300' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x036F'   = Int
0  -- combining
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x0370' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x10FC'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x1100' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x115F'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x1160' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x11A2'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x11A3' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x11A7'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x11A8' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x11F9'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x11FA' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x11FF'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x1200' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2328'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2329' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x232A'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x232B' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2E31'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2E80' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x303E'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\x303F'                    = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x3041' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x3247'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x3248' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x324F'   = Int
1 -- ambiguous
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x3250' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x4DBF'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x4DC0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x4DFF'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x4E00' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xA4C6'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xA4D0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xA95F'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xA960' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xA97C'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xA980' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xABF9'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xAC00' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FB'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xD800' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xDFFF'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xE000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xF8FF'   = Int
1 -- ambiguous
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFAFF'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFB00' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFDFD'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFE00' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFE0F'   = Int
1 -- ambiguous
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFE10' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFE19'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFE20' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFE26'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFE30' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFE6B'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFE70' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFEFF'   = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFF01' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFF60'   = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFF61' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x16A38'  = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x1B000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1B001' = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x1D000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1F1FF' = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x1F200' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1F251' = Int
2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x1F300' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1F773' = Int
1
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x20000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x3FFFD' = Int
2
    | Bool
otherwise                        = Int
1