{-# LANGUAGE MagicHash, UnliftedFFITypes, DeriveGeneric #-}
module Data.Text.Glyphize.Buffer where
import qualified Data.Text.Internal.Lazy as Lazy
import qualified Data.Text.Lazy as Lazy (Text, pack)
import qualified Data.Text.Internal as Txt
import Data.Char (toUpper, toLower)
import Control.Monad (forM)
import Control.Exception (bracket)
import Data.Text.Encoding (decodeUtf8Lenient)
import Codec.Binary.UTF8.Light (encodeUTF8, w2c, c2w)
import qualified Data.Text.Array as A
import GHC.Exts (ByteArray#, sizeofByteArray#, Int#)
import Data.Word (Word32)
import Data.Int (Int32)
import Data.Bits ((.|.), (.&.), shiftR, shiftL, testBit)
import System.IO.Unsafe (unsafePerformIO)
import Foreign.Marshal.Alloc (alloca, allocaBytes)
import Foreign.Ptr
import Foreign.C.String (CString, withCString, peekCString)
import Foreign.Storable (Storable(..))
import GHC.Generics (Generic(..))
import Foreign.Storable.Generic (GStorable(..))
data Buffer = Buffer {
text :: Lazy.Text,
contentType :: Maybe ContentType,
direction :: Maybe Direction,
script :: Maybe String,
language :: Maybe String,
beginsText :: Bool,
endsText :: Bool,
preserveDefaultIgnorables :: Bool,
removeDefaultIgnorables :: Bool,
don'tInsertDottedCircle :: Bool,
clusterLevel :: ClusterLevel,
invisibleGlyph :: Char,
replacementCodepoint :: Char,
notFoundGlyph :: Char
} deriving (Eq, Show, Read, Ord)
data ContentType = ContentTypeUnicode | ContentTypeGlyphs deriving (Eq, Show, Read, Ord)
data ClusterLevel = ClusterMonotoneGraphemes | ClusterMonotoneChars | ClusterChars
deriving (Eq, Show, Read, Ord)
defaultBuffer = Buffer {
text = Lazy.empty,
contentType = Just ContentTypeUnicode,
direction = Nothing,
script = Nothing,
language = Nothing,
beginsText = True,
endsText = True,
preserveDefaultIgnorables = False,
removeDefaultIgnorables = False,
don'tInsertDottedCircle = False,
clusterLevel = ClusterMonotoneGraphemes,
invisibleGlyph = ' ',
replacementCodepoint = '\xFFFD',
notFoundGlyph = '\0'
}
data Direction = DirLTR | DirRTL | DirTTB | DirBTT deriving (Eq, Show, Read, Ord)
dirFromStr ('L':_) = Just DirLTR
dirFromStr ('l':_) = Just DirLTR
dirFromStr ('R':_) = Just DirRTL
dirFromStr ('r':_) = Just DirRTL
dirFromStr ('T':_) = Just DirTTB
dirFromStr ('t':_) = Just DirTTB
dirFromStr ('B':_) = Just DirBTT
dirFromStr ('b':_) = Just DirBTT
dirFromStr _ = Nothing
dirToStr DirLTR = "ltr"
dirToStr DirRTL = "rtl"
dirToStr DirTTB = "ttb"
dirToStr DirBTT = "btt"
dirReverse DirLTR = DirRTL
dirReverse DirRTL = DirLTR
dirReverse DirTTB = DirBTT
dirReverse DirBTT = DirTTB
dirBackward dir = dir `Prelude.elem` [DirRTL, DirBTT]
dirForward dir = dir `Prelude.elem` [DirLTR, DirTTB]
dirHorizontal dir = dir `Prelude.elem` [DirLTR, DirRTL]
dirVertical dir = dir `Prelude.elem` [DirTTB, DirBTT]
dir2int Nothing = 0
dir2int (Just DirLTR) = 4
dir2int (Just DirRTL) = 5
dir2int (Just DirTTB) = 6
dir2int (Just DirBTT) = 7
foreign import ccall "hb_buffer_set_direction" hb_buffer_set_direction
:: Buffer' -> Int -> IO ()
int2dir 4 = Just DirLTR
int2dir 5 = Just DirRTL
int2dir 6 = Just DirTTB
int2dir 7 = Just DirBTT
int2dir _ = Nothing
scriptHorizontalDir :: String -> Maybe Direction
scriptHorizontalDir = int2dir . hb_script_get_horizontal_direction . script_from_string
foreign import ccall "hb_script_get_horizontal_direction" hb_script_get_horizontal_direction
:: Word32 -> Int
data Language'
type Language = Ptr Language'
languageDefault :: IO String
languageDefault = hb_language_get_default >>= hb_language_to_string >>= peekCString
foreign import ccall "hb_language_to_string" hb_language_to_string :: Language -> IO CString
foreign import ccall "hb_language_get_default" hb_language_get_default :: IO Language
hb_language_from_string :: String -> IO Language
hb_language_from_string str =
withCString str $ \str' -> hb_language_from_string' str' (-1)
foreign import ccall "hb_language_from_string" hb_language_from_string'
:: CString -> Int -> IO Language
data Buffer''
type Buffer' = Ptr Buffer''
withNewBuffer :: (Buffer' -> IO a) -> IO a
withNewBuffer cb = bracket hb_buffer_create hb_buffer_destroy cb
foreign import ccall "hb_buffer_create" hb_buffer_create :: IO Buffer'
foreign import ccall "hb_buffer_destroy" hb_buffer_destroy :: Buffer' -> IO ()
bufferWithText _ Lazy.Empty cb = cb
bufferWithText buffer txt@(Lazy.Chunk (Txt.Text (A.ByteArray arr) offset length) txts) cb = do
hb_buffer_add_utf8 buffer arr (sizeofByteArray# arr) (toEnum offset) length
bufferWithText buffer txts cb
foreign import ccall "hb_buffer_add_utf8" hb_buffer_add_utf8
:: Buffer' -> ByteArray# -> Int# -> Word -> Int -> IO ()
titlecase :: String -> String
titlecase "" = ""
titlecase (c:cs) = toUpper c : Prelude.map toLower cs
script_from_string :: String -> Word32
script_from_string str = tag_from_string $ case titlecase str of
'Q':'a':'a':'i':_ -> "Zinh"
'Q':'a':'a':'c':_ -> "Copt"
'A':'r':'a':'n':_ -> "Arab"
'C':'y':'r':'s':_ -> "Cyrl"
'G':'e':'o':'k':_ -> "Geor"
'H':'a':'n':'s':_ -> "Hani"
'H':'a':'n':'t':_ -> "Hani"
'J':'a':'m':'o':_ -> "Hang"
'L':'a':'t':'f':_ -> "Latn"
'L':'a':'t':'g':_ -> "Latn"
'S':'y':'r':'e':_ -> "Syrc"
'S':'y':'r':'j':_ -> "Syrc"
'S':'y':'r':'n':_ -> "Syrc"
x -> x
tag_from_string :: String -> Word32
tag_from_string str = case str ++ Prelude.repeat ' ' of
c1:c2:c3:c4:_ -> Prelude.foldl (.|.) 0 [
shiftL (c2w c1 .&. 0x7f) 24,
shiftL (c2w c2 .&. 0x7f) 16,
shiftL (c2w c3 .&. 0x7f) 8,
shiftL (c2w c4 .&. 0x7f) 0
]
_ -> 0
tag_to_string :: Word32 -> String
tag_to_string tag = [
w2c (shiftR tag 24 .&. 0x7f),
w2c (shiftR tag 16 .&. 0x7f),
w2c (shiftR tag 8 .&. 0x7f),
w2c (shiftR tag 0 .&. 0x7f)
]
withBuffer :: Buffer -> (Buffer' -> IO a) -> IO a
withBuffer buf cb = withNewBuffer $ \buf' -> bufferWithText buf' (text buf) $ do
hb_buffer_set_content_type buf' $ case contentType buf of
Nothing -> 0
Just ContentTypeUnicode -> 1
Just ContentTypeGlyphs -> 2
hb_buffer_set_direction buf' $ dir2int $ direction buf
case script buf of
Just script' -> hb_buffer_set_script buf' $ script_from_string script'
Nothing -> return ()
case language buf of
Just lang' -> hb_buffer_set_language buf' =<< hb_language_from_string lang'
Nothing -> return ()
hb_buffer_set_flags buf' $ Prelude.foldl (.|.) 0 [
if beginsText buf then 1 else 0,
if endsText buf then 2 else 0,
if preserveDefaultIgnorables buf then 4 else 0,
if removeDefaultIgnorables buf then 8 else 0,
if don'tInsertDottedCircle buf then 16 else 0
]
hb_buffer_set_cluster_level buf' $ case clusterLevel buf of
ClusterMonotoneGraphemes -> 0
ClusterMonotoneChars -> 1
ClusterChars -> 2
hb_buffer_set_invisible_glyph buf' $ c2w $ invisibleGlyph buf
hb_buffer_set_replacement_codepoint buf' $ c2w $ replacementCodepoint buf
case (contentType buf, direction buf, script buf, language buf) of
(Just ContentTypeUnicode, Nothing, _, _) -> hb_buffer_guess_segment_properties buf'
(Just ContentTypeUnicode, _, Nothing, _) -> hb_buffer_guess_segment_properties buf'
(Just ContentTypeUnicode, _, _, Nothing) -> hb_buffer_guess_segment_properties buf'
cb buf'
foreign import ccall "hb_buffer_set_content_type" hb_buffer_set_content_type
:: Buffer' -> Int -> IO ()
foreign import ccall "hb_buffer_set_script" hb_buffer_set_script
:: Buffer' -> Word32 -> IO ()
foreign import ccall "hb_buffer_set_language" hb_buffer_set_language
:: Buffer' -> Language -> IO ()
foreign import ccall "hb_buffer_set_flags" hb_buffer_set_flags :: Buffer' -> Int -> IO ()
foreign import ccall "hb_buffer_set_cluster_level" hb_buffer_set_cluster_level
:: Buffer' -> Int -> IO ()
foreign import ccall "hb_buffer_set_invisible_glyph" hb_buffer_set_invisible_glyph
:: Buffer' -> Word32 -> IO ()
foreign import ccall "hb_buffer_set_replacement_codepoint" hb_buffer_set_replacement_codepoint
:: Buffer' -> Word32 -> IO ()
foreign import ccall "hb_buffer_guess_segment_properties" hb_buffer_guess_segment_properties
:: Buffer' -> IO ()
data GlyphInfo = GlyphInfo {
codepoint :: Word32,
cluster :: Word32,
unsafeToBreak :: Bool,
unsafeToConcat :: Bool,
safeToInsertTatweel :: Bool
} deriving (Show, Read, Eq)
instance Storable GlyphInfo where
sizeOf _ = sizeOf (undefined :: Word32) * 5
alignment _ = alignment (undefined :: Word32)
peek ptr = do
let ptr' :: Ptr Word32
ptr' = castPtr ptr
codepoint' <- ptr' `peekElemOff` 0
mask <- ptr' `peekElemOff` 1
cluster' <- ptr' `peekElemOff` 2
return $ GlyphInfo codepoint' cluster' (mask `testBit` 1) (mask `testBit` 2) (mask `testBit` 3)
poke ptr (GlyphInfo codepoint' cluster' flag1 flag2 flag3) = do
let ptr' :: Ptr Word32
ptr' = castPtr ptr
pokeElemOff ptr' 0 codepoint'
pokeElemOff ptr' 1 $ Prelude.foldl (.|.) 0 [
if flag1 then 1 else 0,
if flag2 then 2 else 0,
if flag3 then 4 else 0
]
pokeElemOff ptr' 2 cluster'
pokeElemOff ptr' 3 0
pokeElemOff ptr' 4 0
glyphInfos buf' = do
arr <- hb_buffer_get_glyph_infos buf' nullPtr
length <- hb_buffer_get_length buf'
if length == 0 || arr == nullPtr
then return []
else forM [0..fromEnum length - 1] $ peekElemOff arr
foreign import ccall "hb_buffer_get_glyph_infos" hb_buffer_get_glyph_infos
:: Buffer' -> Ptr Word -> IO (Ptr GlyphInfo)
foreign import ccall "hb_buffer_get_length" hb_buffer_get_length :: Buffer' -> IO Word
data GlyphPos = GlyphPos {
x_advance :: Int32,
y_advance :: Int32,
x_offset :: Int32,
y_offset :: Int32
} deriving (Show, Read, Eq, Generic)
instance Storable GlyphPos where
sizeOf _ = sizeOf (undefined :: Int32) * 5
alignment _ = alignment (undefined :: Int32)
peek ptr = let ptr' = castPtr ptr in do
x_advance' <- ptr' `peekElemOff` 0
y_advance' <- ptr' `peekElemOff` 1
x_offset' <- ptr' `peekElemOff` 2
y_offset' <- ptr' `peekElemOff` 3
return $ GlyphPos x_advance' y_advance' x_offset' y_offset'
poke ptr (GlyphPos x_advance' y_advance' x_offset' y_offset') = do
let ptr' :: Ptr Int32
ptr' = castPtr ptr
pokeElemOff ptr' 0 x_advance'
pokeElemOff ptr' 1 y_advance'
pokeElemOff ptr' 2 x_offset'
pokeElemOff ptr' 3 y_offset'
pokeElemOff ptr' 4 0
glyphsPos buf' = do
arr <- hb_buffer_get_glyph_positions buf' nullPtr
length <- hb_buffer_get_length buf'
if length == 0 || arr == nullPtr
then return []
else forM [0..fromEnum length - 1] $ peekElemOff arr
foreign import ccall "hb_buffer_get_glyph_positions" hb_buffer_get_glyph_positions
:: Buffer' -> Ptr Word -> IO (Ptr GlyphPos)
thawBuffer :: Buffer' -> IO Buffer
thawBuffer buf' = do
glyphInfos' <- glyphInfos buf'
contentType' <- hb_buffer_get_content_type buf'
direction' <- hb_buffer_get_direction buf'
script' <- hb_buffer_get_script buf'
language'' <- hb_buffer_get_language buf'
language' <- peekCString language''
flags' <- hb_buffer_get_flags buf'
clusterLevel' <- hb_buffer_get_cluster_level buf'
invisibleGlyph' <- hb_buffer_get_invisible_glyph buf'
replacementCodepoint' <- hb_buffer_get_replacement_codepoint buf'
return defaultBuffer {
text = Lazy.pack $ Prelude.map (w2c . codepoint) glyphInfos',
contentType = case contentType' of
1 -> Just ContentTypeUnicode
2 -> Just ContentTypeGlyphs
_ -> Nothing,
direction = int2dir direction',
script = Just $ tag_to_string script',
language = Just language',
beginsText = testBit flags' 0, endsText = testBit flags' 1,
preserveDefaultIgnorables = testBit flags' 2,
removeDefaultIgnorables = testBit flags' 3,
don'tInsertDottedCircle = testBit flags' 4,
clusterLevel = case clusterLevel' of
1 -> ClusterMonotoneChars
2 -> ClusterChars
_ -> ClusterMonotoneGraphemes,
invisibleGlyph = w2c invisibleGlyph',
replacementCodepoint = w2c replacementCodepoint'
}
foreign import ccall "hb_buffer_get_content_type" hb_buffer_get_content_type
:: Buffer' -> IO Int
foreign import ccall "hb_buffer_get_direction" hb_buffer_get_direction :: Buffer' -> IO Int
foreign import ccall "hb_buffer_get_script" hb_buffer_get_script :: Buffer' -> IO Word32
foreign import ccall "hb_buffer_get_language" hb_buffer_get_language :: Buffer' -> IO CString
foreign import ccall "hb_buffer_get_flags" hb_buffer_get_flags :: Buffer' -> IO Int
foreign import ccall "hb_buffer_get_cluster_level" hb_buffer_get_cluster_level
:: Buffer' -> IO Int
foreign import ccall "hb_buffer_get_invisible_glyph" hb_buffer_get_invisible_glyph
:: Buffer' -> IO Word32
foreign import ccall "hb_buffer_get_replacement_codepoint" hb_buffer_get_replacement_codepoint
:: Buffer' -> IO Word32