module Data.Text.ParagraphLayout.Internal.Run (Run (..), ScriptCode, spanToRuns) where import Data.List (mapAccumL) import Data.List.NonEmpty (NonEmpty ((:|))) import Data.Text (Text) import qualified Data.Text as Text import Data.Text.Foreign (dropWord8, lengthWord8, takeWord8) import Data.Text.Glyphize (Direction (..)) import qualified Data.Text.ICU.Char as ICUChar import Data.Text.ParagraphLayout.Internal.ResolvedSpan import Data.Text.ParagraphLayout.Internal.Script import Data.Text.ParagraphLayout.Internal.TextContainer import Data.Text.ParagraphLayout.Internal.Zipper type ScriptCode = String -- | Each span can be broken into one or more runs by Balkón. -- -- Each run could have a different script, language, or direction. data Run = Run { runOffsetInSpan :: Int , runText :: Text , runDirection :: Maybe Direction , runScript :: Maybe ScriptCode } deriving (Eq, Show) instance TextContainer Run where getText = runText instance SeparableTextContainer Run where splitTextAt8 n r = ( r { runText = t1 } , r { runText = t2, runOffsetInSpan = runOffsetInSpan r + l1 } ) where l1 = lengthWord8 t1 t1 = takeWord8 (fromIntegral n) t t2 = dropWord8 (fromIntegral n) t t = getText r dropWhileStart p r = r { runText = t', runOffsetInSpan = o' } where t = runText r t' = Text.dropWhile p t l = lengthWord8 t l' = lengthWord8 t' o = runOffsetInSpan r o' = o + l - l' dropWhileEnd p r = r { runText = Text.dropWhileEnd p (runText r) } type ProtoRun = (Zipper, Maybe Direction, ScriptCode) -- | Represents a zipper that can advance by at least one character. data ZipperChoice = ZipperChoice { nextChar :: Char , continuingRun :: Zipper -- ^ The zipper will advance over the next character, -- merging it with all preceding characters. , startingNewRun :: Zipper -- ^ The zipper will forget all preceding characters and then advance over -- the next character, making it the first character in a new run of text. } considerNext :: Zipper -> Maybe ZipperChoice considerNext z = case next z of Nothing -> Nothing Just c -> Just ZipperChoice { nextChar = c , continuingRun = step z , startingNewRun = step $ start $ following z } data Merged a = Incompatible | Merged a spanToRuns :: ResolvedSpan d -> [Run] spanToRuns s = snd $ mapAccumL run 0 $ protoRuns zipper where zipper = start $ spanText s run acc (z, d, sc) = let t = preceding z in ( acc + lengthWord8 t , Run { runOffsetInSpan = acc , runText = t , runDirection = d , runScript = Just sc } ) protoRuns :: Zipper -> [ProtoRun] protoRuns z = reverse $ protoRuns' z [] protoRuns' :: Zipper -> [ProtoRun] -> [ProtoRun] protoRuns' curZipper curRuns = case considerNext curZipper of Nothing -> curRuns Just choice -> let headRun@(nextZipper, _, _) :| tailRuns = foldRun choice curRuns in protoRuns' nextZipper (headRun : tailRuns) foldRun :: ZipperChoice -> [ProtoRun] -> NonEmpty ProtoRun -- If there are no runs, create a new run with a single character. foldRun x [] = (continuingRun x, d, s) :| [] where d = charDirection (nextChar x) s = charScript (nextChar x) foldRun x (previousRun@(_, d1, s1) : tailRuns) = case (mergeDirections d1 d2, mergeScripts s1 s2) of (Merged d, Merged s) -> (continuingRun x, d, s) :| tailRuns _ -> (startingNewRun x, d2, s2) :| previousRun : tailRuns where d2 = charDirection (nextChar x) s2 = charScript (nextChar x) -- | Simplified detection of text direction for unidirectional text. mergeDirections :: Maybe Direction -> Maybe Direction -> Merged (Maybe Direction) mergeDirections Nothing Nothing = Merged Nothing mergeDirections (Just d1) Nothing = Merged (Just d1) mergeDirections Nothing (Just d2) = Merged (Just d2) mergeDirections (Just d1) (Just d2) | d1 == d2 = Merged (Just d1) | otherwise = Incompatible -- TODO: Implement proper inheritance rules. mergeScripts :: ScriptCode -> ScriptCode -> Merged ScriptCode mergeScripts "Zyyy" s2 = Merged s2 mergeScripts s1 "Zyyy" = Merged s1 mergeScripts s1 "Zinh" = Merged s1 mergeScripts s1 s2 | s1 == s2 = Merged s1 | otherwise = Incompatible -- TODO: Use the BiDi algorithm to support bidirectional text. charDirection :: Char -> Maybe Direction charDirection c = case ICUChar.direction c of ICUChar.LeftToRight -> Just DirLTR ICUChar.RightToLeft -> Just DirRTL ICUChar.RightToLeftArabic -> Just DirRTL _ -> Nothing