module Text.Trans.Tokenize
( TextStream(..)
, TextStreamEntity(..)
, Token(..)
, tokenize
, serialize
, tokenLen
, entityToken
, streamEntities
, truncateLine
, truncateText
, wrapStream
, findLines
#ifdef TESTING
, isWhitespace
, partitions
#endif
)
where
import Control.Applicative
import Data.List
( inits
)
import qualified Data.Text as T
import Graphics.Vty.Widgets.Util
data Token a = S { tokenStr :: !T.Text
, tokenAttr :: !a
}
| WS { tokenStr :: !T.Text
, tokenAttr :: !a
}
data TextStreamEntity a = T !(Token a)
| NL
data TextStream a = TS ![TextStreamEntity a]
instance (Show a) => Show (TextStream a) where
show (TS ts) = "TS " ++ show ts
instance (Show a) => Show (TextStreamEntity a) where
show (T t) = "T " ++ show t
show NL = "NL"
instance (Show a) => Show (Token a) where
show (S s a) = "S " ++ show s ++ " " ++ show a
show (WS s a) = "WS " ++ show s ++ " " ++ show a
instance (Eq a) => Eq (Token a) where
a == b = (tokenStr a) == (tokenStr b) &&
(tokenAttr a) == (tokenAttr b)
instance (Eq a) => Eq (TextStreamEntity a) where
NL == NL = True
T a == T b = a == b
_ == _ = False
instance (Eq a) => Eq (TextStream a) where
(TS as) == (TS bs) = as == bs
streamEntities :: TextStream a -> [TextStreamEntity a]
streamEntities (TS es) = es
tokenLen :: Token a -> Int
tokenLen (S s _) = T.length s
tokenLen (WS s _) = T.length s
wsChars :: [Char]
wsChars = [' ', '\t']
isWs :: Char -> Bool
isWs = (`elem` wsChars)
isNL :: TextStreamEntity a -> Bool
isNL NL = True
isNL _ = False
entityToken :: TextStreamEntity a -> Token a
entityToken (T t) = t
entityToken _ = error "Cannot get token from non-token entity"
isWhitespace :: Token a -> Bool
isWhitespace (WS _ _) = True
isWhitespace _ = False
isWsEnt :: TextStreamEntity a -> Bool
isWsEnt (T (WS _ _)) = True
isWsEnt _ = False
serialize :: TextStream a -> T.Text
serialize (TS es) = T.concat $ serializeEntity <$> es
where
serializeEntity NL = T.pack "\n"
serializeEntity (T (WS s _)) = s
serializeEntity (T (S s _)) = s
tokenize :: T.Text -> a -> TextStream a
tokenize s def = TS $ findEntities s
where
findEntities str
| T.null str = []
| otherwise = nextEntity : findEntities (T.drop nextLen str)
where
c = T.head str
(nextEntity, nextLen) = if isWs c
then (T (WS nextWs def), T.length nextWs)
else if c == '\n'
then (NL, 1)
else (T (S nextStr def), T.length nextStr)
nextWs = T.takeWhile isWs str
nextStr = T.takeWhile (\ch -> not $ ch `elem` ('\n':wsChars)) str
truncateText :: Phys -> T.Text -> T.Text
truncateText width t =
let TS ts = tokenize t ()
tokens = entityToken <$> ts
in T.concat $ tokenStr <$> truncateLine width tokens
truncateLine :: Phys -> [Token a] -> [Token a]
truncateLine l _ | l < 0 = error $ "truncateLine cannot truncate at length = " ++ show l
truncateLine _ [] = []
truncateLine width ts =
if null remaining
then [first_tok { tokenStr = takeMaxText width $ tokenStr first_tok }]
else if length tokens == length ts
then tokens
else if T.null $ tokenStr lastToken
then tokens
else tokens ++ [lastToken]
where
lengths = map (sum . (chWidth <$>) . T.unpack . tokenStr) ts
cases = reverse $ inits lengths
remaining = dropWhile ((> width) . sum) cases
tokens = take (length $ head remaining) ts
truncLength = sum $ head remaining
first_tok = ts !! 0
last_tok = ts !! (length tokens)
lastToken = last_tok { tokenStr = takeMaxText (width truncLength) $
tokenStr last_tok
}
wrapStream :: (Eq a) => Phys -> TextStream a -> TextStream a
wrapStream width (TS stream) = TS $ reverse $ dropWhile (== NL) $ reverse $ wrapAll' 0 stream
where
wrapAll' :: Phys -> [TextStreamEntity a] -> [TextStreamEntity a]
wrapAll' _ [] = []
wrapAll' _ (NL:rest) = NL : wrapAll' 0 rest
wrapAll' accum (T t:rest) =
if (textWidth $ tokenStr t) + accum > width
then if isWhitespace t
then [NL] ++ wrapAll' 0 (dropWhile isWsEnt rest)
else if accum == 0 && ((textWidth $ tokenStr t) >= width)
then [T t] ++ wrapAll' (textWidth $ tokenStr t) (dropWhile isWsEnt rest)
else [NL, T t] ++ wrapAll' (textWidth $ tokenStr t) rest
else T t : wrapAll' (accum + (textWidth $ tokenStr t)) rest
partitions :: (a -> Bool) -> [a] -> [[a]]
partitions _ [] = []
partitions f as = p : partitions f (drop (length p + 1) as)
where
p = takeWhile f as
findLines :: [TextStreamEntity a] -> [[TextStreamEntity a]]
findLines = partitions (not . isNL)