-- | Some common functions and things that are not of immediate importance to -- understand the algorithms. module Linguistics.Common where import Control.Monad (forM_) import Data.ByteString (ByteString) import Data.Char import Data.List (transpose,reverse) import qualified Data.ByteString.Short as S import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.IO as TL import qualified Data.Text.Lazy.Builder as TL import Data.Text (Text) import qualified Data.Text.Format as TF import qualified Data.Text.Encoding as T import Text.Printf import Data.Stringable (toString) import GHC.IO.Handle import NLP.Text.BTI type IMCp = (BTI, BTI) -- | Actually align something prettily alignPretty :: [[IMCp]] -> [String] alignPretty xss = map concat . transpose . map (\xs -> map (f xs) xs) . transpose . map reverse $ xss where f zs x = printAligned x zs -- | Prettyprint ``characters'', which are actually small bytestrings. printAligned = printAlignedPad ' ' -- | Print with special padding character printAlignedPad :: Char -> IMCp -> [IMCp] -> String printAlignedPad p (_,c) zs = printf " %s%s" (replicate pad p) (toUtf8String c) where pad :: Int pad = (1+) . maximum $ 0 : map (\(_,x) -> printLength x - printLength c) zs -- | Length in /printed characters/ of an UTF8 string wrapped as a 'ByteString' -- -- NOTE 'isMark' selects unicode symbols that modify a character, thereby not -- increasing the length of the /printed/ string. printLength :: BTI -> Int printLength = length . filter isAN . toUtf8String where isAN c = not (isMark c) -- isAlphaNum c || c `elem` [ '\\', '\'', '^', '$', '-', '\'' ] {- } where prnt x z = let pad = max 0 (length (filter isAN $ pp z) - length (filter isAN $ pp x)) in printf " %s%s" (replicate pad ' ') (pp x) ds x = ' ' : replicate (length $ filter isAN $ pp x) '-' isAN c = isAlphaNum c || c `elem` [ '\\', '\'' ] -} toUtf8String :: BTI -> String toUtf8String = toString -- T.unpack . T.decodeUtf8 . conv {-# INLINE toUtf8String #-} buildLines :: [[Text]] -> TL.Builder buildLines xss = s where n = (1+) . maximum $ 1 : (map (T.length . T.filter (not . isMark)) . concat $ xss) yss = transpose xss fmt = "%" ++ show n ++ "s" s = mconcat [ (mconcat $ map (TF.left n ' ') ys) `mappend` "\n" | ys <- yss ] printLines :: Handle -> [[Text]] -> IO () printLines hndl xss = do let n = (1+) . maximum $ 1 : (map (T.length . T.filter (not . isMark)) . concat $ xss) let yss = transpose xss let fmt = "%" ++ show n ++ "s" let s = mconcat [ (mconcat $ map (TF.left n ' ') ys) `mappend` "\n" | ys <- yss ] TL.hPutStrLn hndl $ TL.toLazyText s --forM_ yss $ \ys -> do -- forM_ ys $ \y -> hPrintf hndl fmt (T.unpack y) -- hPrintf hndl "\n" --conv = S.fromShort . getMultiChar . uninternMultiChar --{-# INLINE conv #-}