module Pdf.Toolbox.Content.FontInfo
(
FontInfo(..),
FISimple(..),
SimpleFontEncoding(..),
FIComposite(..),
CIDFontWidths(..),
makeCIDFontWidths,
cidFontGetWidth,
fontInfoDecodeGlyphs
)
where
import Data.List
import Data.Monoid
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.ByteString as BS
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import qualified Data.Encoding as Encoding
import qualified Data.Encoding.CP1252 as Encoding
import qualified Data.Encoding.MacOSRoman as Encoding
import Control.Monad
import Pdf.Toolbox.Core
import Pdf.Toolbox.Content.UnicodeCMap
import Pdf.Toolbox.Content.Transform
import Pdf.Toolbox.Content.Processor (Glyph(..))
data FontInfo
= FontInfoSimple FISimple
| FontInfoComposite FIComposite
deriving (Show)
data FISimple = FISimple {
fiSimpleUnicodeCMap :: Maybe UnicodeCMap,
fiSimpleEncoding :: Maybe SimpleFontEncoding,
fiSimpleWidths :: Maybe (Int, Int, [Double])
}
deriving (Show)
data SimpleFontEncoding
= SimpleFontEncodingWinAnsi
| SimpleFontEncodingMacRoman
deriving (Show)
data FIComposite = FIComposite {
fiCompositeUnicodeCMap :: Maybe UnicodeCMap,
fiCompositeWidths :: CIDFontWidths,
fiCompositeDefaultWidth :: Double
}
deriving (Show)
data CIDFontWidths = CIDFontWidths {
cidFontWidthsChars :: Map Int Double,
cidFontWidthsRanges :: [(Int, Int, Double)]
}
deriving (Show)
instance Monoid CIDFontWidths where
mempty = CIDFontWidths {
cidFontWidthsChars = mempty,
cidFontWidthsRanges = mempty
}
w1 `mappend` w2 = CIDFontWidths {
cidFontWidthsChars = cidFontWidthsChars w1 `mappend` cidFontWidthsChars w2,
cidFontWidthsRanges = cidFontWidthsRanges w1 `mappend` cidFontWidthsRanges w2
}
makeCIDFontWidths :: Monad m => Array -> PdfE m CIDFontWidths
makeCIDFontWidths (Array vals) = go mempty vals
where
go res [] = return res
go res (ONumber x1 : ONumber x2 : ONumber x3 : xs) = do
n1 <- intValue x1
n2 <- intValue x2
n3 <- realValue x3
go res {cidFontWidthsRanges = (n1, n2, n3) : cidFontWidthsRanges res} xs
go res (ONumber x: OArray (Array arr): xs) = do
n <- intValue x
ws <- forM arr $ \w -> fromObject w >>= realValue
go res {cidFontWidthsChars = Map.fromList (zip [n ..] ws) `mappend` cidFontWidthsChars res} xs
go _ _ = left $ UnexpectedError "Can't parse CIDFont width"
cidFontGetWidth :: CIDFontWidths -> Int -> Maybe Double
cidFontGetWidth w code =
case Map.lookup code (cidFontWidthsChars w) of
Just width -> Just width
Nothing -> case find (\(start, end, _) -> code >= start && code <= end) (cidFontWidthsRanges w) of
Just (_, _, width) -> Just width
_ -> Nothing
fontInfoDecodeGlyphs :: FontInfo -> Str -> [(Glyph, Double)]
fontInfoDecodeGlyphs (FontInfoSimple fi) = \(Str bs) ->
flip map (BS.unpack bs) $ \c ->
let code = fromIntegral c
txt =
case fiSimpleUnicodeCMap fi of
Nothing ->
case fiSimpleEncoding fi of
Nothing ->
case Text.decodeUtf8' (BS.pack [c]) of
Right t -> Just t
_ -> Nothing
Just SimpleFontEncodingWinAnsi ->
case Encoding.decodeStrictByteStringExplicit Encoding.CP1252 (BS.pack [c]) of
Left _ -> Nothing
Right t -> Just $ Text.pack t
Just SimpleFontEncodingMacRoman ->
case Encoding.decodeStrictByteStringExplicit Encoding.MacOSRoman (BS.pack [c]) of
Left _ -> Nothing
Right t -> Just $ Text.pack t
Just toUnicode -> unicodeCMapDecodeGlyph toUnicode code
width =
case fiSimpleWidths fi of
Nothing -> 0
Just (firstChar, lastChar, widths) ->
if code >= firstChar && code <= lastChar && (code firstChar) < length widths
then (widths !! (code firstChar)) / 1000
else 0
in (Glyph {
glyphCode = code,
glyphTopLeft = Vector 0 0,
glyphBottomRight = Vector width 1,
glyphText = txt
}, width)
fontInfoDecodeGlyphs (FontInfoComposite fi) = \str ->
case fiCompositeUnicodeCMap fi of
Nothing ->
let Str bs = str
in tryDecode2byte $ BS.unpack bs
Just toUnicode ->
let getWidth = fromMaybe (fiCompositeDefaultWidth fi) . cidFontGetWidth (fiCompositeWidths fi)
in cmapDecodeString getWidth toUnicode str
where
tryDecode2byte (b1:b2:rest) =
let code = fromIntegral b1 * 255 + fromIntegral b2
width = (/ 1000) $ fromMaybe (fiCompositeDefaultWidth fi) $ cidFontGetWidth (fiCompositeWidths fi) code
txt =
case Text.decodeUtf8' (BS.pack [b1, b2]) of
Right t -> Just t
_ -> Nothing
g = Glyph {
glyphCode = code,
glyphTopLeft = Vector 0 0,
glyphBottomRight = Vector width 1,
glyphText = txt
}
in (g, width) : tryDecode2byte rest
tryDecode2byte _ = []
cmapDecodeString :: (Int -> Double) -> UnicodeCMap -> Str -> [(Glyph, Double)]
cmapDecodeString getWidth cmap (Str str) = go str
where
go s =
case unicodeCMapNextGlyph cmap s of
Nothing -> []
Just (g, rest) ->
let width = getWidth g / 1000
glyph = Glyph {
glyphCode = g,
glyphTopLeft = Vector 0 0,
glyphBottomRight = Vector width 1,
glyphText = unicodeCMapDecodeGlyph cmap g
}
in (glyph, width) : go rest