module Opentype.Fileformat.Cmap where
import Opentype.Fileformat.Types
import Data.Binary
import Data.Binary.Put
import Data.List (sort, mapAccumL, foldl')
import Data.Either (either)
import Control.Monad
import Data.Traversable (for)
import Data.Foldable (for_, traverse_)
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.ByteString as Strict
import qualified Data.Map as M
import qualified Data.IntSet as IS
import Data.Maybe
import Data.Bits
import Data.Int
newtype CmapTable = CmapTable {getCmaps :: [CMap]}
deriving Show
emptyCmapTable :: CmapTable
emptyCmapTable = CmapTable []
data CMap = CMap {
cmapPlatform :: PlatformID,
cmapEncoding :: Word16,
cmapLanguage :: Word16,
mapFormat :: MapFormat,
multiByte :: IS.IntSet,
glyphMap :: WordMap GlyphID
}
deriving Show
instance Ord CMap where
compare (CMap pfID encID lang _ _ _) (CMap pfID2 encID2 lang2 _ _ _) =
compare (pfID, encID, lang) (pfID2, encID2, lang2)
instance Eq CMap where
(CMap pfID encID lang _ _ _) == (CMap pfID2 encID2 lang2 _ _ _) =
(pfID, encID, lang) == (pfID2, encID2, lang2)
data MapFormat =
MapFormat0 |
MapFormat2 |
MapFormat4 |
MapFormat6 |
MapFormat8 |
MapFormat10 |
MapFormat12
deriving Show
putCmapTable :: CmapTable -> Put
putCmapTable (CmapTable cmaps_) =
do putWord16be 0
putWord16be $ fromIntegral $ length cmaps
for_ (zip offsets cmaps) $
\(offset, CMap pfID encID _ _ _ _) ->
do putPf pfID
putWord16be encID
putWord32be offset
traverse_ putLazyByteString cmapsBs
where
cmaps = sort cmaps_
offsets :: [Word32]
offsets =
scanl (+) (fromIntegral $ 4 + 8 * length cmaps) $
map (fromIntegral . Lazy.length) cmapsBs
cmapsBs = map (runPut.putCmap) cmaps
readCmapTable :: Strict.ByteString -> Either String CmapTable
readCmapTable bs = do
version <- index16 bs 0
when (version /= 0) $
fail "unsupported cmap version."
n <- index16 bs 1
entries <- for [0..n1] $ \i -> do
pfID <- toPf =<< (index16 bs $ 2 + i*4)
encID <- index16 bs $ 2 + i*4 + 1
offset <- index32 bs $ 2 + fromIntegral i*2
return (offset, pfID, encID)
cmaps <- for entries $ \(offset, pfID, encID) -> do
cm <- readCmap $ Strict.drop (fromIntegral offset) bs
Right $ cm {cmapPlatform = pfID, cmapEncoding = encID}
return $ CmapTable cmaps
putCmap :: CMap -> Put
putCmap cmap = case mapFormat cmap of
MapFormat0 -> putMap0 cmap
MapFormat2 -> putMap2 cmap
MapFormat4 -> putMap4 cmap
MapFormat6 -> putMap6 cmap
MapFormat8 -> putMap8 cmap
MapFormat10 -> putMap10 cmap
MapFormat12 -> putMap12 cmap
readCmap :: Strict.ByteString -> Either String CMap
readCmap bs_ = do
c <- index16 bs_ 0
let bs | (c >= 8 && c < 14) = Strict.drop 8 bs_
| otherwise = Strict.drop 4 bs_
either fail return $
case c of
0 -> getMap0 bs
2 -> getMap2 bs
4 -> getMap4 bs
6 -> getMap6 bs
8 -> getMap8 bs
10 -> getMap10 bs
12 -> getMap12 bs
i -> fail $ "unsupported map encoding " ++ show i
subIntMap :: Word32 -> Word32 -> WordMap GlyphID -> WordMap GlyphID
subIntMap from to intMap =
fst $ M.split (fromIntegral to+1) $ snd $
M.split (fromIntegral from1) intMap
asSubtypeFrom :: b -> [(a, b)] -> b
asSubtypeFrom a _ = a
putCodes :: Word32 -> Word32 -> [(Word32, GlyphID)] -> Put
putCodes start end _
| start > end = return ()
putCodes start end [] = do
putWord16be 0
putCodes (start+1) end []
putCodes start end l@((i, code):rest)
| start < i = do putWord16be 0
putCodes (start+1) end l
| otherwise = do putWord16be code
putCodes (i+1) end rest
subCodes :: WordMap GlyphID -> Word32 -> Word32 -> Put
subCodes set start end =
putCodes start end $ M.toList $ subIntMap start end set
data SubTable2 = SubTable2 {
highByte :: Word16,
firstCode :: Word16,
entryCount :: Word16,
rangeOffset :: Word16,
rangeBytes :: Put
}
putMap0 :: CMap -> PutM ()
putMap0 cmap = do
putWord16be 0
putWord16be 262
putWord16be $ cmapLanguage cmap
let gm = glyphMap cmap
for_ [0..255] $ \c ->
putWord8 $ fromIntegral $
fromMaybe 0 (M.lookup c gm)
getMap0 :: Strict.ByteString -> Either String CMap
getMap0 bs =
if Strict.length bs < 258 then
Left "invalid map format 0"
else
do lang <- index16 bs 0
let gmap = M.fromAscList $
filter ((/= 0).snd) $
(flip map) [0..255] $ \c ->
(fromIntegral c, fromIntegral $ Strict.index bs (c+2))
Right $ CMap UnicodePlatform 0 lang MapFormat0 IS.empty gmap
putMap2 :: CMap -> PutM ()
putMap2 cmap = do
putWord16be 2
putWord16be size
putWord16be (cmapLanguage cmap)
putCodes 0 255 $ zip (map (fromIntegral.highByte) subTableCodes) [1::Word16 ..]
for_ subTables $ \(SubTable2 _ fc ec ro _) ->
do putWord16be fc
putWord16be ec
putWord16be 0
putWord16be ro
for_ subTables rangeBytes
where
highBytes :: [Int]
highBytes =
IS.toList $ fst $
IS.split 255 (multiByte cmap)
subTableCodes =
filter ((/= 0) . entryCount) $
flip map highBytes $ \hb ->
let subMap = subIntMap (fromIntegral hb `shift` 8)
(fromIntegral hb `shift` 8 .|. 0xff) $
glyphMap cmap
(fstCode, lstCode)
| M.null subMap = (0, 1)
| otherwise = (fst $ M.findMin subMap,
fst $ M.findMax subMap)
ec = lstCode fstCode + 1
rb = subCodes subMap fstCode lstCode
in SubTable2 (fromIntegral hb) (fromIntegral fstCode) (fromIntegral ec) 0 rb
where
subTables = scanl calcOffset firstTable subTableCodes
firstTable =
SubTable2 0 0 256 (fromIntegral $ length subTableCodes * 8 + 2) $
subCodes (glyphMap cmap) 0 255
size :: Word16
size = 518 + 8 * (fromIntegral $ length subTables) + 2 * sum (map entryCount subTables)
calcOffset prev st = st { rangeOffset = rangeOffset prev 8 + 2*entryCount prev }
getMap2 :: Strict.ByteString -> Either String CMap
getMap2 bs = do
lang <- index16 bs 0
highBytes <- do
l <- traverse (index16 bs) [1..256]
Right $ map fst $ filter ((/=0).snd) $ zip [0::Int ..255] l
l <- for [0::Word16 .. fromIntegral $ length highBytes] $ \i -> do
fstCode <- index16 bs (fromIntegral $ 257 + i*4)
cnt <- index16 bs (fromIntegral $ 257 + i*4 + 1)
delta <- index16 bs (fromIntegral $ 257 + i*4 + 2)
ro <- index16 bs (fromIntegral $ 257 + i*4 + 3)
for [0 .. fromIntegral cnt1] $ \entry -> do
p <- index16 bs (fromIntegral $ 257 + i*4 + 3 + ro `quot` 2 + entry)
Right (fromIntegral $ fstCode + entry, if p == 0 then 0 else p + delta)
let im = M.fromAscList $ filter ((/= 0).snd) $ concat l
is = IS.fromAscList $ map fromIntegral highBytes
Right $ CMap UnicodePlatform 0 lang MapFormat2 is im
data Segment4 = RangeSegment Word16 Word16 Word16
| CodeSegment Word16 Word16 [Word16]
deriving Show
findRange :: Word32 -> Int64 -> [(Word32, Word16)] -> (Word32, [(Word32, Word16)])
findRange nextI _ [] =
(nextI1, [])
findRange nextI offset l@((i,c):r)
| i == nextI && c == fromIntegral (fromIntegral nextI+offset) =
findRange (nextI+1) offset r
| otherwise = (nextI1, l)
findCodes :: Word32 -> [(Word32, Word16)] -> ([GlyphID], [(Word32, Word16)])
findCodes _ [] = ([], [])
findCodes prevI l@((i,c):r)
| i prevI > 4 = ([], l)
| otherwise = (replicate (fromIntegral $ iprevI1) 0 ++ c:c2, r2)
where (c2, r2) = findCodes i r
getSegments :: [(Word32, Word16)] -> [Segment4]
getSegments [] = [RangeSegment 0xffff 1 0]
getSegments l@((start, c):_)
| fromIntegral end start >= 4 ||
lc <= endstart+1 =
RangeSegment (fromIntegral start) (fromIntegral endfromIntegral start+1) c :
getSegments r
| otherwise =
CodeSegment (fromIntegral start) (fromIntegral lc) codes :
getSegments r2
where
lc = fromIntegral $ length codes
(end, r) = findRange start (fromIntegral c fromIntegral start) l
(codes, r2) = findCodes (start1) l
data Segment4layout = Segment4layout {
s4endCode :: Word16,
s4startCode :: Word16,
s4idDelta :: Word16,
s4idRangeOffset :: Word16,
s4glyphIndex :: [GlyphID] }
deriving Show
putMap4 :: CMap -> PutM ()
putMap4 cmap = do
putWord16be 4
putWord16be size
putWord16be (cmapLanguage cmap)
putWord16be (segCount*2)
putWord16be searchRange
putWord16be entrySelector
putWord16be $ 2*segCount searchRange
traverse_ (put.s4endCode) layout
putWord16be 0
traverse_ (put.s4startCode) layout
traverse_ (put.s4idDelta) layout
traverse_ (put.s4idRangeOffset) layout
traverse_ (traverse_ put.s4glyphIndex) layout
where
size, segCount, searchRange, entrySelector :: Word16
entrySelector = iLog2 segCount
searchRange = 1 `shift` (fromIntegral $ entrySelector+1)
segments = getSegments $ M.toList $ subIntMap 0 0xffff $ glyphMap cmap
(codeSize, layout) = mapAccumL foldLayout (segCount*2) segments
foldLayout offset (RangeSegment start len code) =
(offset2, Segment4layout (fromIntegral $ start+len1)
(fromIntegral start) (code(fromIntegral start)) 0 [])
foldLayout offset (CodeSegment start len codes) =
(offset+fromIntegral len*22,
Segment4layout (fromIntegral $ start+len1)
(fromIntegral start) 0 offset codes)
size = 8*segCount + codeSize + 16
segCount = fromIntegral $ length segments
getMap4 :: Strict.ByteString -> Either String CMap
getMap4 bs = do
lang <- index16 bs 0
segCount <- (`quot` 2) <$> index16 bs 1
gmap <- fmap (M.fromAscList . filter ((/= 0).snd) . concat ) $
for [0::Word16 .. segCount2] $ \i ->
do idDelta <- index16 bs (i + 6 + segCount*2)
endCode <- index16 bs (i + 5)
startCode <- index16 bs (i + 6 + segCount)
idRangeOffset <- index16 bs (i + 6 + segCount*3)
if idRangeOffset == 0
then Right [(fromIntegral c, c+idDelta) | c <- [startCode .. endCode]]
else for [0..endCodestartCode] $ \j ->
do glyph <- index16 bs (fromIntegral $ i + 6 + segCount*3 + idRangeOffset`div`2 + j)
Right (fromIntegral $ startCode + j, glyph)
Right $ CMap UnicodePlatform 0 lang MapFormat4 IS.empty gmap
putMap6 :: CMap -> PutM ()
putMap6 cmap = do
putWord16be 6
putWord16be size
putWord16be (cmapLanguage cmap)
putWord16be fCode
putWord16be eCount
subCodes (glyphMap cmap) (fromIntegral fCode) (fromIntegral lastCode)
where
size, eCount, fCode, lastCode :: Word16
size = eCount*2 + 10
eCount = lastCode fCode + 1
fCode = fromIntegral $
min (fromIntegral (maxBound :: Word16)::Word32) $
fst $ M.findMin (glyphMap cmap)
lastCode = fromIntegral $
min (fromIntegral (maxBound :: Word16)::Word32) $
fst $ M.findMax (glyphMap cmap)
getMap6 :: Strict.ByteString -> Either String CMap
getMap6 bs = do
lang <- index16 bs 0
fCode <- index16 bs 1
eCount <- index16 bs 2
gmap <- fmap (M.fromAscList . filter ((/= 0).snd)) $
for [0..eCount1] $ \i -> do
g <- index16 bs (i+3)
Right (fromIntegral $ i + fCode, g)
Right $ CMap UnicodePlatform 0 lang MapFormat6 IS.empty gmap
putPacked :: Int -> [Int] -> Put
putPacked start highBytes
| start >= 8192 = return ()
| otherwise = do
putWord8 $ foldl' (.|.) 0 $ map ((shift 1) . (.&. 7)) bytes
putPacked (start+1) rest
where
(bytes, rest) = span (\b -> b >= (start*8) &&
b < (start+1)*8)
highBytes
readPacked :: Strict.ByteString -> [Int]
readPacked bs =
[i .|. b `shift` 3 |
(a, b) <- zip (Strict.unpack bs) [0..8191],
i <- [0..7],
a .&. (1 `shift` i) /= 0
]
findRanges :: [(Word32, GlyphID)] -> [(Word32, Word32, GlyphID)]
findRanges [] = []
findRanges l@((i,c):_) = (i, i2, c) : findRanges next
where (i2, next) = findRange i (fromIntegral cfromIntegral i) l
putMap8 :: CMap -> PutM ()
putMap8 cmap = do
putWord16be 8
putWord16be 0
putWord32be $ fromIntegral size
putWord32be (fromIntegral $ cmapLanguage cmap)
putPacked 0 highBytes
putWord32be $ fromIntegral nGroups
for_ ranges $ \(start, end, code) -> do
putWord32be $ fromIntegral start
putWord32be $ fromIntegral end
putWord32be $ fromIntegral code
where
size = nGroups * 12 + 8208
highBytes = IS.toList $ multiByte cmap
ranges = findRanges $ M.toList $ glyphMap cmap
nGroups = length ranges
getMap8 :: Strict.ByteString -> Either String CMap
getMap8 bs = do
_ <- index16 bs 0
lang <- index16 bs 1
let is = IS.fromAscList $ readPacked (Strict.drop 4 bs)
nGroups <- index32 bs 2049
gmap <- fmap (M.fromAscList . concat) $ for [0..nGroups1] $ \i -> do
start <- index32 bs (i*3 + 2050)
end <- index32 bs (i*3 + 2051)
glyph <- index32 bs (i*3 + 2052)
return [(fromIntegral c, fromIntegral $ glyph+cstart) | c <- [start .. end]]
Right $ CMap UnicodePlatform 0 lang MapFormat8 is gmap
getMap10 :: Strict.ByteString -> Either String CMap
getMap10 bs = do
lang <- index32 bs 0
fCode <- index32 bs 1
eCount <- index32 bs 2
gmap <- fmap (M.fromAscList . filter ((/= 0).snd)) $
for [0..eCount1] $ \i -> do
g <- index16 bs (fromIntegral i+6)
Right (fromIntegral $ i + fCode, g)
Right $ CMap UnicodePlatform 0 (fromIntegral lang) MapFormat6 IS.empty gmap
putMap10 :: CMap -> Put
putMap10 cmap = do
putWord16be 10
putWord16be 0
putWord32be size
putWord32be $ fromIntegral $ cmapLanguage cmap
putWord32be fCode
putWord32be eCount
subCodes (glyphMap cmap) (fromIntegral fCode) (fromIntegral lastCode)
where
size, eCount, fCode, lastCode :: Word32
size = eCount*2 + 20
eCount = lastCode fCode + 1
fCode = fromIntegral $ fst $ M.findMin $ glyphMap cmap
lastCode = fromIntegral $ fst $ M.findMax $ glyphMap cmap
putMap12 :: CMap -> PutM ()
putMap12 cmap = do
putWord16be 12
putWord16be 0
putWord32be $ fromIntegral size
putWord32be (fromIntegral $ cmapLanguage cmap)
putWord32be $ fromIntegral nGroups
for_ ranges $ \(start, end, code) -> do
putWord32be $ fromIntegral start
putWord32be $ fromIntegral end
putWord32be $ fromIntegral code
where
size = nGroups * 12 + 16
ranges = findRanges $ M.toList $ glyphMap cmap
nGroups = length ranges
getMap12 :: Strict.ByteString -> Either String CMap
getMap12 bs = do
_ <- index16 bs 0
lang <- index16 bs 1
nGroups <- index32 bs 1
gmap <- fmap (M.fromAscList . concat) $ for [0..nGroups1] $ \i -> do
start <- index32 bs (i*3 + 2)
end <- index32 bs (i*3 + 3)
glyph <- index32 bs (i*3 + 4)
return [(fromIntegral c, fromIntegral $ glyph+cstart) | c <- [start .. end]]
Right $ CMap UnicodePlatform 0 lang MapFormat8 IS.empty gmap