module Opentype.Fileformat.Cmap where
import Opentype.Fileformat.Types
import Data.Binary
import Data.Binary.Put
import Data.List (sort, mapAccumL, foldl')
import Data.Either (either)
import Control.Monad
import Data.Traversable (for)
import Data.Foldable (for_, traverse_)
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.ByteString as Strict
import qualified Data.Map as M
import qualified Data.IntSet as IS
import Data.Maybe
import Data.Bits
import Data.Int
newtype CmapTable = CmapTable {getCmaps :: [CMap]}
  deriving Show
emptyCmapTable :: CmapTable
emptyCmapTable = CmapTable []
data CMap = CMap {
  cmapPlatform :: PlatformID,
  cmapEncoding :: Word16,
  
  cmapLanguage :: Word16,
  
  mapFormat :: MapFormat,
  
  
  multiByte :: IS.IntSet,
  
  glyphMap :: WordMap GlyphID
  }
  deriving Show
instance Ord CMap where
  compare (CMap pfID encID lang _ _ _) (CMap pfID2 encID2 lang2 _ _ _) =
    compare (pfID, encID, lang) (pfID2, encID2, lang2)
instance Eq CMap where
  (CMap pfID encID lang _ _ _) == (CMap pfID2 encID2 lang2 _ _ _) =
    (pfID, encID, lang) == (pfID2, encID2, lang2)
data MapFormat = 
  
  MapFormat0 |
  
  MapFormat2 |
  
  
  MapFormat4 |
  
  MapFormat6 |
  
  MapFormat8 |
  
  
  MapFormat10 |
  
  
  
  MapFormat12
  deriving Show
putCmapTable :: CmapTable -> Put
putCmapTable (CmapTable cmaps_) =
  do putWord16be 0
     putWord16be $ fromIntegral $ length cmaps
     for_ (zip offsets cmaps) $
       \(offset, CMap pfID encID _ _ _ _) ->
         do putPf pfID
            putWord16be encID
            putWord32be offset
     traverse_ putLazyByteString cmapsBs
       where
         cmaps = sort cmaps_
         offsets :: [Word32]
         offsets =
           scanl (+) (fromIntegral $ 4 + 8 * length cmaps) $
           map (fromIntegral . Lazy.length) cmapsBs
         cmapsBs = map (runPut.putCmap) cmaps
readCmapTable :: Strict.ByteString -> Either String CmapTable
readCmapTable bs = do
  version <- index16 bs 0
  when (version /= 0) $
    fail "unsupported cmap version."
  n <- index16 bs 1
  entries <- for [0..n1] $ \i -> do
    pfID <- toPf =<< (index16 bs $ 2 + i*4)
    encID <- index16 bs $ 2 + i*4 + 1
    offset <- index32 bs $ 2 + fromIntegral i*2
    return (offset, pfID, encID)
  cmaps <- for entries $ \(offset, pfID, encID) -> do
    cm <- readCmap $ Strict.drop (fromIntegral offset) bs
    Right $ cm {cmapPlatform = pfID, cmapEncoding = encID}
  return $ CmapTable cmaps
putCmap :: CMap -> Put
putCmap cmap = case mapFormat cmap of
  MapFormat0 -> putMap0 cmap
  MapFormat2 -> putMap2 cmap
  
  MapFormat4 -> putMap4 cmap
  
  MapFormat6 -> putMap6 cmap
  
  MapFormat8 -> putMap8 cmap
  MapFormat10 -> putMap10 cmap
  
  MapFormat12 -> putMap12 cmap
readCmap :: Strict.ByteString -> Either String CMap
readCmap bs_ = do
  c <- index16 bs_ 0
  let bs | (c >= 8 && c < 14) = Strict.drop 8 bs_
         | otherwise = Strict.drop 4 bs_
  either fail return $
    case c of 
      0 -> getMap0 bs
      2 -> getMap2 bs
      4 -> getMap4 bs
      6 -> getMap6 bs
      8 -> getMap8 bs
      10 -> getMap10 bs
      12 -> getMap12 bs
      i -> fail $ "unsupported map encoding " ++ show i
subIntMap :: Word32 -> Word32 -> WordMap GlyphID -> WordMap GlyphID
subIntMap from to intMap =
  fst $ M.split (fromIntegral to+1) $ snd $
  M.split (fromIntegral from1) intMap
asSubtypeFrom :: b -> [(a, b)] -> b
asSubtypeFrom a _ = a
putCodes :: Word32 -> Word32 -> [(Word32, GlyphID)] -> Put
putCodes start end _
  | start > end = return ()
putCodes start end [] = do
  putWord16be 0
  putCodes (start+1) end []
      
putCodes start end l@((i, code):rest)
  | start < i = do putWord16be 0
                   putCodes (start+1) end l
  | otherwise = do putWord16be code
                   putCodes (i+1) end rest
subCodes :: WordMap GlyphID -> Word32 -> Word32 -> Put
subCodes set start end =
  putCodes start end $ M.toList $ subIntMap start end set
data SubTable2 = SubTable2 {
  highByte :: Word16,
  firstCode :: Word16,
  entryCount :: Word16,
  rangeOffset :: Word16,
  rangeBytes :: Put
  }
putMap0 :: CMap -> PutM ()
putMap0 cmap = do
  putWord16be 0
  putWord16be 262
  putWord16be $ cmapLanguage cmap
  let gm = glyphMap cmap
  for_ [0..255] $ \c ->
    putWord8 $ fromIntegral $
    fromMaybe 0 (M.lookup c gm)
getMap0 :: Strict.ByteString -> Either String CMap
getMap0 bs =
  if Strict.length bs < 258 then
    Left "invalid map format 0"
  else
    do lang <- index16 bs 0
       let gmap = M.fromAscList $
             filter ((/= 0).snd) $
             (flip map) [0..255] $ \c ->
             (fromIntegral c, fromIntegral $ Strict.index bs (c+2))
       Right $ CMap UnicodePlatform 0 lang MapFormat0 IS.empty gmap
putMap2 :: CMap -> PutM ()
putMap2 cmap = do
  putWord16be 2
  putWord16be size
  putWord16be (cmapLanguage cmap)
  putCodes 0 255 $ zip (map (fromIntegral.highByte) subTableCodes) [1::Word16 ..]
  for_ subTables $ \(SubTable2 _ fc ec ro _) ->
    do putWord16be fc
       putWord16be ec
       putWord16be 0
       putWord16be ro
  for_ subTables rangeBytes
    where
      highBytes :: [Int]
      highBytes =
        IS.toList $ fst $
        IS.split 255 (multiByte cmap)
      subTableCodes =
        filter ((/= 0) . entryCount) $ 
        flip map highBytes $ \hb ->
        let subMap = subIntMap (fromIntegral hb `shift` 8)
                     (fromIntegral hb `shift` 8 .|. 0xff) $
                     glyphMap cmap
            (fstCode, lstCode)
              | M.null subMap = (0, 1)
              | otherwise = (fst $ M.findMin subMap,
                             fst $ M.findMax subMap)
            ec = lstCode  fstCode + 1
            rb = subCodes subMap fstCode lstCode
        in SubTable2 (fromIntegral hb) (fromIntegral fstCode) (fromIntegral ec) 0 rb
          where
      subTables = scanl calcOffset firstTable subTableCodes
      firstTable =
        SubTable2 0 0 256 (fromIntegral $ length subTableCodes * 8 + 2) $
        subCodes (glyphMap cmap) 0 255
      size :: Word16
      size = 518 + 8 * (fromIntegral $ length subTables) + 2 * sum (map entryCount subTables)
      calcOffset prev st = st { rangeOffset = rangeOffset prev  8 + 2*entryCount prev }
        
getMap2 :: Strict.ByteString -> Either String CMap
getMap2 bs = do
  lang <- index16 bs 0
  highBytes <- do
    l <- traverse (index16 bs) [1..256]
    Right $ map fst $ filter ((/=0).snd) $ zip [0::Int ..255] l
  l <- for [0::Word16 .. fromIntegral $ length highBytes] $ \i -> do
    fstCode <- index16 bs (fromIntegral $ 257 + i*4)
    cnt <- index16 bs (fromIntegral $ 257 + i*4 + 1)
    delta <- index16 bs (fromIntegral $ 257 + i*4 + 2)
    ro <- index16 bs (fromIntegral $ 257 + i*4 + 3)
    for [0 .. fromIntegral cnt1] $ \entry -> do
      p <- index16 bs (fromIntegral $ 257 + i*4 + 3 + ro `quot` 2 + entry)
      Right (fromIntegral $ fstCode + entry, if p == 0 then 0 else p + delta)
  let im = M.fromAscList $ filter ((/= 0).snd) $ concat l
      is = IS.fromAscList $ map fromIntegral highBytes
  Right $ CMap UnicodePlatform 0 lang MapFormat2 is im      
      
data Segment4 = RangeSegment Word16 Word16 Word16
              | CodeSegment Word16 Word16 [Word16]
              deriving Show
findRange :: Word32 -> Int64 -> [(Word32, Word16)] -> (Word32, [(Word32, Word16)])
findRange nextI _ [] =
  (nextI1, [])
findRange nextI offset l@((i,c):r)
  | i == nextI && c == fromIntegral (fromIntegral nextI+offset) =
    findRange (nextI+1) offset r
  | otherwise = (nextI1, l)
findCodes :: Word32 -> [(Word32, Word16)] -> ([GlyphID], [(Word32, Word16)])
findCodes _ [] = ([], [])
findCodes prevI l@((i,c):r)
  
  | i  prevI > 4 = ([], l)
  | otherwise = (replicate (fromIntegral $ iprevI1) 0 ++ c:c2, r2)
  where (c2, r2) = findCodes i r
getSegments :: [(Word32, Word16)] -> [Segment4]
getSegments [] = [RangeSegment 0xffff 1 0]
getSegments l@((start, c):_)
  | fromIntegral end  start >= 4 ||
    lc <= endstart+1 = 
      RangeSegment (fromIntegral start) (fromIntegral endfromIntegral start+1) c :
      getSegments r
  | otherwise =
      CodeSegment (fromIntegral start) (fromIntegral lc) codes :
      getSegments r2
  where
    lc = fromIntegral $ length codes
    (end, r) = findRange start (fromIntegral c  fromIntegral start) l
    (codes, r2) = findCodes (start1) l
data Segment4layout = Segment4layout {
  s4endCode :: Word16,
  s4startCode :: Word16,
  s4idDelta :: Word16,
  s4idRangeOffset :: Word16,
  s4glyphIndex :: [GlyphID] }
  deriving Show
  
putMap4 :: CMap -> PutM ()
putMap4 cmap = do
  putWord16be 4
  putWord16be size
  putWord16be (cmapLanguage cmap)
  putWord16be (segCount*2)
  putWord16be searchRange
  putWord16be entrySelector
  putWord16be $ 2*segCount  searchRange
  traverse_ (put.s4endCode) layout
  putWord16be 0
  traverse_ (put.s4startCode) layout
  traverse_ (put.s4idDelta) layout
  traverse_ (put.s4idRangeOffset) layout
  traverse_ (traverse_ put.s4glyphIndex) layout
    where
      size, segCount, searchRange, entrySelector :: Word16
      entrySelector = iLog2 segCount
      searchRange = 1 `shift` (fromIntegral $ entrySelector+1)
      segments = getSegments $ M.toList $ subIntMap 0 0xffff $ glyphMap cmap
      (codeSize, layout) = mapAccumL foldLayout (segCount*2) segments
      foldLayout offset (RangeSegment start len code) =
        (offset2, Segment4layout (fromIntegral $ start+len1)
                   (fromIntegral start) (code(fromIntegral start)) 0 [])
      foldLayout offset (CodeSegment start len codes) =
        (offset+fromIntegral len*22,
         Segment4layout (fromIntegral $ start+len1)
         (fromIntegral start) 0 offset codes)
      size = 8*segCount + codeSize + 16
      segCount = fromIntegral $ length segments
getMap4 :: Strict.ByteString -> Either String CMap
getMap4 bs = do
    lang <- index16 bs 0
    segCount <- (`quot` 2) <$> index16 bs 1
    gmap <- fmap (M.fromAscList . filter ((/= 0).snd) . concat ) $
            for [0::Word16 .. segCount2] $ \i ->
      do idDelta <- index16 bs (i + 6 + segCount*2)
         endCode <- index16 bs (i + 5)
         startCode <- index16 bs (i + 6 + segCount)
         idRangeOffset <- index16 bs (i + 6 + segCount*3)
         if idRangeOffset == 0
           then Right [(fromIntegral c, c+idDelta) | c <- [startCode .. endCode]]
           else for [0..endCodestartCode] $ \j ->
           do glyph <- index16 bs (fromIntegral $ i + 6 + segCount*3 + idRangeOffset`div`2 + j)
              Right (fromIntegral $ startCode + j, glyph)
    Right $ CMap UnicodePlatform 0 lang MapFormat4 IS.empty gmap
putMap6 :: CMap -> PutM ()
putMap6 cmap = do
  putWord16be 6
  putWord16be size
  putWord16be (cmapLanguage cmap)
  putWord16be fCode
  putWord16be eCount
  subCodes (glyphMap cmap) (fromIntegral fCode) (fromIntegral lastCode)
  where
    size, eCount, fCode, lastCode :: Word16
    size = eCount*2 + 10
    eCount = lastCode  fCode + 1
    fCode = fromIntegral $
      min (fromIntegral (maxBound :: Word16)::Word32) $
      fst $ M.findMin (glyphMap cmap)
    lastCode = fromIntegral $
      min (fromIntegral (maxBound :: Word16)::Word32) $
      fst $ M.findMax (glyphMap cmap)
    
getMap6 :: Strict.ByteString -> Either String CMap
getMap6 bs = do
  lang <- index16 bs 0
  fCode <- index16 bs 1
  eCount <- index16 bs 2
  gmap <- fmap (M.fromAscList . filter ((/= 0).snd)) $
          for [0..eCount1] $ \i -> do
    g <- index16 bs (i+3)
    Right (fromIntegral $ i + fCode, g)
  Right $ CMap UnicodePlatform 0 lang MapFormat6 IS.empty gmap
putPacked :: Int -> [Int] -> Put
putPacked start highBytes
  | start >= 8192 = return ()
  | otherwise = do
      putWord8 $ foldl' (.|.) 0 $ map ((shift 1) . (.&. 7)) bytes
      putPacked (start+1) rest
        where
          (bytes, rest) = span (\b -> b >= (start*8) &&
                                      b < (start+1)*8)
                          highBytes
readPacked :: Strict.ByteString -> [Int]
readPacked bs =
  [i .|. b `shift` 3 |
   (a, b) <- zip (Strict.unpack bs) [0..8191],
   i <- [0..7],
   a .&. (1 `shift` i) /= 0
  ]
      
findRanges :: [(Word32, GlyphID)] -> [(Word32, Word32, GlyphID)]
findRanges [] = []
findRanges l@((i,c):_) = (i, i2, c) : findRanges next
  where (i2, next) = findRange i (fromIntegral cfromIntegral i) l
  
putMap8 :: CMap -> PutM ()
putMap8 cmap = do
  putWord16be 8
  putWord16be 0
  putWord32be $ fromIntegral size
  putWord32be (fromIntegral $ cmapLanguage cmap)
  putPacked 0 highBytes
  putWord32be $ fromIntegral nGroups
  for_ ranges $ \(start, end, code) -> do
    putWord32be $ fromIntegral start
    putWord32be $ fromIntegral end
    putWord32be $ fromIntegral code
  where
    size = nGroups * 12 + 8208
    highBytes = IS.toList $ multiByte cmap
    ranges = findRanges $ M.toList $ glyphMap cmap
    nGroups = length ranges
getMap8 :: Strict.ByteString -> Either String CMap
getMap8 bs = do
  _ <- index16 bs 0
  lang <- index16 bs 1
  let is = IS.fromAscList $ readPacked (Strict.drop 4 bs)
  nGroups <- index32 bs 2049
  gmap <- fmap (M.fromAscList . concat) $ for [0..nGroups1] $ \i -> do
    start <- index32 bs (i*3 + 2050)
    end <- index32 bs (i*3 + 2051)
    glyph <- index32 bs (i*3 + 2052)
    return [(fromIntegral c, fromIntegral $ glyph+cstart) | c <- [start .. end]]
  Right $ CMap UnicodePlatform 0 lang MapFormat8 is gmap
getMap10 :: Strict.ByteString -> Either String CMap
getMap10 bs = do
  lang <- index32 bs 0
  fCode <- index32 bs 1
  eCount <- index32 bs 2
  gmap <- fmap (M.fromAscList . filter ((/= 0).snd)) $
          for [0..eCount1] $ \i -> do
    g <- index16 bs (fromIntegral i+6)
    Right (fromIntegral $ i + fCode, g)
  Right $ CMap UnicodePlatform 0 (fromIntegral lang) MapFormat6 IS.empty gmap
putMap10 :: CMap -> Put
putMap10 cmap = do
  putWord16be 10
  putWord16be 0
  putWord32be size
  putWord32be $ fromIntegral $ cmapLanguage cmap
  putWord32be fCode
  putWord32be eCount
  subCodes (glyphMap cmap) (fromIntegral fCode) (fromIntegral lastCode)
  where
    size, eCount, fCode, lastCode :: Word32
    size = eCount*2 + 20
    eCount = lastCode  fCode  + 1
    fCode = fromIntegral $ fst $ M.findMin $ glyphMap cmap
    lastCode = fromIntegral $ fst $ M.findMax $ glyphMap cmap
putMap12 :: CMap -> PutM ()
putMap12 cmap = do
  putWord16be 12
  putWord16be 0
  putWord32be $ fromIntegral size
  putWord32be (fromIntegral $ cmapLanguage cmap)
  putWord32be $ fromIntegral nGroups
  for_ ranges $ \(start, end, code) -> do
    putWord32be $ fromIntegral start
    putWord32be $ fromIntegral end
    putWord32be $ fromIntegral code
  where
    size = nGroups * 12 + 16
    ranges = findRanges $ M.toList $ glyphMap cmap
    nGroups = length ranges
getMap12 :: Strict.ByteString -> Either String CMap
getMap12 bs = do
  _ <- index16 bs 0
  lang <- index16 bs 1
  nGroups <- index32 bs 1
  gmap <- fmap (M.fromAscList . concat) $ for [0..nGroups1] $ \i -> do
    start <- index32 bs (i*3 + 2)
    end <- index32 bs (i*3 + 3)
    glyph <- index32 bs (i*3 + 4)
    return [(fromIntegral c, fromIntegral $ glyph+cstart) | c <- [start .. end]]
  Right $ CMap UnicodePlatform 0 lang MapFormat8 IS.empty gmap