-- | Data.OSXAr is a module for parsing an OS X static archive
-- into a list of its member files and its symbol table.
module Data.OSXAr (parseOSXAr, ArchiveEntry(..)) where

import Data.Binary
import Data.Binary.Get
import Control.Monad
import qualified Data.Map as M
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Lazy as L

data ArchiveEntry = ArchiveEntry
    { filename :: String       -- ^ File name.
    , filesize :: Int          -- ^ File size.
    , filedata :: B.ByteString -- ^ File bytes.
    } deriving (Eq, Show)

getArchEntries = do
    empty <- isEmpty
    if empty then
        return []
     else do
        offset  <- liftM fromIntegral bytesRead :: Get Int 
        name    <- getByteString 16
        skip 32
        st_size <- liftM (read . C.unpack) (getByteString 8) :: Get Int
        skip 4
        off1    <- liftM fromIntegral bytesRead :: Get Int 
        name    <- if C.unpack (C.take 3 name) == "#1/" then
                        liftM (C.unpack . C.takeWhile (/= '\0')) (getByteString $ read $ C.unpack $ C.drop 3 name)
                    else
                        return $ C.unpack $ C.takeWhile (/= '\0') name
        off2    <- liftM fromIntegral bytesRead :: Get Int 
        file    <- getByteString (st_size - (off2 - off1))
        rest    <- getArchEntries
        return $ (offset, ArchiveEntry name (st_size - (off2 - off1)) file) : rest

getArchMagic = do
    magic <- liftM C.unpack $ getByteString 8
    if magic /= "!<arch>\n" then
        fail $ "Invalid magic number " ++ show magic
     else
        return ()

getArch = do
    getArchMagic
    getArchEntries

getRanlibs strings = do
    empty <- isEmpty
    if empty then
        return []
     else do
        str_offset <- liftM fromIntegral getWord32host
        fil_offset <- liftM fromIntegral getWord32host
        rest       <- getRanlibs strings
        let name = C.unpack $ C.takeWhile (/= '\0') $ B.drop str_offset strings
        return $ (name, fil_offset) : rest

getSymbolTable = do
    nranlibs <- liftM fromIntegral getWord32host
    ranlibs  <- getByteString nranlibs
    nstrings <- liftM fromIntegral getWord32host
    strings  <- getByteString nstrings
    return $ runGet (getRanlibs strings) (L.fromChunks [ranlibs])

parseSymbolTable ((_,x):_) | filename x == "__.SYMDEF" || filename x == "__.SYMDEF SORTED" =
    Just $ M.fromList $ runGet getSymbolTable (L.fromChunks [filedata x])
parseSymbolTable _ = Nothing

peelSymbolTable ((_,x):xs) | filename x == "__.SYMDEF" || filename x == "__.SYMDEF SORTED" = xs
peelSymbolTable xs = xs

-- | Parse an OS X archive into a set of contained objects and a lookup table
-- for fast lookup of which member file contains a symbol.
parseOSXAr :: B.ByteString -> (M.Map Int ArchiveEntry, Maybe (M.Map String Int))
parseOSXAr bs =
    let entries = runGet getArch (L.fromChunks [bs])
        symtab  = parseSymbolTable entries
    in (M.fromList $ peelSymbolTable entries, symtab)