module Codec.Archive.Zip
       (
       
         Archive (..)
       , Entry (..)
       , CompressionMethod (..)
       , ZipOption (..)
       , emptyArchive
       
       , toArchive
       , fromArchive
       , filesInArchive
       , addEntryToArchive
       , deleteEntryFromArchive
       , findEntryByPath
       , fromEntry
       , toEntry
       
       , readEntry
       , writeEntry
       , addFilesToArchive
       , extractFilesFromArchive
       ) where
import System.Time ( toUTCTime, addToClockTime, CalendarTime (..), ClockTime (..), TimeDiff (..) )
import Data.Bits ( shiftL, shiftR, (.&.) )
import Data.Binary
import Data.Binary.Get
import Data.Binary.Put
import Data.List ( nub, find )
import Text.Printf
import System.FilePath
import System.Directory ( doesDirectoryExist, getDirectoryContents, createDirectoryIfMissing )
import Control.Monad ( when, unless, zipWithM, liftM )
import System.Directory ( getModificationTime )
import System.IO ( stderr, hPutStrLn )
import qualified Data.Digest.CRC32 as CRC32
import qualified Data.Map as M
#ifndef _WINDOWS
import System.Posix.Files ( setFileTimes )
#endif
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( toString, fromString )
import qualified Codec.Compression.Zlib.Raw as Zlib
data Archive = Archive
                { zEntries                :: [Entry]              
                , zSignature              :: Maybe B.ByteString   
                , zComment                :: B.ByteString         
                } deriving (Read, Show)
data Entry = Entry
               { eRelativePath            :: FilePath            
               , eCompressionMethod       :: CompressionMethod   
               , eLastModified            :: Integer             
               , eCRC32                   :: Word32              
               , eCompressedSize          :: Word32              
               , eUncompressedSize        :: Word32              
               , eExtraField              :: B.ByteString        
               , eFileComment             :: B.ByteString        
               , eInternalFileAttributes  :: Word16              
               , eExternalFileAttributes  :: Word32              
               , eCompressedData          :: B.ByteString        
               } deriving (Read, Show, Eq)
data CompressionMethod = Deflate
                       | NoCompression
                       deriving (Read, Show, Eq)
data ZipOption = OptRecursive               
               | OptVerbose                 
               deriving (Read, Show, Eq)
emptyArchive :: Archive
emptyArchive = Archive
                { zEntries                  = []
                , zSignature              = Nothing
                , zComment                = B.empty }
toArchive :: B.ByteString -> Archive
toArchive = runGet getArchive
fromArchive :: Archive -> B.ByteString
fromArchive = runPut . putArchive
filesInArchive :: Archive -> [FilePath]
filesInArchive = (map eRelativePath) . zEntries
addEntryToArchive :: Entry -> Archive -> Archive
addEntryToArchive entry archive =
  let archive'   = deleteEntryFromArchive (eRelativePath entry) archive
      oldEntries = zEntries archive'
  in  archive' { zEntries = entry : oldEntries }
deleteEntryFromArchive :: FilePath -> Archive -> Archive
deleteEntryFromArchive path archive =
  let path'      = zipifyFilePath path
      newEntries = filter (\e -> eRelativePath e /= path') $ zEntries archive
  in  archive { zEntries = newEntries }
findEntryByPath :: FilePath -> Archive -> Maybe Entry
findEntryByPath path archive = find (\e -> path == eRelativePath e) (zEntries archive)
fromEntry :: Entry -> B.ByteString
fromEntry entry =
  let uncompressedData = decompressData (eCompressionMethod entry) (eCompressedData entry)
  in  if eCRC32 entry == CRC32.crc32 uncompressedData
         then uncompressedData
         else error "CRC32 mismatch"
toEntry :: FilePath         
        -> Integer          
        -> B.ByteString     
        -> Entry
toEntry path modtime contents =
  let uncompressedSize = B.length contents
      compressedData = compressData Deflate contents
      compressedSize = B.length compressedData
      
      (compressionMethod, finalData, finalSize) =
        if uncompressedSize <= compressedSize
           then (NoCompression, contents, uncompressedSize)
           else (Deflate, compressedData, compressedSize)
      crc32 = CRC32.crc32 contents
  in  Entry { eRelativePath            = path
            , eCompressionMethod       = compressionMethod
            , eLastModified            = modtime
            , eCRC32                   = crc32
            , eCompressedSize          = fromIntegral finalSize
            , eUncompressedSize        = fromIntegral uncompressedSize
            , eExtraField              = B.empty
            , eFileComment             = B.empty
            , eInternalFileAttributes  = 0  
            , eExternalFileAttributes  = 0  
            , eCompressedData          = finalData
            }
readEntry :: [ZipOption] -> FilePath -> IO Entry
readEntry opts path = do
  isDir <- doesDirectoryExist path
  let path' = zipifyFilePath $ normalise $
              path ++ if isDir then "/" else ""  
  contents <- if isDir
                 then return B.empty
                 else B.readFile path
  (TOD modEpochTime _) <- getModificationTime path
  let entry = toEntry path' modEpochTime contents
  when (OptVerbose `elem` opts) $ do
    let compmethod = case eCompressionMethod entry of
                     Deflate       -> "deflated"
                     NoCompression -> "stored"
    hPutStrLn stderr $
      printf "  adding: %s (%s %.f%%)" (eRelativePath entry)
      compmethod (100  (100 * compressionRatio entry))
  return entry
writeEntry :: [ZipOption] -> Entry -> IO ()
writeEntry opts entry = do
  let path = eRelativePath entry
  
  let dir = takeDirectory path
  exists <- doesDirectoryExist dir
  unless exists $ do
    createDirectoryIfMissing True dir
    when (OptVerbose `elem` opts) $
      hPutStrLn stderr $ "  creating: " ++ dir
  if length path > 0 && last path == '/' 
     then return ()
     else do
       when (OptVerbose `elem` opts) $ do
         hPutStrLn stderr $ case eCompressionMethod entry of
                                 Deflate       -> " inflating: " ++ path
                                 NoCompression -> "extracting: " ++ path
       B.writeFile path (fromEntry entry)
  
  
  setFileTimeStamp path (eLastModified entry)
addFilesToArchive :: [ZipOption] -> Archive -> [FilePath] -> IO Archive
addFilesToArchive opts archive files = do
  filesAndChildren <- if OptRecursive `elem` opts
                         then mapM getDirectoryContentsRecursive files >>= return . nub . concat
                         else return files
  entries <- mapM (readEntry opts) filesAndChildren
  return $ foldr addEntryToArchive archive entries
extractFilesFromArchive :: [ZipOption] -> Archive -> IO ()
extractFilesFromArchive opts archive = mapM_ (writeEntry opts) $ zEntries archive
many :: Monad m => m (Maybe a) -> m [a]
many p = do
  r <- p
  case r of
       Just x  ->  many p >>= return . (x:)
       Nothing -> return []
zipifyFilePath :: FilePath -> String
zipifyFilePath path =
  let dir = takeDirectory path
      fn  = takeFileName path
      (drive, dir') = splitDrive dir
      dirParts = splitDirectories dir'
  in  drive ++ (concat (map (++ "/") dirParts)) ++ fn
compressData :: CompressionMethod -> B.ByteString -> B.ByteString
compressData Deflate       = Zlib.compress
compressData NoCompression = id
decompressData :: CompressionMethod -> B.ByteString -> B.ByteString
decompressData Deflate       = Zlib.decompress
decompressData NoCompression = id
compressionRatio :: Entry -> Float
compressionRatio entry =
  if eUncompressedSize entry == 0
     then 1
     else fromIntegral (eCompressedSize entry) / fromIntegral (eUncompressedSize entry)
data MSDOSDateTime = MSDOSDateTime { msDOSDate :: Word16
                                   , msDOSTime :: Word16
                                   } deriving (Read, Show, Eq)
minMSDOSDateTime :: Integer
minMSDOSDateTime = 315532800
epochTimeToMSDOSDateTime :: Integer -> MSDOSDateTime
epochTimeToMSDOSDateTime epochtime | epochtime < minMSDOSDateTime =
  epochTimeToMSDOSDateTime minMSDOSDateTime
  
epochTimeToMSDOSDateTime epochtime =
  let ut = toUTCTime (TOD epochtime 0)
      dosTime = toEnum $ (ctSec ut `div` 2) + shiftL (ctMin ut) 5 + shiftL (ctHour ut) 11
      dosDate = toEnum $ ctDay ut + shiftL (fromEnum (ctMonth ut) + 1) 5 + shiftL (ctYear ut  1980) 9
  in  MSDOSDateTime { msDOSDate = dosDate, msDOSTime = dosTime }
msDOSDateTimeToEpochTime :: MSDOSDateTime -> Integer
msDOSDateTimeToEpochTime (MSDOSDateTime {msDOSDate = dosDate, msDOSTime = dosTime}) =
  let seconds = fromIntegral $ 2 * (dosTime .&. 0O37)
      minutes = fromIntegral $ (shiftR dosTime 5) .&. 0O77
      hour    = fromIntegral $ shiftR dosTime 11
      day     = fromIntegral $ dosDate .&. 0O37
      month   = fromIntegral $ ((shiftR dosDate 5) .&. 0O17)  1
      year    = fromIntegral $ shiftR dosDate 9
      timeSinceEpoch = TimeDiff
               { tdYear = year + 10, 
                 tdMonth = month,
                 tdDay = day  1,  
                 tdHour = hour,
                 tdMin = minutes,
                 tdSec = seconds,
                 tdPicosec = 0 }
      (TOD epochsecs _) = addToClockTime timeSinceEpoch (TOD 0 0)
  in  epochsecs
getDirectoryContentsRecursive :: FilePath -> IO [FilePath]
getDirectoryContentsRecursive path = do
  isDir <- doesDirectoryExist path
  if isDir
     then do
       contents <- getDirectoryContents path
       let contents' = map (path </>) $ filter (`notElem` ["..","."]) contents
       children <- mapM getDirectoryContentsRecursive contents'
       if path == "."
          then return (concat children)
          else return (path : concat children)
     else return [path]
setFileTimeStamp :: FilePath -> Integer -> IO ()
setFileTimeStamp file epochtime = do
#ifdef _WINDOWS
  return ()  
#else
  let epochtime' = fromInteger epochtime
  setFileTimes file epochtime' epochtime'
#endif
getArchive :: Get Archive
getArchive = do
  locals <- many getLocalFile
  files <- many (getFileHeader (M.fromList locals))
  digSig <- lookAheadM getDigitalSignature
  endSig <- getWord32le
  unless (endSig == 0x06054b50) $ fail "Did not find end of central directory signature"
  skip 2 
  skip 2 
  skip 2 
  skip 2 
  skip 4 
  skip 4 
  commentLength <- getWord16le
  zipComment <- getLazyByteString (toEnum $ fromEnum commentLength)
  return $ Archive
           { zEntries                = files
           , zSignature              = digSig
           , zComment                = zipComment
           }
putArchive :: Archive -> Put
putArchive archive = do
  mapM_ putLocalFile $ zEntries archive
  let localFileSizes = map localFileSize $ zEntries archive
  let offsets = scanl (+) 0 localFileSizes
  let cdOffset = last offsets
  _ <- zipWithM putFileHeader offsets (zEntries archive)
  putDigitalSignature $ zSignature archive
  putWord32le 0x06054b50
  putWord16le 0 
  putWord16le 0 
  putWord16le $ fromIntegral $ length $ zEntries archive 
  putWord16le $ fromIntegral $ length $ zEntries archive 
  putWord32le $ sum $ map fileHeaderSize $ zEntries archive  
  putWord32le $ fromIntegral cdOffset                    
  putWord16le $ fromIntegral $ B.length $ zComment archive
  putLazyByteString $ zComment archive
fileHeaderSize :: Entry -> Word32
fileHeaderSize f =
  fromIntegral $ 4 + 2 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 +
    fromIntegral (B.length $ fromString $ zipifyFilePath $ eRelativePath f) +
    B.length (eExtraField f) + B.length (eFileComment f)
localFileSize :: Entry -> Word32
localFileSize f =
  fromIntegral $ 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 +
    fromIntegral (B.length $ fromString $ zipifyFilePath $ eRelativePath f) +
    B.length (eExtraField f) + B.length (eCompressedData f)
getLocalFile :: Get (Maybe (Word32, B.ByteString))
getLocalFile = do
  sig <- lookAhead getWord32le
  if sig /= 0x04034b50
    then return Nothing
    else do
      offset <- bytesRead
      skip 4  
      skip 2  
      bitflag <- getWord16le
      skip 2  
      skip 2  
      skip 2  
      skip 4  
      compressedSize <- getWord32le
      when (compressedSize == 0xFFFFFFFF) $
        fail "Can't read ZIP64 archive."
      skip 4  
      fileNameLength <- getWord16le
      extraFieldLength <- getWord16le
      skip (fromIntegral fileNameLength)  
      skip (fromIntegral extraFieldLength) 
      compressedData <- if bitflag .&. 0O10 == 0
          then getLazyByteString (fromIntegral compressedSize)
          else 
               
               
               
               
               do raw <- many $ do
                           s <- lookAhead getWord32le
                           if s == 0x08074b50
                              then return Nothing
                              else liftM Just getWord8
                  skip 4 
                  skip 4 
                  cs <- getWord32le  
                  skip 4 
                  if fromIntegral cs == length raw
                     then return $ B.pack raw
                     else fail "Content size mismatch in data descriptor record" 
      return $ Just (fromIntegral offset, compressedData)
putLocalFile :: Entry -> Put
putLocalFile f = do
  putWord32le 0x04034b50
  putWord16le 20 
  putWord16le 2  
  putWord16le $ case eCompressionMethod f of
                     NoCompression -> 0
                     Deflate       -> 8
  let modTime = epochTimeToMSDOSDateTime $ eLastModified f
  putWord16le $ msDOSTime modTime
  putWord16le $ msDOSDate modTime
  putWord32le $ eCRC32 f
  putWord32le $ eCompressedSize f
  putWord32le $ eUncompressedSize f
  putWord16le $ fromIntegral $ length $ eRelativePath f
  putWord16le $ fromIntegral $ B.length $ eExtraField f
  putLazyByteString $ fromString $ zipifyFilePath $ eRelativePath f
  putLazyByteString $ eExtraField f
  putLazyByteString $ eCompressedData f
getFileHeader :: M.Map Word32 B.ByteString 
              -> Get (Maybe Entry)
getFileHeader locals = do
  sig <- lookAhead getWord32le
  if sig /= 0x02014b50
     then return Nothing
     else do
       skip 4 
       skip 2 
       versionNeededToExtract <- getWord16le
       unless (versionNeededToExtract <= 20) $
         fail "This archive requires zip >= 2.0 to extract."
       skip 2 
       rawCompressionMethod <- getWord16le
       compressionMethod <- case rawCompressionMethod of
                             0 -> return NoCompression
                             8 -> return Deflate
                             _ -> fail $ "Unknown compression method " ++ show rawCompressionMethod
       lastModFileTime <- getWord16le
       lastModFileDate <- getWord16le
       crc32 <- getWord32le
       compressedSize <- getWord32le
       uncompressedSize <- getWord32le
       fileNameLength <- getWord16le
       extraFieldLength <- getWord16le
       fileCommentLength <- getWord16le
       skip 2 
       internalFileAttributes <- getWord16le
       externalFileAttributes <- getWord32le
       relativeOffset <- getWord32le
       fileName <- getLazyByteString (toEnum $ fromEnum fileNameLength)
       extraField <- getLazyByteString (toEnum $ fromEnum extraFieldLength)
       fileComment <- getLazyByteString (toEnum $ fromEnum fileCommentLength)
       compressedData <- case (M.lookup relativeOffset locals) of
                         Just x  -> return x
                         Nothing -> fail $ "Unable to find data at offset " ++ show relativeOffset
       return $ Just $ Entry
                 { eRelativePath            = toString fileName
                 , eCompressionMethod       = compressionMethod
                 , eLastModified            = msDOSDateTimeToEpochTime $
                                              MSDOSDateTime { msDOSDate = lastModFileDate,
                                                              msDOSTime = lastModFileTime }
                 , eCRC32                   = crc32
                 , eCompressedSize          = compressedSize
                 , eUncompressedSize        = uncompressedSize
                 , eExtraField              = extraField
                 , eFileComment             = fileComment
                 , eInternalFileAttributes  = internalFileAttributes
                 , eExternalFileAttributes  = externalFileAttributes
                 , eCompressedData          = compressedData
                 }
putFileHeader :: Word32        
              -> Entry
              -> Put
putFileHeader offset local = do
  putWord32le 0x02014b50
  putWord16le 0  
  putWord16le 20 
  putWord16le 2  
  putWord16le $ case eCompressionMethod local of
                     NoCompression -> 0
                     Deflate       -> 8
  let modTime = epochTimeToMSDOSDateTime $ eLastModified local
  putWord16le $ msDOSTime modTime
  putWord16le $ msDOSDate modTime
  putWord32le $ eCRC32 local
  putWord32le $ eCompressedSize local
  putWord32le $ eUncompressedSize local
  putWord16le $ fromIntegral $ length $ eRelativePath local
  putWord16le $ fromIntegral $ B.length $ eExtraField local
  putWord16le $ fromIntegral $ B.length $ eFileComment local
  putWord16le 0  
  putWord16le $ eInternalFileAttributes local
  putWord32le $ eExternalFileAttributes local
  putWord32le offset
  putLazyByteString $ fromString $ zipifyFilePath $ eRelativePath local
  putLazyByteString $ eExtraField local
  putLazyByteString $ eFileComment local
getDigitalSignature :: Get (Maybe B.ByteString)
getDigitalSignature = do
  hdrSig <- getWord32le
  if hdrSig /= 0x08064b50
     then return Nothing
     else do
        sigSize <- getWord16le
        getLazyByteString (toEnum $ fromEnum sigSize) >>= return . Just
putDigitalSignature :: Maybe B.ByteString -> Put
putDigitalSignature Nothing = return ()
putDigitalSignature (Just sig) = do
  putWord32le 0x08064b50
  putWord16le $ fromIntegral $ B.length sig
  putLazyByteString sig