-- |
-- Module      :  Codec.Archive.Zip
-- Copyright   :  © 2016–2018 Mark Karpov
-- License     :  BSD 3 clause
--
-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
-- Stability   :  experimental
-- Portability :  portable
--
-- The module provides everything you may need to manipulate Zip archives.
-- There are three things that should be clarified right away, to avoid
-- confusion in the future.
--
-- First, we use the 'EntrySelector' type that can be obtained from relative
-- 'FilePath's (paths to directories are not allowed). This method may seem
-- awkward at first, but it will protect you from the problems with
-- portability when your archive is unpacked on a different platform.
--
-- The second thing, that is rather a consequence of the first, is that
-- there is no way to add directories, or to be precise, /empty directories/
-- to your archive. This approach is used in Git, and I find it quite sane.
--
-- Finally, the third feature of the library is that it does not modify
-- archive instantly, because doing so on every manipulation would often be
-- inefficient. Instead we maintain a collection of pending actions that can
-- be turned into an optimized procedure that efficiently modifies archive
-- in one pass. Normally this should be of no concern to you, because all
-- actions are performed automatically when you leave the realm of
-- 'ZipArchive' monad. If, however, you ever need to force an update, the
-- 'commit' function is your friend. There are even “undo” functions, by the
-- way.
--
-- === Examples
--
-- An example of a program that prints a list of archive entries:
--
-- > import Codec.Archive.Zip
-- > import System.Environment (getArgs)
-- > import qualified Data.Map as M
-- >
-- > main :: IO ()
-- > main = do
-- >   [path]  <- getArgs
-- >   entries <- withArchive path (M.keys <$> getEntries)
-- >   mapM_ print entries
--
-- Create a Zip archive with a “Hello World” file:
--
-- > import Codec.Archive.Zip
-- > import System.Environment (getArgs)
-- >
-- > main :: IO ()
-- > main = do
-- >   [path] <- getArgs
-- >   s      <- mkEntrySelector "hello-world.txt"
-- >   createArchive path (addEntry Store "Hello, World!" s)
--
-- Extract contents of a specific file and print them:
--
-- > import Codec.Archive.Zip
-- > import System.Environment (getArgs)
-- > import qualified Data.ByteString.Char8 as B
-- >
-- > main :: IO ()
-- > main = do
-- >   [path,f] <- getArgs
-- >   s        <- mkEntrySelector f
-- >   bs       <- withArchive path (getEntry s)
-- >   B.putStrLn bs

{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase                 #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE TypeFamilies               #-}

module Codec.Archive.Zip
  ( -- * Types
    -- ** Entry selector
    EntrySelector
  , mkEntrySelector
  , unEntrySelector
  , getEntryName
  , EntrySelectorException (..)
    -- ** Entry description
  , EntryDescription (..)
  , CompressionMethod (..)
    -- ** Archive description
  , ArchiveDescription (..)
    -- ** Exceptions
  , ZipException (..)
    -- * Archive monad
  , ZipArchive
  , ZipState
  , createArchive
  , withArchive
    -- * Retrieving information
  , getEntries
  , doesEntryExist
  , getEntryDesc
  , getEntry
  , getEntrySource
  , sourceEntry
  , saveEntry
  , checkEntry
  , unpackInto
  , getArchiveComment
  , getArchiveDescription
    -- * Modifying archive
    -- ** Adding entries
  , addEntry
  , sinkEntry
  , loadEntry
  , copyEntry
  , packDirRecur
    -- ** Modifying entries
  , renameEntry
  , deleteEntry
  , recompress
  , setEntryComment
  , deleteEntryComment
  , setModTime
  , addExtraField
  , deleteExtraField
  , forEntries
    -- ** Operations on archive as a whole
  , setArchiveComment
  , deleteArchiveComment
    -- ** Control over editing
  , undoEntryChanges
  , undoArchiveChanges
  , undoAll
  , commit )
where

import Codec.Archive.Zip.Type
import Conduit (PrimMonad)
import Control.Monad
import Control.Monad.Base (MonadBase (..))
import Control.Monad.Catch
import Control.Monad.State.Strict
import Control.Monad.Trans.Control (MonadBaseControl (..))
import Control.Monad.Trans.Resource (ResourceT, MonadResource)
import Data.ByteString (ByteString)
import Data.Conduit (ConduitT, (.|))
import Data.Map.Strict (Map, (!))
import Data.Sequence (Seq, (|>))
import Data.Text (Text)
import Data.Time.Clock (UTCTime)
import Data.Void
import Data.Word (Word16)
import System.Directory
import System.FilePath ((</>))
import System.IO.Error (isDoesNotExistError)
import qualified Codec.Archive.Zip.Internal as I
import qualified Data.Conduit               as C
import qualified Data.Conduit.Binary        as CB
import qualified Data.Conduit.List          as CL
import qualified Data.DList                 as DList
import qualified Data.Map.Strict            as M
import qualified Data.Sequence              as S
import qualified Data.Set                   as E
import qualified System.FilePath            as FP

----------------------------------------------------------------------------
-- Archive monad

-- | Monad that provides context necessary for performing operations on zip
-- archives. It's intentionally opaque and not a monad transformer to limit
-- the actions that can be performed in it to those provided by this module
-- and their combinations.

newtype ZipArchive a = ZipArchive
  { unZipArchive :: StateT ZipState IO a
  } deriving ( Functor
             , Applicative
             , Monad
             , MonadIO
             , MonadThrow
             , MonadCatch
             , MonadMask )

-- | @since 0.2.0

instance MonadBase IO ZipArchive where
  liftBase = liftIO

-- | @since 0.2.0

instance MonadBaseControl IO ZipArchive where
  type StM ZipArchive a = (a, ZipState)
  liftBaseWith f = ZipArchive . StateT $ \s ->
    (\x -> (x, s)) <$> f (flip runStateT s . unZipArchive)
  {-# INLINEABLE liftBaseWith #-}
  restoreM       = ZipArchive . StateT . const . return
  {-# INLINEABLE restoreM #-}

-- | Internal state record used by the 'ZipArchive' monad. This is only
-- exported for use with 'MonadBaseControl' methods, you can't look inside.
--
-- @since 0.2.0

data ZipState = ZipState
  { zsFilePath  :: FilePath
    -- ^ Path to zip archive
  , zsEntries   :: Map EntrySelector EntryDescription
    -- ^ Actual collection of entries
  , zsArchive   :: ArchiveDescription
    -- ^ Info about the whole archive
  , zsActions   :: Seq I.PendingAction
    -- ^ Pending actions
  }

-- | Create a new archive given its location and an action that describes
-- how to create contents of the archive. This will silently overwrite the
-- specified file if it already exists. See 'withArchive' if you want to
-- work with an existing archive.

createArchive :: MonadIO m
  => FilePath          -- ^ Location of archive file to create
  -> ZipArchive a      -- ^ Actions that form archive's content
  -> m a
createArchive path m = liftIO $ do
  apath <- makeAbsolute path
  ignoringAbsence (removeFile apath)
  let st = ZipState
        { zsFilePath = apath
        , zsEntries  = M.empty
        , zsArchive  = ArchiveDescription Nothing 0 0
        , zsActions  = S.empty }
      action = unZipArchive (liftM2 const m commit)
  evalStateT action st

-- | Work with an existing archive. See 'createArchive' if you want to
-- create a new archive instead.
--
-- This operation may fail with:
--
--     * @isAlreadyInUseError@ if the file is already open and cannot be
--     reopened;
--
--     * @isDoesNotExistError@ if the file does not exist;
--
--     * @isPermissionError@ if the user does not have permission to open
--     the file;
--
--     * 'ParsingFailed' when specified archive is something this library
--     cannot parse (this includes multi-disk archives, for example).
--
-- Please note that entries with invalid (non-portable) file names may be
-- missing in the list of entries. Files that are compressed with
-- unsupported compression methods are skipped as well. Also, if several
-- entries would collide on some operating systems (such as Windows, because
-- of its case-insensitivity), only one of them will be available, because
-- 'EntrySelector' is case-insensitive. These are the consequences of the
-- design decision to make it impossible to create non-portable archives
-- with this library.

withArchive :: MonadIO m
  => FilePath          -- ^ Location of archive to work with
  -> ZipArchive a      -- ^ Actions on that archive
  -> m a
withArchive path m = liftIO $ do
  apath           <- canonicalizePath path
  (desc, entries) <- liftIO (I.scanArchive apath)
  let st = ZipState
        { zsFilePath = apath
        , zsEntries  = entries
        , zsArchive  = desc
        , zsActions  = S.empty }
      action = unZipArchive (liftM2 const m commit)
  liftIO (evalStateT action st)

----------------------------------------------------------------------------
-- Retrieving information

-- | Retrieve description of all archive entries. This is an efficient
-- operation that can be used for example to list all entries in an archive.
-- Do not hesitate to use the function frequently: scanning of archive
-- happens only once anyway.
--
-- Please note that the returned value only reflects actual contents of the
-- archive in file system, non-committed actions do not influence the list
-- of entries, see 'commit' for more information.

getEntries :: ZipArchive (Map EntrySelector EntryDescription)
getEntries = ZipArchive (gets zsEntries)

-- | Check whether the specified entry exists in the archive. This is a
-- simple shortcut defined as:
--
-- > doesEntryExist s = M.member s <$> getEntries

doesEntryExist :: EntrySelector -> ZipArchive Bool
doesEntryExist s = M.member s <$> getEntries

-- | Get 'EntryDescription' for specified entry. This is a simple shortcut
-- defined as:
--
-- > getEntryDesc s = M.lookup s <$> getEntries

getEntryDesc :: EntrySelector -> ZipArchive (Maybe EntryDescription)
getEntryDesc s = M.lookup s <$> getEntries

-- | Get contents of a specific archive entry as a strict 'ByteString'. It's
-- not recommended to use this on big entries, because it will suck out a
-- lot of memory. For big entries, use conduits: 'sourceEntry'.
--
-- Throws: 'EntryDoesNotExist'.

getEntry
  :: EntrySelector     -- ^ Selector that identifies archive entry
  -> ZipArchive ByteString -- ^ Contents of the entry
getEntry s = sourceEntry s (CL.foldMap id)

-- | Get an entry source.
--
-- Throws: 'EntryDoesNotExist'.
--
-- @since 0.1.3

getEntrySource
  :: (PrimMonad m, MonadThrow m, MonadResource m)
  => EntrySelector     -- ^ Selector that identifies archive entry
  -> ZipArchive (ConduitT () ByteString m ())
getEntrySource s = do
  path  <- getFilePath
  mdesc <- M.lookup s <$> getEntries
  case mdesc of
    Nothing   -> throwM (EntryDoesNotExist path s)
    Just desc -> return (I.sourceEntry path desc True)

-- | Stream contents of an archive entry to the given 'Sink'.
--
-- Throws: 'EntryDoesNotExist'.

sourceEntry
  :: EntrySelector
     -- ^ Selector that identifies archive entry
  -> ConduitT ByteString Void (ResourceT IO) a
     -- ^ Sink where to stream entry contents
  -> ZipArchive a
     -- ^ Contents of the entry (if found)
sourceEntry s sink = do
  src <- getEntrySource s
  (liftIO . C.runConduitRes) (src .| sink)

-- | Save a specific archive entry as a file in the file system.
--
-- Throws: 'EntryDoesNotExist'.

saveEntry
  :: EntrySelector     -- ^ Selector that identifies archive entry
  -> FilePath          -- ^ Where to save the file
  -> ZipArchive ()
saveEntry s path = do
  sourceEntry s (CB.sinkFile path)
  med <- getEntryDesc s
  forM_ med (liftIO . setModificationTime path . edModTime)

-- | Calculate CRC32 check sum and compare it with the value read from the
-- archive. The function returns 'True' when the check sums are the
-- same—that is, the data is not corrupted.
--
-- Throws: 'EntryDoesNotExist'.

checkEntry
  :: EntrySelector     -- ^ Selector that identifies archive entry
  -> ZipArchive Bool   -- ^ Is the entry intact?
checkEntry s = do
  calculated <- sourceEntry s I.crc32Sink
  given      <- edCRC32 . (! s) <$> getEntries
  -- ↑ NOTE We can assume that entry exists for sure because otherwise
  -- 'sourceEntry' would have thrown 'EntryDoesNotExist' already.
  return (calculated == given)

-- | Unpack the entire archive into the specified directory. The directory
-- will be created if it does not exist.

unpackInto :: FilePath -> ZipArchive ()
unpackInto dir' = do
  selectors <- M.keysSet <$> getEntries
  unless (null selectors) $ do
    dir <- liftIO (makeAbsolute dir')
    liftIO (createDirectoryIfMissing True dir)
    let dirs = E.map (FP.takeDirectory . (dir </>) . unEntrySelector) selectors
    forM_ dirs (liftIO . createDirectoryIfMissing True)
    forM_ selectors $ \s ->
      saveEntry s (dir </> unEntrySelector s)

-- | Get the archive comment.

getArchiveComment :: ZipArchive (Maybe Text)
getArchiveComment = adComment <$> getArchiveDescription

-- | Get the archive description record.

getArchiveDescription :: ZipArchive ArchiveDescription
getArchiveDescription = ZipArchive (gets zsArchive)

----------------------------------------------------------------------------
-- Modifying archive

-- | Add a new entry to the archive given its contents in binary form.

addEntry
  :: CompressionMethod -- ^ Compression method to use
  -> ByteString        -- ^ Entry contents
  -> EntrySelector     -- ^ Name of entry to add
  -> ZipArchive ()
addEntry t b s = addPending (I.SinkEntry t (C.yield b) s)

-- | Stream data from the specified source to an archive entry.

sinkEntry
  :: CompressionMethod -- ^ Compression method to use
  -> ConduitT () ByteString (ResourceT IO) () -- ^ Source of entry contents
  -> EntrySelector     -- ^ Name of entry to add
  -> ZipArchive ()
sinkEntry t src s = addPending (I.SinkEntry t src s)

-- | Load an entry from a given file.

loadEntry
  :: CompressionMethod -- ^ Compression method to use
  -> EntrySelector     -- ^ Name of entry to add
  -> FilePath          -- ^ Path to file to add
  -> ZipArchive ()
loadEntry t s path = do
  apath   <- liftIO (canonicalizePath path)
  modTime <- liftIO (getModificationTime path)
  let src = CB.sourceFile apath
  addPending (I.SinkEntry t src s)
  addPending (I.SetModTime modTime s)

-- | Copy an entry “as is” from another zip archive. If the entry does not
-- exist in that archive, 'EntryDoesNotExist' will be eventually thrown.

copyEntry
  :: FilePath          -- ^ Path to archive to copy from
  -> EntrySelector     -- ^ Name of entry (in source archive) to copy
  -> EntrySelector     -- ^ Name of entry to insert (in current archive)
  -> ZipArchive ()
copyEntry path s' s = do
  apath <- liftIO (canonicalizePath path)
  addPending (I.CopyEntry apath s' s)

-- | Add an entire directory to the archive. Please note that due to the
-- design of the library, empty sub-directories won't be added.
--
-- The action can throw 'InvalidEntrySelector'.

packDirRecur
  :: CompressionMethod -- ^ Compression method to use
  -> (FilePath -> ZipArchive EntrySelector)
     -- ^ How to get 'EntrySelector' from a path relative to the root of the
     -- directory we pack
  -> FilePath          -- ^ Path to directory to add
  -> ZipArchive ()
packDirRecur t f path = do
  files <- liftIO (listDirRecur path)
  forM_ files $ \x -> do
    s <- f x
    loadEntry t s (path </> x)

-- | Rename an entry in the archive. If the entry does not exist, nothing
-- will happen.

renameEntry
  :: EntrySelector     -- ^ Original entry name
  -> EntrySelector     -- ^ New entry name
  -> ZipArchive ()
renameEntry old new = addPending (I.RenameEntry old new)

-- | Delete an entry from the archive, if it does not exist, nothing will
-- happen.

deleteEntry :: EntrySelector -> ZipArchive ()
deleteEntry s = addPending (I.DeleteEntry s)

-- | Change compression method of an entry, if it does not exist, nothing
-- will happen.

recompress
  :: CompressionMethod -- ^ New compression method
  -> EntrySelector     -- ^ Name of entry to re-compress
  -> ZipArchive ()
recompress t s = addPending (I.Recompress t s)

-- | Set an entry comment, if that entry does not exist, nothing will
-- happen. Note that if binary representation of the comment is longer than
-- 65535 bytes, it will be truncated on writing.

setEntryComment
  :: Text              -- ^ Text of the comment
  -> EntrySelector     -- ^ Name of entry to comment on
  -> ZipArchive ()
setEntryComment text s = addPending (I.SetEntryComment text s)

-- | Delete an entry's comment, if that entry does not exist, nothing will
-- happen.

deleteEntryComment :: EntrySelector -> ZipArchive ()
deleteEntryComment s = addPending (I.DeleteEntryComment s)

-- | Set the “last modification” date\/time. The specified entry may be
-- missing, in that case the action has no effect.

setModTime
  :: UTCTime           -- ^ New modification time
  -> EntrySelector     -- ^ Name of entry to modify
  -> ZipArchive ()
setModTime time s = addPending (I.SetModTime time s)

-- | Add an extra field. The specified entry may be missing, in that case
-- this action has no effect.

addExtraField
  :: Word16            -- ^ Tag (header id) of extra field to add
  -> ByteString        -- ^ Body of the field
  -> EntrySelector     -- ^ Name of entry to modify
  -> ZipArchive ()
addExtraField n b s = addPending (I.AddExtraField n b s)

-- | Delete an extra field by its type (tag). The specified entry may be
-- missing, in that case this action has no effect.

deleteExtraField
  :: Word16            -- ^ Tag (header id) of extra field to delete
  -> EntrySelector     -- ^ Name of entry to modify
  -> ZipArchive ()
deleteExtraField n s = addPending (I.DeleteExtraField n s)

-- | Perform an action on every entry in the archive.

forEntries
  :: (EntrySelector -> ZipArchive ()) -- ^ Action to perform
  -> ZipArchive ()
forEntries action = getEntries >>= mapM_ action . M.keysSet

-- | Set comment of the entire archive.

setArchiveComment :: Text -> ZipArchive ()
setArchiveComment text = addPending (I.SetArchiveComment text)

-- | Delete the archive comment if it's present.

deleteArchiveComment :: ZipArchive ()
deleteArchiveComment = addPending I.DeleteArchiveComment

-- | Undo changes to a specific archive entry.

undoEntryChanges :: EntrySelector -> ZipArchive ()
undoEntryChanges s = modifyActions f
  where f = S.filter ((/= Just s) . I.targetEntry)

-- | Undo changes to the archive as a whole (archive's comment).

undoArchiveChanges :: ZipArchive ()
undoArchiveChanges = modifyActions f
  where f = S.filter ((/= Nothing) . I.targetEntry)

-- | Undo all changes made in this editing session.

undoAll :: ZipArchive ()
undoAll = modifyActions (const S.empty)

-- | Archive contents are not modified instantly, but instead changes are
-- collected as “pending actions” that should be committed, in order to
-- efficiently modify the archive in one pass. The actions are committed
-- automatically when the program leaves the realm of 'ZipArchive' monad
-- (i.e. as part of 'createArchive' or 'withArchive'), or can be forced
-- explicitly with the help of this function. Once committed, changes take
-- place in the file system and cannot be undone.

commit :: ZipArchive ()
commit = do
  file     <- getFilePath
  odesc    <- getArchiveDescription
  oentries <- getEntries
  actions  <- getPending
  exists   <- liftIO (doesFileExist file)
  unless (S.null actions && exists) $ do
    liftIO (I.commit file odesc oentries actions)
    -- NOTE The most robust way to update internal description of the
    -- archive is to scan it again—manual manipulations with descriptions of
    -- entries are too error-prone. We also want to erase all pending
    -- actions because 'I.commit' executes them all by definition.
    (ndesc, nentries) <- liftIO (I.scanArchive file)
    ZipArchive . modify $ \st -> st
      { zsEntries = nentries
      , zsArchive = ndesc
      , zsActions = S.empty }

----------------------------------------------------------------------------
-- Helpers

-- | Get the path of the actual archive file from inside of 'ZipArchive'
-- monad.

getFilePath :: ZipArchive FilePath
getFilePath = ZipArchive (gets zsFilePath)

-- | Get the collection of pending actions.

getPending :: ZipArchive (Seq I.PendingAction)
getPending = ZipArchive (gets zsActions)

-- | Modify the collection of pending actions in some way.

modifyActions :: (Seq I.PendingAction -> Seq I.PendingAction) -> ZipArchive ()
modifyActions f = ZipArchive (modify g)
  where g st = st { zsActions = f (zsActions st) }

-- | Add a new action to the list of pending actions.

addPending :: I.PendingAction -> ZipArchive ()
addPending a = modifyActions (|> a)

-- | Recursively list a directory. Do not return paths to empty directories.

listDirRecur :: FilePath -> IO [FilePath]
listDirRecur path = DList.toList <$> go ""
  where
    go adir = do
      let cdir = path </> adir
      raw <- listDirectory cdir
      fmap mconcat . forM raw $ \case
        ""   -> return mempty
        "."  -> return mempty
        ".." -> return mempty
        x    -> do
          let fullx = cdir </> x
              adir' = adir </> x
          isFile <- doesFileExist      fullx
          isDir  <- doesDirectoryExist fullx
          if isFile
            then return (DList.singleton adir')
            else if isDir
                   then go adir'
                   else return mempty

-- | Perform an action ignoring IO exceptions it may throw.

ignoringAbsence :: IO () -> IO ()
ignoringAbsence io = catchJust select io handler
  where
    select e = if isDoesNotExistError e then Just e else Nothing
    handler = const (return ())