module Codec.Archive.Tar.Utf8
  ( module Codec.Archive.Tar
  , entryPath
  , unpack
  ) where

-- | A module that is equivalent to "Codec.Archive.Tar" from the @tar@ package,

-- except that @unpack@ assumes that the file paths in an archive are UTF8

-- encoded.


import           Codec.Archive.Tar hiding ( entryPath, unpack )
import           Codec.Archive.Tar.Check ( checkSecurity )
import           Codec.Archive.Tar.Entry ( Entry (..), TarPath, fromLinkTarget )
import qualified Codec.Archive.Tar.Entry as Tar
import           Control.Exception ( Exception, catch, throwIO )
import           Data.Bits ( (.|.), (.&.), shiftL )
import qualified Data.ByteString.Lazy as LBS
import           Data.Char ( chr, ord )
import           Data.Int ( Int64 )
import           Data.Maybe ( fromMaybe )
import           Data.Time.Clock.POSIX ( posixSecondsToUTCTime )
import           System.Directory
                   ( copyFile, createDirectoryIfMissing, setModificationTime )
import           System.FilePath ( (</>) )
import qualified System.FilePath as FP
import           System.IO.Error ( isPermissionError )

type EpochTime = Int64

-- | Native 'FilePath' of the file or directory within the archive.

--

-- Assumes that the 'TarPath' of an 'Entry' is UTF8 encoded.

entryPath :: Entry -> FilePath
entryPath :: Entry -> FilePath
entryPath = TarPath -> FilePath
fromTarPath (TarPath -> FilePath) -> (Entry -> TarPath) -> Entry -> FilePath
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry -> TarPath
entryTarPath

-- | Convert a 'TarPath' to a native 'FilePath'.

--

-- The native 'FilePath' will use the native directory separator but it is not

-- otherwise checked for validity or sanity. In particular:

--

-- * The tar path may be invalid as a native path, eg the file name @\"nul\"@

--   is not valid on Windows.

--

-- * The tar path may be an absolute path or may contain @\"..\"@ components.

--   For security reasons this should not usually be allowed, but it is your

--   responsibility to check for these conditions (eg using 'checkSecurity').

--

-- Assumes that the 'TarPath' is UTF8 encoded.

fromTarPath :: TarPath -> FilePath
fromTarPath :: TarPath -> FilePath
fromTarPath TarPath
tp = FilePath -> FilePath
decodeIfUtf8Encoded (FilePath -> FilePath) -> FilePath -> FilePath
forall a b. (a -> b) -> a -> b
$ TarPath -> FilePath
Tar.fromTarPath TarPath
tp

-- | Create local files and directories based on the entries of a tar archive.

--

-- This is a portable implementation of unpacking suitable for portable

-- archives. It handles 'NormalFile' and 'Directory' entries and has simulated

-- support for 'SymbolicLink' and 'HardLink' entries. Links are implemented by

-- copying the target file. This therefore works on Windows as well as Unix.

-- All other entry types are ignored, that is they are not unpacked and no

-- exception is raised.

--

-- If the 'Entries' ends in an error then it is raised an an exception. Any

-- files or directories that have been unpacked before the error was

-- encountered will not be deleted. For this reason you may want to unpack

-- into an empty directory so that you can easily clean up if unpacking fails

-- part-way.

--

-- On its own, this function only checks for security (using 'checkSecurity').

-- You can do other checks by applying checking functions to the 'Entries' that

-- you pass to this function. For example:

--

-- > unpack dir (checkTarbomb expectedDir entries)

--

-- If you care about the priority of the reported errors then you may want to

-- use 'checkSecurity' before 'checkTarbomb' or other checks.

--

-- Assumes that the 'TarPath' of an `Entry` is UTF8 encoded.

unpack :: Exception e => FilePath -> Entries e -> IO ()
unpack :: forall e. Exception e => FilePath -> Entries e -> IO ()
unpack FilePath
baseDir Entries e
entries = [(FilePath, FilePath)]
-> Entries (Either e FileNameError) -> IO [(FilePath, FilePath)]
forall {a} {b}.
(Exception a, Exception b) =>
[(FilePath, FilePath)]
-> Entries (Either a b) -> IO [(FilePath, FilePath)]
unpackEntries [] (Entries e -> Entries (Either e FileNameError)
forall e. Entries e -> Entries (Either e FileNameError)
checkSecurity Entries e
entries)
                     IO [(FilePath, FilePath)]
-> ([(FilePath, FilePath)] -> IO ()) -> IO ()
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [(FilePath, FilePath)] -> IO ()
emulateLinks

  where
    -- We're relying here on 'checkSecurity' to make sure we're not scribbling

    -- files all over the place.


    unpackEntries :: [(FilePath, FilePath)]
-> Entries (Either a b) -> IO [(FilePath, FilePath)]
unpackEntries [(FilePath, FilePath)]
_     (Fail Either a b
err)      = (a -> IO [(FilePath, FilePath)])
-> (b -> IO [(FilePath, FilePath)])
-> Either a b
-> IO [(FilePath, FilePath)]
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either a -> IO [(FilePath, FilePath)]
forall e a. Exception e => e -> IO a
throwIO b -> IO [(FilePath, FilePath)]
forall e a. Exception e => e -> IO a
throwIO Either a b
err
    unpackEntries [(FilePath, FilePath)]
links Entries (Either a b)
Done            = [(FilePath, FilePath)] -> IO [(FilePath, FilePath)]
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return [(FilePath, FilePath)]
links
    unpackEntries [(FilePath, FilePath)]
links (Next Entry
entry Entries (Either a b)
es) = case Entry -> EntryContent
entryContent Entry
entry of
      NormalFile ByteString
file EpochTime
_ -> FilePath -> ByteString -> EpochTime -> IO ()
extractFile FilePath
path ByteString
file EpochTime
mtime
                        IO () -> IO [(FilePath, FilePath)] -> IO [(FilePath, FilePath)]
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> [(FilePath, FilePath)]
-> Entries (Either a b) -> IO [(FilePath, FilePath)]
unpackEntries [(FilePath, FilePath)]
links Entries (Either a b)
es
      EntryContent
Directory         -> FilePath -> EpochTime -> IO ()
extractDir FilePath
path EpochTime
mtime
                        IO () -> IO [(FilePath, FilePath)] -> IO [(FilePath, FilePath)]
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> [(FilePath, FilePath)]
-> Entries (Either a b) -> IO [(FilePath, FilePath)]
unpackEntries [(FilePath, FilePath)]
links Entries (Either a b)
es
      HardLink     LinkTarget
link -> ([(FilePath, FilePath)]
-> Entries (Either a b) -> IO [(FilePath, FilePath)]
unpackEntries ([(FilePath, FilePath)]
 -> Entries (Either a b) -> IO [(FilePath, FilePath)])
-> [(FilePath, FilePath)]
-> Entries (Either a b)
-> IO [(FilePath, FilePath)]
forall a b. (a -> b) -> a -> b
$! FilePath
-> LinkTarget -> [(FilePath, FilePath)] -> [(FilePath, FilePath)]
forall {t :: * -> *} {a}.
Foldable t =>
t a -> LinkTarget -> [(t a, FilePath)] -> [(t a, FilePath)]
saveLink FilePath
path LinkTarget
link [(FilePath, FilePath)]
links) Entries (Either a b)
es
      SymbolicLink LinkTarget
link -> ([(FilePath, FilePath)]
-> Entries (Either a b) -> IO [(FilePath, FilePath)]
unpackEntries ([(FilePath, FilePath)]
 -> Entries (Either a b) -> IO [(FilePath, FilePath)])
-> [(FilePath, FilePath)]
-> Entries (Either a b)
-> IO [(FilePath, FilePath)]
forall a b. (a -> b) -> a -> b
$! FilePath
-> LinkTarget -> [(FilePath, FilePath)] -> [(FilePath, FilePath)]
forall {t :: * -> *} {a}.
Foldable t =>
t a -> LinkTarget -> [(t a, FilePath)] -> [(t a, FilePath)]
saveLink FilePath
path LinkTarget
link [(FilePath, FilePath)]
links) Entries (Either a b)
es
      EntryContent
_                 -> [(FilePath, FilePath)]
-> Entries (Either a b) -> IO [(FilePath, FilePath)]
unpackEntries [(FilePath, FilePath)]
links Entries (Either a b)
es --ignore other file types

      where
        path :: FilePath
path  = Entry -> FilePath
entryPath Entry
entry
        mtime :: EpochTime
mtime = Entry -> EpochTime
entryTime Entry
entry

    extractFile :: FilePath -> ByteString -> EpochTime -> IO ()
extractFile FilePath
path ByteString
content EpochTime
mtime = do
      -- Note that tar archives do not make sure each directory is created

      -- before files they contain, indeed we may have to create several

      -- levels of directory.

      Bool -> FilePath -> IO ()
createDirectoryIfMissing Bool
True FilePath
absDir
      FilePath -> ByteString -> IO ()
LBS.writeFile FilePath
absPath ByteString
content
      FilePath -> EpochTime -> IO ()
setModTime FilePath
absPath EpochTime
mtime
      where
        absDir :: FilePath
absDir  = FilePath
baseDir FilePath -> FilePath -> FilePath
</> FilePath -> FilePath
FP.takeDirectory FilePath
path
        absPath :: FilePath
absPath = FilePath
baseDir FilePath -> FilePath -> FilePath
</> FilePath
path

    extractDir :: FilePath -> EpochTime -> IO ()
extractDir FilePath
path EpochTime
mtime = do
      Bool -> FilePath -> IO ()
createDirectoryIfMissing Bool
True FilePath
absPath
      FilePath -> EpochTime -> IO ()
setModTime FilePath
absPath EpochTime
mtime
      where
        absPath :: FilePath
absPath = FilePath
baseDir FilePath -> FilePath -> FilePath
</> FilePath
path

    saveLink :: t a -> LinkTarget -> [(t a, FilePath)] -> [(t a, FilePath)]
saveLink t a
path LinkTarget
link [(t a, FilePath)]
links = Int -> [(t a, FilePath)] -> [(t a, FilePath)]
forall a b. a -> b -> b
seq (t a -> Int
forall a. t a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length t a
path)
                             ([(t a, FilePath)] -> [(t a, FilePath)])
-> [(t a, FilePath)] -> [(t a, FilePath)]
forall a b. (a -> b) -> a -> b
$ Int -> [(t a, FilePath)] -> [(t a, FilePath)]
forall a b. a -> b -> b
seq (FilePath -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length FilePath
link')
                             ([(t a, FilePath)] -> [(t a, FilePath)])
-> [(t a, FilePath)] -> [(t a, FilePath)]
forall a b. (a -> b) -> a -> b
$ (t a
path, FilePath
link')(t a, FilePath) -> [(t a, FilePath)] -> [(t a, FilePath)]
forall a. a -> [a] -> [a]
:[(t a, FilePath)]
links
      where link' :: FilePath
link' = LinkTarget -> FilePath
fromLinkTarget LinkTarget
link

    emulateLinks :: [(FilePath, FilePath)] -> IO ()
emulateLinks = ((FilePath, FilePath) -> IO ()) -> [(FilePath, FilePath)] -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (((FilePath, FilePath) -> IO ())
 -> [(FilePath, FilePath)] -> IO ())
-> ((FilePath, FilePath) -> IO ())
-> [(FilePath, FilePath)]
-> IO ()
forall a b. (a -> b) -> a -> b
$ \(FilePath
relPath, FilePath
relLinkTarget) ->
      let absPath :: FilePath
absPath   = FilePath
baseDir FilePath -> FilePath -> FilePath
</> FilePath
relPath
          absTarget :: FilePath
absTarget = FilePath -> FilePath
FP.takeDirectory FilePath
absPath FilePath -> FilePath -> FilePath
</> FilePath
relLinkTarget
       in FilePath -> FilePath -> IO ()
copyFile FilePath
absTarget FilePath
absPath

setModTime :: FilePath -> EpochTime -> IO ()
setModTime :: FilePath -> EpochTime -> IO ()
setModTime FilePath
path EpochTime
t =
    FilePath -> UTCTime -> IO ()
setModificationTime FilePath
path (POSIXTime -> UTCTime
posixSecondsToUTCTime (EpochTime -> POSIXTime
forall a b. (Integral a, Num b) => a -> b
fromIntegral EpochTime
t))
      IO () -> (IOError -> IO ()) -> IO ()
forall e a. Exception e => IO a -> (e -> IO a) -> IO a
`catch` \IOError
e ->
        if IOError -> Bool
isPermissionError IOError
e then () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return () else IOError -> IO ()
forall e a. Exception e => e -> IO a
throwIO IOError
e

-- | If the given 'String' can be interpreted as a string of bytes that encodes

-- a string using UTF8, then yields the string decoded, otherwise yields the

-- given 'String'.


-- Inspired by the utf8-string package.

decodeIfUtf8Encoded :: String -> String
decodeIfUtf8Encoded :: FilePath -> FilePath
decodeIfUtf8Encoded FilePath
s = FilePath -> Maybe FilePath -> FilePath
forall a. a -> Maybe a -> a
fromMaybe FilePath
s (Maybe FilePath -> FilePath) -> Maybe FilePath -> FilePath
forall a b. (a -> b) -> a -> b
$ FilePath -> Maybe FilePath
decode FilePath
s
 where
  decode :: String -> Maybe String
  decode :: FilePath -> Maybe FilePath
decode [] = FilePath -> Maybe FilePath
forall a. a -> Maybe a
Just FilePath
""
  decode (Char
c:FilePath
cs)
    | Int
c' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0x80  = Char -> FilePath -> Maybe FilePath
decode' Char
c FilePath
cs
    | Int
c' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xc0  = Maybe FilePath
forall a. Maybe a
Nothing
    | Int
c' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xe0  = Maybe FilePath
multi1
    | Int
c' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xf0  = Int -> Int -> Int -> Maybe FilePath
multiByte Int
2 Int
0b1111 Int
0x00000800
    | Int
c' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xf8  = Int -> Int -> Int -> Maybe FilePath
multiByte Int
3 Int
0b0111 Int
0x00010000
    | Int
c' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xfc  = Int -> Int -> Int -> Maybe FilePath
multiByte Int
4 Int
0b0011 Int
0x00200000
    | Int
c' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xfe  = Int -> Int -> Int -> Maybe FilePath
multiByte Int
5 Int
0b0001 Int
0x04000000
    | Bool
otherwise = Maybe FilePath
forall a. Maybe a
Nothing
   where
    c' :: Int
c' = Char -> Int
ord Char
c
    isValidByte :: a -> Bool
isValidByte a
b = a
b a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
0xff Bool -> Bool -> Bool
&& a
b a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0b11000000 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0b10000000
    combine :: a -> a -> a
combine a
b1 a
b2 = (a
b1 a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
6) a -> a -> a
forall a. Bits a => a -> a -> a
.|. (a
b2 a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0b00111111)
    multi1 :: Maybe FilePath
multi1 = case FilePath
cs of
      Char
c1:FilePath
ds | Int -> Bool
forall {a}. (Ord a, Num a, Bits a) => a -> Bool
isValidByte Int
c1' ->
        let d :: Int
d = Int -> Int -> Int
forall {a}. (Bits a, Num a) => a -> a -> a
combine (Int
c' Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0b00011111) Int
c1'
        in  if Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x80
              then Char -> FilePath -> Maybe FilePath
decode' (Int -> Char
chr Int
d) FilePath
ds
              else Maybe FilePath
forall a. Maybe a
Nothing
       where
        c1' :: Int
c1' = Char -> Int
ord Char
c1
      FilePath
_ -> Maybe FilePath
forall a. Maybe a
Nothing
    multiByte :: Int -> Int -> Int -> Maybe String
    multiByte :: Int -> Int -> Int -> Maybe FilePath
multiByte Int
i Int
mask Int
overlong = Int -> FilePath -> Int -> Maybe FilePath
forall {t}. (Eq t, Num t) => t -> FilePath -> Int -> Maybe FilePath
aux Int
i FilePath
cs (Int
c' Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
mask)
      where
        aux :: t -> FilePath -> Int -> Maybe FilePath
aux t
0 FilePath
rs Int
acc
          | Bool
isValidAcc = Char -> FilePath -> Maybe FilePath
decode' (Int -> Char
chr Int
acc) FilePath
rs
          | Bool
otherwise = Maybe FilePath
forall a. Maybe a
Nothing
         where
          isValidAcc :: Bool
isValidAcc =  Int
overlong Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
acc
                     Bool -> Bool -> Bool
&& Int
acc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x10ffff
                     Bool -> Bool -> Bool
&& (Int
acc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xd800 Bool -> Bool -> Bool
|| Int
0xdfff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
acc)
                     Bool -> Bool -> Bool
&& (Int
acc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xfffe Bool -> Bool -> Bool
|| Int
0xffff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
acc)
        aux t
n (Char
r : FilePath
rs) Int
acc | Int -> Bool
forall {a}. (Ord a, Num a, Bits a) => a -> Bool
isValidByte Int
r' = t -> FilePath -> Int -> Maybe FilePath
aux (t
n t -> t -> t
forall a. Num a => a -> a -> a
- t
1) FilePath
rs (Int -> Maybe FilePath) -> Int -> Maybe FilePath
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Int
forall {a}. (Bits a, Num a) => a -> a -> a
combine Int
acc Int
r'
         where
          r' :: Int
r' = Char -> Int
ord Char
r
        aux t
_ FilePath
_ Int
_ = Maybe FilePath
forall a. Maybe a
Nothing
  decode' :: Char -> String -> Maybe String
  decode' :: Char -> FilePath -> Maybe FilePath
decode' Char
x FilePath
xs = do
    FilePath
xs' <- FilePath -> Maybe FilePath
decode FilePath
xs
    FilePath -> Maybe FilePath
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (FilePath -> Maybe FilePath) -> FilePath -> Maybe FilePath
forall a b. (a -> b) -> a -> b
$ Char
x Char -> FilePath -> FilePath
forall a. a -> [a] -> [a]
: FilePath
xs'