{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE PackageImports #-}
{-# OPTIONS_HADDOCK hide #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Codec.Archive.Tar.Read
-- Copyright   :  (c) 2007 Bjorn Bringert,
--                    2008 Andrea Vezzosi,
--                    2008-2009 Duncan Coutts,
--                    2011 Max Bolingbroke
-- License     :  BSD3
--
-- Maintainer  :  duncan@community.haskell.org
-- Portability :  portable
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar.Read
  ( read
  , FormatError(..)
  ) where

import Codec.Archive.Tar.PackAscii
import Codec.Archive.Tar.Types

import Data.Char     (ord)
import Data.Int      (Int64)
import Data.Bits     (Bits(shiftL, (.&.), complement))
import Control.Exception (Exception(..))
import Data.Typeable (Typeable)
import Control.Applicative
import Control.Monad
import Control.DeepSeq
import Control.Monad.Trans.State.Lazy

import qualified Data.ByteString        as BS
import qualified Data.ByteString.Char8  as BS.Char8
import qualified Data.ByteString.Unsafe as BS
import qualified Data.ByteString.Lazy   as LBS
import System.IO.Unsafe (unsafePerformIO)
import "os-string" System.OsString.Posix (PosixString, PosixChar)
import qualified "os-string" System.OsString.Posix as PS

import Prelude hiding (read)

-- | Errors that can be encountered when parsing a Tar archive.
data FormatError
  = TruncatedArchive
  | ShortTrailer
  | BadTrailer
  | TrailingJunk
  | ChecksumIncorrect
  | NotTarFormat
  | UnrecognisedTarFormat
  | HeaderBadNumericEncoding
  deriving (FormatError -> FormatError -> Bool
(FormatError -> FormatError -> Bool)
-> (FormatError -> FormatError -> Bool) -> Eq FormatError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: FormatError -> FormatError -> Bool
== :: FormatError -> FormatError -> Bool
$c/= :: FormatError -> FormatError -> Bool
/= :: FormatError -> FormatError -> Bool
Eq, Int -> FormatError -> ShowS
[FormatError] -> ShowS
FormatError -> String
(Int -> FormatError -> ShowS)
-> (FormatError -> String)
-> ([FormatError] -> ShowS)
-> Show FormatError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> FormatError -> ShowS
showsPrec :: Int -> FormatError -> ShowS
$cshow :: FormatError -> String
show :: FormatError -> String
$cshowList :: [FormatError] -> ShowS
showList :: [FormatError] -> ShowS
Show, Typeable)

instance Exception FormatError where
  displayException :: FormatError -> String
displayException FormatError
TruncatedArchive         = String
"truncated tar archive"
  displayException FormatError
ShortTrailer             = String
"short tar trailer"
  displayException FormatError
BadTrailer               = String
"bad tar trailer"
  displayException FormatError
TrailingJunk             = String
"tar file has trailing junk"
  displayException FormatError
ChecksumIncorrect        = String
"tar checksum error"
  displayException FormatError
NotTarFormat             = String
"data is not in tar format"
  displayException FormatError
UnrecognisedTarFormat    = String
"tar entry not in a recognised format"
  displayException FormatError
HeaderBadNumericEncoding = String
"tar header is malformed (bad numeric encoding)"

instance NFData    FormatError where
  rnf :: FormatError -> ()
rnf !FormatError
_ = () -- enumerations are fully strict by construction

-- | Convert a data stream in the tar file format into an internal data
-- structure. Decoding errors are reported by the 'Fail' constructor of the
-- 'Entries' type.
--
-- * The conversion is done lazily.
--
read :: LBS.ByteString -> Entries FormatError
read :: ByteString -> Entries FormatError
read = State ByteString (Entries FormatError)
-> ByteString -> Entries FormatError
forall s a. State s a -> s -> a
evalState ((Int64 -> StateT ByteString Identity ByteString)
-> StateT ByteString Identity ByteString
-> State ByteString (Entries FormatError)
forall (m :: * -> *).
Monad m =>
(Int64 -> m ByteString) -> m ByteString -> m (Entries FormatError)
readStreaming Int64 -> StateT ByteString Identity ByteString
getN StateT ByteString Identity ByteString
forall (m :: * -> *) s. Monad m => StateT s m s
get)
  where
    getN :: Int64 -> State LBS.ByteString LBS.ByteString
    getN :: Int64 -> StateT ByteString Identity ByteString
getN Int64
n = do
      (ByteString
pref, ByteString
st) <- Int64 -> ByteString -> (ByteString, ByteString)
LBS.splitAt Int64
n (ByteString -> (ByteString, ByteString))
-> StateT ByteString Identity ByteString
-> StateT ByteString Identity (ByteString, ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> StateT ByteString Identity ByteString
forall (m :: * -> *) s. Monad m => StateT s m s
get
      ByteString -> StateT ByteString Identity ()
forall (m :: * -> *) s. Monad m => s -> StateT s m ()
put ByteString
st
      ByteString -> StateT ByteString Identity ByteString
forall a. a -> StateT ByteString Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ByteString
pref

readStreaming
  :: Monad m
  => (Int64 -> m LBS.ByteString)
  -> m LBS.ByteString
  -> m (Entries FormatError)
readStreaming :: forall (m :: * -> *).
Monad m =>
(Int64 -> m ByteString) -> m ByteString -> m (Entries FormatError)
readStreaming = ((forall a. m a -> m a)
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
-> m (Entries FormatError)
forall (m :: * -> *) e tarPath linkTarget.
Monad m =>
(forall a. m a -> m a)
-> m (Either e (Maybe (GenEntry tarPath linkTarget)))
-> m (GenEntries tarPath linkTarget e)
unfoldEntriesM m a -> m a
forall a. a -> a
forall a. m a -> m a
id (m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
 -> m (Entries FormatError))
-> (m ByteString
    -> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget))))
-> m ByteString
-> m (Entries FormatError)
forall b c a. (b -> c) -> (a -> b) -> a -> c
.) ((m ByteString
  -> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget))))
 -> m ByteString -> m (Entries FormatError))
-> ((Int64 -> m ByteString)
    -> m ByteString
    -> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget))))
-> (Int64 -> m ByteString)
-> m ByteString
-> m (Entries FormatError)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int64 -> m ByteString)
-> m ByteString
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
forall (m :: * -> *).
Monad m =>
(Int64 -> m ByteString)
-> m ByteString
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
getEntryStreaming

getEntryStreaming
  :: Monad m
  => (Int64 -> m LBS.ByteString)
  -> m LBS.ByteString
  -> m (Either FormatError (Maybe Entry))
getEntryStreaming :: forall (m :: * -> *).
Monad m =>
(Int64 -> m ByteString)
-> m ByteString
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
getEntryStreaming Int64 -> m ByteString
getN m ByteString
getAll = do
  ByteString
header <- Int64 -> m ByteString
getN Int64
512
  if ByteString -> Int64
LBS.length ByteString
header Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
< Int64
512 then Either FormatError (Maybe (GenEntry TarPath LinkTarget))
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (FormatError
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
forall a b. a -> Either a b
Left FormatError
TruncatedArchive) else do

    -- Tar files end with at least two blocks of all '0'. Checking this serves
    -- two purposes. It checks the format but also forces the tail of the data
    -- which is necessary to close the file if it came from a lazily read file.
    --
    -- It's tempting to fall into trailer parsing as soon as LBS.head bs == '\0',
    -- because, if interpreted as an 'Entry', it means that 'entryTarPath' is an empty
    -- string. Yet it's not a concern of this function: parse it as an 'Entry'
    -- and let further pipeline such as 'checkEntrySecurity' deal with it. After all,
    -- it might be a format extension with unknown semantics. Such somewhat malformed
    -- archives do exist in the wild, see https://github.com/haskell/tar/issues/73.
    --
    -- Only if an entire block is null, we assume that we are parsing a trailer.
    if (Word8 -> Bool) -> ByteString -> Bool
LBS.all (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0) ByteString
header then do
      ByteString
nextBlock <- Int64 -> m ByteString
getN Int64
512
      if ByteString -> Int64
LBS.length ByteString
nextBlock Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
< Int64
512 then Either FormatError (Maybe (GenEntry TarPath LinkTarget))
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (FormatError
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
forall a b. a -> Either a b
Left FormatError
ShortTrailer)
        else if (Word8 -> Bool) -> ByteString -> Bool
LBS.all (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0) ByteString
nextBlock then do
          ByteString
remainder <- m ByteString
getAll
          Either FormatError (Maybe (GenEntry TarPath LinkTarget))
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either FormatError (Maybe (GenEntry TarPath LinkTarget))
 -> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget))))
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
forall a b. (a -> b) -> a -> b
$ if (Word8 -> Bool) -> ByteString -> Bool
LBS.all (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0) ByteString
remainder then Maybe (GenEntry TarPath LinkTarget)
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
forall a b. b -> Either a b
Right Maybe (GenEntry TarPath LinkTarget)
forall a. Maybe a
Nothing else FormatError
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
forall a b. a -> Either a b
Left FormatError
TrailingJunk
          else Either FormatError (Maybe (GenEntry TarPath LinkTarget))
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (FormatError
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
forall a b. a -> Either a b
Left FormatError
BadTrailer)

      else case ByteString
-> Either
     FormatError
     (ByteString, Permissions, Int, Int, Int64, Int64, Char, ByteString,
      Format, ByteString, ByteString, Int, Int, ByteString)
parseHeader ByteString
header of
        Left FormatError
err -> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either FormatError (Maybe (GenEntry TarPath LinkTarget))
 -> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget))))
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
forall a b. (a -> b) -> a -> b
$ FormatError
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
forall a b. a -> Either a b
Left FormatError
err
        Right (ByteString
name, Permissions
mode, Int
uid, Int
gid, Int64
size, Int64
mtime, Char
typecode, ByteString
linkname, Format
format, ByteString
uname, ByteString
gname, Int
devmajor, Int
devminor, ByteString
prefix) -> do

          -- It is crucial to get (size + padding) in one monadic operation
          -- and drop padding in a pure computation. If you get size bytes first,
          -- then skip padding, unpacking in constant memory will become impossible.
          let paddedSize :: Int64
paddedSize = (Int64
size Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
511) Int64 -> Int64 -> Int64
forall a. Bits a => a -> a -> a
.&. Int64 -> Int64
forall a. Bits a => a -> a
complement Int64
511
          ByteString
paddedContent <- Int64 -> m ByteString
getN Int64
paddedSize
          let content :: ByteString
content = Int64 -> ByteString -> ByteString
LBS.take Int64
size ByteString
paddedContent

          Either FormatError (Maybe (GenEntry TarPath LinkTarget))
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either FormatError (Maybe (GenEntry TarPath LinkTarget))
 -> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget))))
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
-> m (Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
forall a b. (a -> b) -> a -> b
$ Maybe (GenEntry TarPath LinkTarget)
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
forall a b. b -> Either a b
Right (Maybe (GenEntry TarPath LinkTarget)
 -> Either FormatError (Maybe (GenEntry TarPath LinkTarget)))
-> Maybe (GenEntry TarPath LinkTarget)
-> Either FormatError (Maybe (GenEntry TarPath LinkTarget))
forall a b. (a -> b) -> a -> b
$ GenEntry TarPath LinkTarget -> Maybe (GenEntry TarPath LinkTarget)
forall a. a -> Maybe a
Just (GenEntry TarPath LinkTarget
 -> Maybe (GenEntry TarPath LinkTarget))
-> GenEntry TarPath LinkTarget
-> Maybe (GenEntry TarPath LinkTarget)
forall a b. (a -> b) -> a -> b
$ Entry {
            entryTarPath :: TarPath
entryTarPath     = PosixString -> PosixString -> TarPath
TarPath (ByteString -> PosixString
byteToPosixString ByteString
name) (ByteString -> PosixString
byteToPosixString ByteString
prefix),
            entryContent :: GenEntryContent LinkTarget
entryContent     = case Char
typecode of
                 Char
'\0' -> ByteString -> Int64 -> GenEntryContent LinkTarget
forall linkTarget.
ByteString -> Int64 -> GenEntryContent linkTarget
NormalFile      ByteString
content Int64
size
                 Char
'0'  -> ByteString -> Int64 -> GenEntryContent LinkTarget
forall linkTarget.
ByteString -> Int64 -> GenEntryContent linkTarget
NormalFile      ByteString
content Int64
size
                 Char
'1'  -> LinkTarget -> GenEntryContent LinkTarget
forall linkTarget. linkTarget -> GenEntryContent linkTarget
HardLink        (PosixString -> LinkTarget
LinkTarget (PosixString -> LinkTarget) -> PosixString -> LinkTarget
forall a b. (a -> b) -> a -> b
$ ByteString -> PosixString
byteToPosixString ByteString
linkname)
                 Char
'2'  -> LinkTarget -> GenEntryContent LinkTarget
forall linkTarget. linkTarget -> GenEntryContent linkTarget
SymbolicLink    (PosixString -> LinkTarget
LinkTarget (PosixString -> LinkTarget) -> PosixString -> LinkTarget
forall a b. (a -> b) -> a -> b
$ ByteString -> PosixString
byteToPosixString ByteString
linkname)
                 Char
_ | Format
format Format -> Format -> Bool
forall a. Eq a => a -> a -> Bool
== Format
V7Format
                      -> Char -> ByteString -> Int64 -> GenEntryContent LinkTarget
forall linkTarget.
Char -> ByteString -> Int64 -> GenEntryContent linkTarget
OtherEntryType  Char
typecode ByteString
content Int64
size
                 Char
'3'  -> Int -> Int -> GenEntryContent LinkTarget
forall linkTarget. Int -> Int -> GenEntryContent linkTarget
CharacterDevice Int
devmajor Int
devminor
                 Char
'4'  -> Int -> Int -> GenEntryContent LinkTarget
forall linkTarget. Int -> Int -> GenEntryContent linkTarget
BlockDevice     Int
devmajor Int
devminor
                 Char
'5'  -> GenEntryContent LinkTarget
forall linkTarget. GenEntryContent linkTarget
Directory
                 Char
'6'  -> GenEntryContent LinkTarget
forall linkTarget. GenEntryContent linkTarget
NamedPipe
                 Char
'7'  -> ByteString -> Int64 -> GenEntryContent LinkTarget
forall linkTarget.
ByteString -> Int64 -> GenEntryContent linkTarget
NormalFile      ByteString
content Int64
size
                 Char
_    -> Char -> ByteString -> Int64 -> GenEntryContent LinkTarget
forall linkTarget.
Char -> ByteString -> Int64 -> GenEntryContent linkTarget
OtherEntryType  Char
typecode ByteString
content Int64
size,
            entryPermissions :: Permissions
entryPermissions = Permissions
mode,
            entryOwnership :: Ownership
entryOwnership   = String -> String -> Int -> Int -> Ownership
Ownership (ByteString -> String
BS.Char8.unpack ByteString
uname)
                                         (ByteString -> String
BS.Char8.unpack ByteString
gname) Int
uid Int
gid,
            entryTime :: Int64
entryTime        = Int64
mtime,
            entryFormat :: Format
entryFormat      = Format
format
            }

parseHeader
  :: LBS.ByteString
  -> Either FormatError (BS.ByteString, Permissions, Int, Int, Int64, EpochTime, Char, BS.ByteString, Format, BS.ByteString, BS.ByteString, DevMajor, DevMinor, BS.ByteString)
parseHeader :: ByteString
-> Either
     FormatError
     (ByteString, Permissions, Int, Int, Int64, Int64, Char, ByteString,
      Format, ByteString, ByteString, Int, Int, ByteString)
parseHeader ByteString
header' = do
  case (Either FormatError Int
chksum_, ByteString -> Either FormatError Format
format_ ByteString
magic) of
    (Right Int
chksum, Either FormatError Format
_ ) | ByteString -> Int -> Bool
correctChecksum ByteString
header Int
chksum -> () -> Either FormatError ()
forall a. a -> Either FormatError a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    (Right Int
_, Right Format
_) -> FormatError -> Either FormatError ()
forall a b. a -> Either a b
Left FormatError
ChecksumIncorrect
    (Either FormatError Int, Either FormatError Format)
_                  -> FormatError -> Either FormatError ()
forall a b. a -> Either a b
Left FormatError
NotTarFormat

  Permissions
mode     <- Either FormatError Permissions
mode_
  Int
uid      <- Either FormatError Int
uid_
  Int
gid      <- Either FormatError Int
gid_
  Int64
size     <- Either FormatError Int64
size_
  Int64
mtime    <- Either FormatError Int64
mtime_
  Format
format   <- ByteString -> Either FormatError Format
format_ ByteString
magic
  Int
devmajor <- Either FormatError Int
devmajor_
  Int
devminor <- Either FormatError Int
devminor_

  (ByteString, Permissions, Int, Int, Int64, Int64, Char, ByteString,
 Format, ByteString, ByteString, Int, Int, ByteString)
-> Either
     FormatError
     (ByteString, Permissions, Int, Int, Int64, Int64, Char, ByteString,
      Format, ByteString, ByteString, Int, Int, ByteString)
forall a. a -> Either FormatError a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ByteString
name, Permissions
mode, Int
uid, Int
gid, Int64
size, Int64
mtime, Char
typecode, ByteString
linkname, Format
format, ByteString
uname, ByteString
gname, Int
devmajor, Int
devminor, ByteString
prefix)
  where
    header :: ByteString
header     = ByteString -> ByteString
LBS.toStrict ByteString
header'

    name :: ByteString
name       = Int -> Int -> ByteString -> ByteString
getString   Int
0 Int
100 ByteString
header
    mode_ :: Either FormatError Permissions
mode_      = Int -> Int -> ByteString -> Either FormatError Permissions
forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Either FormatError a
getOct    Int
100   Int
8 ByteString
header
    uid_ :: Either FormatError Int
uid_       = Int -> Int -> ByteString -> Either FormatError Int
forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Either FormatError a
getOct    Int
108   Int
8 ByteString
header
    gid_ :: Either FormatError Int
gid_       = Int -> Int -> ByteString -> Either FormatError Int
forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Either FormatError a
getOct    Int
116   Int
8 ByteString
header
    size_ :: Either FormatError Int64
size_      = Int -> Int -> ByteString -> Either FormatError Int64
forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Either FormatError a
getOct    Int
124  Int
12 ByteString
header
    mtime_ :: Either FormatError Int64
mtime_     = Int -> Int -> ByteString -> Either FormatError Int64
forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Either FormatError a
getOct    Int
136  Int
12 ByteString
header
    chksum_ :: Either FormatError Int
chksum_    = Int -> Int -> ByteString -> Either FormatError Int
forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Either FormatError a
getOct    Int
148   Int
8 ByteString
header
    typecode :: Char
typecode   = Int -> ByteString -> Char
getByte   Int
156     ByteString
header
    linkname :: ByteString
linkname   = Int -> Int -> ByteString -> ByteString
getString Int
157 Int
100 ByteString
header
    magic :: ByteString
magic      = Int -> Int -> ByteString -> ByteString
getChars  Int
257   Int
8 ByteString
header
    uname :: ByteString
uname      = Int -> Int -> ByteString -> ByteString
getString Int
265  Int
32 ByteString
header
    gname :: ByteString
gname      = Int -> Int -> ByteString -> ByteString
getString Int
297  Int
32 ByteString
header
    devmajor_ :: Either FormatError Int
devmajor_  = Int -> Int -> ByteString -> Either FormatError Int
forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Either FormatError a
getOct    Int
329   Int
8 ByteString
header
    devminor_ :: Either FormatError Int
devminor_  = Int -> Int -> ByteString -> Either FormatError Int
forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Either FormatError a
getOct    Int
337   Int
8 ByteString
header
    prefix :: ByteString
prefix     = Int -> Int -> ByteString -> ByteString
getString Int
345 Int
155 ByteString
header
    -- trailing   = getBytes  500  12 header

format_ :: BS.ByteString -> Either FormatError Format
format_ :: ByteString -> Either FormatError Format
format_ ByteString
magic
  | ByteString
magic ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
ustarMagic = Format -> Either FormatError Format
forall a. a -> Either FormatError a
forall (m :: * -> *) a. Monad m => a -> m a
return Format
UstarFormat
  | ByteString
magic ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
gnuMagic   = Format -> Either FormatError Format
forall a. a -> Either FormatError a
forall (m :: * -> *) a. Monad m => a -> m a
return Format
GnuFormat
  | ByteString
magic ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
v7Magic    = Format -> Either FormatError Format
forall a. a -> Either FormatError a
forall (m :: * -> *) a. Monad m => a -> m a
return Format
V7Format
  | Bool
otherwise           = FormatError -> Either FormatError Format
forall a b. a -> Either a b
Left FormatError
UnrecognisedTarFormat

v7Magic, ustarMagic, gnuMagic :: BS.ByteString
v7Magic :: ByteString
v7Magic    = String -> ByteString
BS.Char8.pack String
"\0\0\0\0\0\0\0\0"
ustarMagic :: ByteString
ustarMagic = String -> ByteString
BS.Char8.pack String
"ustar\NUL00"
gnuMagic :: ByteString
gnuMagic   = String -> ByteString
BS.Char8.pack String
"ustar  \NUL"

correctChecksum :: BS.ByteString -> Int -> Bool
correctChecksum :: ByteString -> Int -> Bool
correctChecksum ByteString
header Int
checksum = Int
checksum Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
checksum'
  where
    -- sum of all 512 bytes in the header block,
    -- treating each byte as an 8-bit unsigned value
    sumchars :: ByteString -> Int
sumchars  = (Int -> Word8 -> Int) -> Int -> ByteString -> Int
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl' (\Int
x Word8
y -> Int
x Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
y) Int
0
    -- treating the 8 bytes of chksum as blank characters.
    checksum' :: Int
checksum' = ByteString -> Int
sumchars (Int -> ByteString -> ByteString
BS.take Int
148 ByteString
header)
              Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
256 -- 256 = sumchars (BS.Char8.replicate 8 ' ')
              Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
sumchars (Int -> ByteString -> ByteString
BS.drop Int
156 ByteString
header)

-- * TAR format primitive input

{-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Either FormatError Int   #-}
{-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Either FormatError Int64 #-}
getOct :: (Integral a, Bits a) => Int -> Int -> BS.ByteString -> Either FormatError a
getOct :: forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Either FormatError a
getOct Int
off Int
len = ByteString -> Either FormatError a
forall {a}.
(Integral a, Bits a) =>
ByteString -> Either FormatError a
parseOct (ByteString -> Either FormatError a)
-> (ByteString -> ByteString) -> ByteString -> Either FormatError a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> ByteString -> ByteString
getBytes Int
off Int
len
  where
    -- As a star extension, octal fields can hold a base-256 value if the high
    -- bit of the initial character is set. The initial character can be:
    --   0x80 ==> trailing characters hold a positive base-256 value
    --   0xFF ==> trailing characters hold a negative base-256 value
    --
    -- In both cases, there won't be a trailing NUL/space.
    --
    -- GNU tar seems to contain a half-implementation of code that deals with
    -- extra bits in the first character, but I don't think it works and the
    -- docs I can find on star seem to suggest that these will always be 0,
    -- which is what I will assume.
    parseOct :: ByteString -> Either FormatError a
parseOct ByteString
s | HasCallStack => ByteString -> Word8
ByteString -> Word8
BS.head ByteString
s Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
128 = a -> Either FormatError a
forall a. a -> Either FormatError a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> a
forall a. (Integral a, Bits a) => ByteString -> a
readBytes (HasCallStack => ByteString -> ByteString
ByteString -> ByteString
BS.tail ByteString
s))
               | HasCallStack => ByteString -> Word8
ByteString -> Word8
BS.head ByteString
s Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
255 = a -> Either FormatError a
forall a. a -> Either FormatError a
forall (m :: * -> *) a. Monad m => a -> m a
return (a -> a
forall a. Num a => a -> a
negate (ByteString -> a
forall a. (Integral a, Bits a) => ByteString -> a
readBytes (HasCallStack => ByteString -> ByteString
ByteString -> ByteString
BS.tail ByteString
s)))
    parseOct ByteString
s
      | ByteString -> Bool
BS.null ByteString
stripped = a -> Either FormatError a
forall a. a -> Either FormatError a
forall (m :: * -> *) a. Monad m => a -> m a
return a
0
      | Bool
otherwise = case ByteString -> Maybe a
forall n. Integral n => ByteString -> Maybe n
readOct ByteString
stripped of
        Just a
x  -> a -> Either FormatError a
forall a. a -> Either FormatError a
forall (m :: * -> *) a. Monad m => a -> m a
return a
x
        Maybe a
Nothing -> FormatError -> Either FormatError a
forall a b. a -> Either a b
Left FormatError
HeaderBadNumericEncoding
     where
      stripped :: ByteString
stripped = (Char -> Bool) -> ByteString -> ByteString
BS.Char8.takeWhile (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\NUL' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
' ')
               (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ByteString -> ByteString
BS.Char8.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ') ByteString
s

    readBytes :: (Integral a, Bits a) => BS.ByteString -> a
    readBytes :: forall a. (Integral a, Bits a) => ByteString -> a
readBytes = (a -> Word8 -> a) -> a -> ByteString -> a
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl' (\a
acc Word8
x -> a
acc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
8 a -> a -> a
forall a. Num a => a -> a -> a
+ Word8 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x) a
0

getBytes :: Int -> Int -> BS.ByteString -> BS.ByteString
getBytes :: Int -> Int -> ByteString -> ByteString
getBytes Int
off Int
len = Int -> ByteString -> ByteString
BS.take Int
len (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> ByteString
BS.drop Int
off

getByte :: Int -> BS.ByteString -> Char
getByte :: Int -> ByteString -> Char
getByte Int
off ByteString
bs = ByteString -> Int -> Char
BS.Char8.index ByteString
bs Int
off

getChars :: Int -> Int -> BS.ByteString -> BS.ByteString
getChars :: Int -> Int -> ByteString -> ByteString
getChars = Int -> Int -> ByteString -> ByteString
getBytes

getString :: Int -> Int -> BS.ByteString -> BS.ByteString
getString :: Int -> Int -> ByteString -> ByteString
getString Int
off Int
len = ByteString -> ByteString
BS.copy (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.Char8.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\0') (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> ByteString -> ByteString
getBytes Int
off Int
len

{-# SPECIALISE readOct :: BS.ByteString -> Maybe Int   #-}
{-# SPECIALISE readOct :: BS.ByteString -> Maybe Int64 #-}
readOct :: Integral n => BS.ByteString -> Maybe n
readOct :: forall n. Integral n => ByteString -> Maybe n
readOct = Int -> n -> ByteString -> Maybe n
forall n. Integral n => Int -> n -> ByteString -> Maybe n
go Int
0 n
0
  where
    go :: Integral n => Int -> n -> BS.ByteString -> Maybe n
    go :: forall n. Integral n => Int -> n -> ByteString -> Maybe n
go !Int
i !n
n !ByteString
bs = case ByteString -> Maybe (Word8, ByteString)
BS.uncons ByteString
bs of
      Maybe (Word8, ByteString)
Nothing -> if Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then Maybe n
forall a. Maybe a
Nothing else n -> Maybe n
forall a. a -> Maybe a
Just n
n
      Just (Word8
w, ByteString
tl)
        | Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0x30 Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x39 ->
          Int -> n -> ByteString -> Maybe n
forall n. Integral n => Int -> n -> ByteString -> Maybe n
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (n
n n -> n -> n
forall a. Num a => a -> a -> a
* n
8 n -> n -> n
forall a. Num a => a -> a -> a
+ (Word8 -> n
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w n -> n -> n
forall a. Num a => a -> a -> a
- n
0x30)) ByteString
tl
        | Bool
otherwise -> Maybe n
forall a. Maybe a
Nothing