module Codec.Archive.Tar.Read (read) where
import Codec.Archive.Tar.Types
import Data.Char (ord)
import Data.Int (Int64)
import Numeric (readOct)
import qualified Data.ByteString.Lazy as BS
import qualified Data.ByteString.Lazy.Char8 as BS.Char8
import Data.ByteString.Lazy (ByteString)
import Prelude hiding (read)
read :: ByteString -> Entries
read = unfoldEntries getEntry
getEntry :: ByteString -> Either String (Maybe (Entry, ByteString))
getEntry bs
| BS.length header < 512 = Left "truncated tar archive"
| BS.head bs == 0 = case BS.splitAt 1024 bs of
(end, trailing)
| BS.length end /= 1024 -> Left "short tar trailer"
| not (BS.all (== 0) end) -> Left "bad tar trailer"
| not (BS.all (== 0) trailing) -> Left "tar file has trailing junk"
| otherwise -> Right Nothing
| otherwise = partial $ do
case (chksum_, format_) of
(Ok chksum, _ ) | correctChecksum header chksum -> return ()
(Ok _, Ok _) -> fail "tar checksum error"
_ -> fail "data is not in tar format"
format <- format_; mode <- mode_;
uid <- uid_; gid <- gid_;
size <- size_; mtime <- mtime_;
devmajor <- devmajor_; devminor <- devminor_;
let content = BS.take size (BS.drop 512 bs)
padding = (512 size) `mod` 512
bs' = BS.drop (512 + size + padding) bs
entry = Entry {
entryTarPath = TarPath name prefix,
entryContent = case typecode of
'\0' -> NormalFile content size
'0' -> NormalFile content size
'1' -> HardLink (LinkTarget linkname)
'2' -> SymbolicLink (LinkTarget linkname)
'3' -> CharacterDevice devmajor devminor
'4' -> BlockDevice devmajor devminor
'5' -> Directory
'6' -> NamedPipe
'7' -> NormalFile content size
_ -> OtherEntryType typecode content size,
entryPermissions = mode,
entryOwnership = Ownership uname gname uid gid,
entryTime = mtime,
entryFormat = format
}
return (Just (entry, bs'))
where
header = BS.take 512 bs
name = getString 0 100 header
mode_ = getOct 100 8 header
uid_ = getOct 108 8 header
gid_ = getOct 116 8 header
size_ = getOct 124 12 header
mtime_ = getOct 136 12 header
chksum_ = getOct 148 8 header
typecode = getByte 156 header
linkname = getString 157 100 header
magic = getChars 257 8 header
uname = getString 265 32 header
gname = getString 297 32 header
devmajor_ = getOct 329 8 header
devminor_ = getOct 337 8 header
prefix = getString 345 155 header
format_ = case magic of
"\0\0\0\0\0\0\0\0" -> return V7Format
"ustar\NUL00" -> return UstarFormat
"ustar \NUL" -> return GnuFormat
_ -> fail "tar entry not in a recognised format"
correctChecksum :: ByteString -> Int -> Bool
correctChecksum header checksum = checksum == checksum'
where
checksum' = BS.Char8.foldl' (\x y -> x + ord y) 0 header'
header' = BS.concat [BS.take 148 header,
BS.Char8.replicate 8 ' ',
BS.drop 156 header]
getOct :: Integral a => Int64 -> Int64 -> ByteString -> Partial a
getOct off len = parseOct
. BS.Char8.unpack
. BS.Char8.takeWhile (\c -> c /= '\NUL' && c /= ' ')
. BS.Char8.dropWhile (== ' ')
. getBytes off len
where
parseOct "" = return 0
parseOct ('\128':_) = fail "tar header uses non-standard number encoding"
parseOct s = case readOct s of
[(x,[])] -> return x
_ -> fail "tar header is malformed (bad numeric encoding)"
getBytes :: Int64 -> Int64 -> ByteString -> ByteString
getBytes off len = BS.take len . BS.drop off
getByte :: Int64 -> ByteString -> Char
getByte off bs = BS.Char8.index bs off
getChars :: Int64 -> Int64 -> ByteString -> String
getChars off len = BS.Char8.unpack . getBytes off len
getString :: Int64 -> Int64 -> ByteString -> String
getString off len = BS.Char8.unpack . BS.Char8.takeWhile (/='\0') . getBytes off len
data Partial a = Error String | Ok a
partial :: Partial a -> Either String a
partial (Error msg) = Left msg
partial (Ok x) = Right x
instance Monad Partial where
return = Ok
Error m >>= _ = Error m
Ok x >>= k = k x
fail = Error