{-# LANGUAGE RecordWildCards #-} module Codec.Archive.Tar( Archive , ArchiveHeader(..) , ArchiveMember(..) , RegularFile(..) , Link(..) , Device(..) , SpecialFIFOFile(..) , unarchive ) where import Control.Monad(unless) import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as BC import Data.Char(digitToInt) import Data.List(isPrefixOf) import Data.Map.Strict(Map) import qualified Data.Map.Strict as Map import Data.Serialize.Get import Data.String(IsString(..)) import Data.Time.Clock(UTCTime(..)) import Data.Time.Clock.POSIX(posixSecondsToUTCTime) import Data.Time.LocalTime() import Data.ByteString(ByteString) import Data.Word(Word) type Archive = Map FilePath ArchiveMember data ArchiveMember = RegularFileMember RegularFile | LinkMember Link | SymbolicLinkMember Link | CharacterDeviceMember Device | BlockDeviceMember Device | DirectoryMember Directory | SpecialFIFOFileMember SpecialFIFOFile | ReservedMember RegularFile | CustomMember Char ArchiveHeader ByteString deriving (Show) unarchive :: ByteString -> Either String Archive unarchive = runGet getMembers data ArchiveHeader = ArchiveHeader { hdrFileName :: FilePath , hdrMode :: Word , hdrOwner :: Word , hdrGroup :: Word , hdrSize :: Word , hdrModificationTime :: UTCTime , hdrChecksum :: Word , hdrTypeFlag :: RecordTypeFlag , hdrLinkName :: FilePath , hdrMagic :: String , hdrVersion :: ByteString , hdrUserName :: String , hdrGroupName :: String , hdrDeviceMajor :: Word , hdrDeviceMinor :: Word , hdrPrefix :: FilePath } deriving (Show) getMembers :: Get Archive getMembers = do next <- nextMember case next of Nothing -> return Map.empty Just (key, value) -> Map.insert key value `fmap` getMembers nextMember :: Get (Maybe (FilePath, ArchiveMember)) nextMember = do nextTwoBlocks <- lookAhead (getBytes 1024) if B.all (== 0) nextTwoBlocks then return Nothing else Just `fmap` getArchiveMember getArchiveMember :: Get (FilePath, ArchiveMember) getArchiveMember = do hdr <- isolate 512 getProperHeader case hdrTypeFlag hdr of FlagRegularFile -> convertRegularFile hdr FlagLink -> convertLink LinkMember hdr FlagSymbolicLink -> convertLink SymbolicLinkMember hdr FlagCharacterDevice -> convertDevice CharacterDeviceMember hdr FlagBlockDevice -> convertDevice BlockDeviceMember hdr FlagDirectory -> convertDirectory hdr FlagSpecialFIFOFile -> convertFIFO hdr FlagReserved -> convertRegularFile hdr FlagCustom c -> do body <- getPaddedBody (hdrSize hdr) let key = hdrPrefix hdr ++ hdrFileName hdr return (key, CustomMember c hdr body) data RegularFile = RegularFile { regFileName :: FilePath , regMode :: Word , regOwner :: Word , regGroup :: Word , regSize :: Word , regModificationTime :: UTCTime , regUserName :: String , regGroupName :: String , regContents :: ByteString } deriving (Show) convertRegularFile :: ArchiveHeader -> Get (FilePath, ArchiveMember) convertRegularFile hdr = do let regFileName = hdrPrefix hdr ++ hdrFileName hdr regMode = hdrMode hdr regOwner = hdrOwner hdr regGroup = hdrGroup hdr regSize = hdrSize hdr regModificationTime = hdrModificationTime hdr regUserName = hdrUserName hdr regGroupName = hdrGroupName hdr regContents <- getPaddedBody regSize return (regFileName, RegularFileMember RegularFile{..}) data Link = Link { linkFileName :: FilePath , linkMode :: Word , linkOwner :: Word , linkGroup :: Word , linkSize :: Word , linkModificationTime :: UTCTime , linkUserName :: String , linkGroupName :: String , linkTarget :: FilePath } deriving (Show) convertLink :: (Link -> ArchiveMember) -> ArchiveHeader -> Get (FilePath, ArchiveMember) convertLink builder hdr = return (linkFileName, builder link) where link = Link{ .. } linkFileName = hdrPrefix hdr ++ hdrFileName hdr linkMode = hdrMode hdr linkOwner = hdrOwner hdr linkGroup = hdrGroup hdr linkSize = hdrSize hdr linkModificationTime = hdrModificationTime hdr linkUserName = hdrUserName hdr linkGroupName = hdrGroupName hdr linkTarget = hdrLinkName hdr data Device = Device { devFileName :: FilePath , devMode :: Word , devOwner :: Word , devGroup :: Word , devModificationTime :: UTCTime , devUserName :: String , devGroupName :: String , devMajorNumber :: Word , devMinorNumber :: Word } deriving (Show) convertDevice :: (Device -> ArchiveMember) -> ArchiveHeader -> Get (FilePath, ArchiveMember) convertDevice builder hdr = return (devFileName, builder device) where device = Device{..} devFileName = hdrPrefix hdr ++ hdrFileName hdr devMode = hdrMode hdr devOwner = hdrOwner hdr devGroup = hdrGroup hdr devModificationTime = hdrModificationTime hdr devUserName = hdrUserName hdr devGroupName = hdrGroupName hdr devMajorNumber = hdrDeviceMajor hdr devMinorNumber = hdrDeviceMinor hdr data Directory = Directory { dirFileName :: FilePath , dirMode :: Word , dirOwner :: Word , dirGroup :: Word , dirModificationTime :: UTCTime , dirUserName :: String , dirGroupName :: String } deriving (Show) convertDirectory :: ArchiveHeader -> Get (FilePath, ArchiveMember) convertDirectory hdr = return (dirFileName, DirectoryMember directory) where directory = Directory{..} dirFileName = hdrPrefix hdr ++ hdrFileName hdr dirMode = hdrMode hdr dirOwner = hdrOwner hdr dirGroup = hdrGroup hdr dirModificationTime = hdrModificationTime hdr dirUserName = hdrUserName hdr dirGroupName = hdrGroupName hdr data SpecialFIFOFile = SpecialFIFOFile { fifoFileName :: FilePath , fifoMode :: Word , fifoOwner :: Word , fifoGroup :: Word , fifoModificationTime :: UTCTime , fifoUserName :: String , fifoGroupName :: String } deriving (Show) convertFIFO :: ArchiveHeader -> Get (FilePath, ArchiveMember) convertFIFO hdr = return (fifoFileName, SpecialFIFOFileMember fifo) where fifo = SpecialFIFOFile{..} fifoFileName = hdrPrefix hdr ++ hdrFileName hdr fifoMode = hdrMode hdr fifoOwner = hdrOwner hdr fifoGroup = hdrGroup hdr fifoModificationTime = hdrModificationTime hdr fifoUserName = hdrUserName hdr fifoGroupName = hdrGroupName hdr getPaddedBody :: Word -> Get ByteString getPaddedBody sizeb = do let blocks = (sizeb + 511) `div` 512 padded = blocks * 512 skipAmt = padded - sizeb res <- getByteString (fromIntegral sizeb) unless (skipAmt == 0) $ skip (fromIntegral skipAmt) return res getProperHeader :: Get ArchiveHeader getProperHeader = do checksum1 <- lookAhead (label "checksum computation" getChecksum) header <- label "archive header" getArchiveHeader let checksum2 = hdrChecksum header unless (checksum1 == checksum2) $ fail ("Checksum mismatch: "++show checksum1++" /= "++show checksum2) return header getChecksum :: Get Word getChecksum = do acc0 <- addSum 0 `fmap` getByteString 100 -- name of file acc1 <- addSum acc0 `fmap` getByteString 8 -- file mode acc2 <- addSum acc1 `fmap` getByteString 8 -- user ID acc3 <- addSum acc2 `fmap` getByteString 8 -- group ID acc4 <- addSum acc3 `fmap` getByteString 12 -- length in bytes acc5 <- addSum acc4 `fmap` getByteString 12 -- modify time skip 8 -- the actual checksum let acc6 = addSum acc5 (BC.replicate 8 ' ') acc7 <- addSum acc6 `fmap` getByteString 1 -- type of file acc8 <- addSum acc7 `fmap` getByteString 100 -- name of linked file acc9 <- addSum acc8 `fmap` getByteString 6 -- USTAR indicator acc10 <- addSum acc9 `fmap` getByteString 2 -- USTAR version acc11 <- addSum acc10 `fmap` getByteString 32 -- user name acc12 <- addSum acc11 `fmap` getByteString 32 -- user group acc13 <- addSum acc12 `fmap` getByteString 8 -- device major number acc14 <- addSum acc13 `fmap` getByteString 8 -- device minor number acc15 <- addSum acc14 `fmap` getByteString 155 -- prefix file name skip 12 -- trailing bytes return acc15 where addSum = B.foldl' (\ acc x -> acc + fromIntegral x) getArchiveHeader :: Get ArchiveHeader getArchiveHeader = do hdrFileName <- toStringLike `fmap` getByteString 100 hdrMode <- toNumeric `fmap` getByteString 8 hdrOwner <- toNumeric `fmap` getByteString 8 hdrGroup <- toNumeric `fmap` getByteString 8 hdrSize <- toNumeric `fmap` getByteString 12 hdrModificationTime <- toUTCTime `fmap` getByteString 12 hdrChecksum <- toNumeric `fmap` getByteString 8 hdrTypeFlag <- toRecordTypeFlag =<< getByteString 1 hdrLinkName <- toStringLike `fmap` getByteString 100 hdrMagic <- toStringLike `fmap` getByteString 6 hdrVersion <- getByteString 2 hdrUserName <- toStringLike `fmap` getByteString 32 hdrGroupName <- toStringLike `fmap` getByteString 32 hdrDeviceMajor <- toNumeric `fmap` getByteString 8 hdrDeviceMinor <- toNumeric `fmap` getByteString 8 hdrPrefix <- toStringLike `fmap` getByteString 155 skip 12 -- trailing bytes unless ("ustar" `isPrefixOf` hdrMagic) $ fail ("Bad magic value: " ++ hdrMagic ++ " (fname: " ++ show hdrFileName) return ArchiveHeader{..} data RecordTypeFlag = FlagRegularFile | FlagLink | FlagSymbolicLink | FlagCharacterDevice | FlagBlockDevice | FlagDirectory | FlagSpecialFIFOFile | FlagReserved | FlagCustom Char deriving (Show) toRecordTypeFlag :: ByteString -> Get RecordTypeFlag toRecordTypeFlag x = case BC.uncons x of Nothing -> fail "The world went insane." Just ('\0', _) -> return FlagRegularFile Just ('0', _) -> return FlagRegularFile Just ('1', _) -> return FlagLink Just ('2', _) -> return FlagSymbolicLink Just ('3', _) -> return FlagCharacterDevice Just ('4', _) -> return FlagBlockDevice Just ('5', _) -> return FlagDirectory Just ('6', _) -> return FlagSpecialFIFOFile Just ('7', _) -> return FlagReserved Just (c, _) | c `elem` (['A'..'Z']++['a'..'z']) -> return (FlagCustom c) Just _ -> fail ("Unexpected record type: " ++ show x) toStringLike :: IsString a => ByteString -> a toStringLike = fromString . BC.unpack . stripTrailingNulls where stripTrailingNulls x = case B.unsnoc x of Nothing -> B.empty Just (x', 0) -> stripTrailingNulls x' Just (_, _) -> x toNumeric :: Num a => ByteString -> a toNumeric = parseOctal 0 where parseOctal acc x = case BC.uncons x of Nothing -> acc Just (' ', _) -> acc Just ('\0', _) -> acc Just (c, rest) -> parseOctal ((acc * 8) + digitToInt' c) rest digitToInt' = fromIntegral . digitToInt toUTCTime :: ByteString -> UTCTime toUTCTime = posixSecondsToUTCTime . toNumeric