{-# LANGUAGE RecordWildCards #-}
module Codec.Archive.Tar(
         Archive
       , ArchiveHeader(..)
       , ArchiveMember(..)
       , RegularFile(..)
       , Link(..)
       , Device(..)
       , SpecialFIFOFile(..)
       , unarchive
       )
 where

import           Control.Monad(unless)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import           Data.Char(digitToInt)
import           Data.List(isPrefixOf)
import           Data.Map.Strict(Map)
import qualified Data.Map.Strict as Map
import           Data.Serialize.Get
import           Data.String(IsString(..))
import           Data.Time.Clock(UTCTime(..))
import           Data.Time.Clock.POSIX(posixSecondsToUTCTime)
import           Data.Time.LocalTime()
import           Data.ByteString(ByteString)
import           Data.Word(Word)

type Archive = Map FilePath ArchiveMember

data ArchiveMember = RegularFileMember     RegularFile
                   | LinkMember            Link
                   | SymbolicLinkMember    Link
                   | CharacterDeviceMember Device
                   | BlockDeviceMember     Device
                   | DirectoryMember       Directory
                   | SpecialFIFOFileMember SpecialFIFOFile
                   | ReservedMember        RegularFile
                   | CustomMember          Char ArchiveHeader ByteString
 deriving (Show)

unarchive :: ByteString -> Either String Archive
unarchive = runGet getMembers

data ArchiveHeader = ArchiveHeader
     { hdrFileName         :: FilePath
     , hdrMode             :: Word
     , hdrOwner            :: Word
     , hdrGroup            :: Word
     , hdrSize             :: Word
     , hdrModificationTime :: UTCTime
     , hdrChecksum         :: Word
     , hdrTypeFlag         :: RecordTypeFlag
     , hdrLinkName         :: FilePath
     , hdrMagic            :: String
     , hdrVersion          :: ByteString
     , hdrUserName         :: String
     , hdrGroupName        :: String
     , hdrDeviceMajor      :: Word
     , hdrDeviceMinor      :: Word
     , hdrPrefix           :: FilePath
     }
 deriving (Show)

getMembers :: Get Archive
getMembers =
  do next <- nextMember
     case next of
       Nothing           -> return Map.empty
       Just (key, value) -> Map.insert key value `fmap` getMembers

nextMember :: Get (Maybe (FilePath, ArchiveMember))
nextMember =
  do nextTwoBlocks <- lookAhead (getBytes 1024)
     if B.all (== 0) nextTwoBlocks
        then return Nothing
        else Just `fmap` getArchiveMember

getArchiveMember :: Get (FilePath, ArchiveMember)
getArchiveMember =
  do hdr <- isolate 512 getProperHeader
     case hdrTypeFlag hdr of
       FlagRegularFile     -> convertRegularFile                       hdr
       FlagLink            -> convertLink        LinkMember            hdr
       FlagSymbolicLink    -> convertLink        SymbolicLinkMember    hdr
       FlagCharacterDevice -> convertDevice      CharacterDeviceMember hdr
       FlagBlockDevice     -> convertDevice      BlockDeviceMember     hdr
       FlagDirectory       -> convertDirectory                         hdr
       FlagSpecialFIFOFile -> convertFIFO                              hdr
       FlagReserved        -> convertRegularFile                       hdr
       FlagCustom c        -> do body <- getPaddedBody (hdrSize hdr)
                                 let key = hdrPrefix hdr ++ hdrFileName hdr
                                 return (key, CustomMember c hdr body)

data RegularFile = RegularFile
     { regFileName         :: FilePath
     , regMode             :: Word
     , regOwner            :: Word
     , regGroup            :: Word
     , regSize             :: Word
     , regModificationTime :: UTCTime
     , regUserName         :: String
     , regGroupName        :: String
     , regContents         :: ByteString
     }
 deriving (Show)

convertRegularFile :: ArchiveHeader -> Get (FilePath, ArchiveMember)
convertRegularFile hdr =
  do let regFileName         = hdrPrefix           hdr ++ hdrFileName hdr
         regMode             = hdrMode             hdr
         regOwner            = hdrOwner            hdr
         regGroup            = hdrGroup            hdr
         regSize             = hdrSize             hdr
         regModificationTime = hdrModificationTime hdr
         regUserName         = hdrUserName         hdr
         regGroupName        = hdrGroupName        hdr
     regContents <- getPaddedBody regSize
     return (regFileName, RegularFileMember RegularFile{..})

data Link = Link
     { linkFileName         :: FilePath
     , linkMode             :: Word
     , linkOwner            :: Word
     , linkGroup            :: Word
     , linkSize             :: Word
     , linkModificationTime :: UTCTime
     , linkUserName         :: String
     , linkGroupName        :: String
     , linkTarget           :: FilePath
     }
 deriving (Show)

convertLink :: (Link -> ArchiveMember) -> ArchiveHeader ->
               Get (FilePath, ArchiveMember)
convertLink builder hdr = return (linkFileName, builder link)
 where
  link                 = Link{ .. }
  linkFileName         = hdrPrefix           hdr ++ hdrFileName hdr
  linkMode             = hdrMode             hdr
  linkOwner            = hdrOwner            hdr
  linkGroup            = hdrGroup            hdr
  linkSize             = hdrSize             hdr
  linkModificationTime = hdrModificationTime hdr
  linkUserName         = hdrUserName         hdr
  linkGroupName        = hdrGroupName        hdr
  linkTarget           = hdrLinkName         hdr

data Device = Device
     { devFileName         :: FilePath
     , devMode             :: Word
     , devOwner            :: Word
     , devGroup            :: Word
     , devModificationTime :: UTCTime
     , devUserName         :: String
     , devGroupName        :: String
     , devMajorNumber      :: Word
     , devMinorNumber      :: Word
     }
 deriving (Show)
 
convertDevice :: (Device -> ArchiveMember) -> ArchiveHeader ->
                 Get (FilePath, ArchiveMember)
convertDevice builder hdr = return (devFileName, builder device)
 where
  device              = Device{..}
  devFileName         = hdrPrefix           hdr ++ hdrFileName hdr
  devMode             = hdrMode             hdr
  devOwner            = hdrOwner            hdr
  devGroup            = hdrGroup            hdr
  devModificationTime = hdrModificationTime hdr
  devUserName         = hdrUserName         hdr
  devGroupName        = hdrGroupName        hdr
  devMajorNumber      = hdrDeviceMajor      hdr
  devMinorNumber      = hdrDeviceMinor      hdr

data Directory = Directory
     { dirFileName         :: FilePath
     , dirMode             :: Word
     , dirOwner            :: Word
     , dirGroup            :: Word
     , dirModificationTime :: UTCTime
     , dirUserName         :: String
     , dirGroupName        :: String
     }
 deriving (Show)

convertDirectory :: ArchiveHeader -> Get (FilePath, ArchiveMember)
convertDirectory hdr = return (dirFileName, DirectoryMember directory)
 where
  directory           = Directory{..}
  dirFileName         = hdrPrefix           hdr ++ hdrFileName hdr
  dirMode             = hdrMode             hdr
  dirOwner            = hdrOwner            hdr
  dirGroup            = hdrGroup            hdr
  dirModificationTime = hdrModificationTime hdr
  dirUserName         = hdrUserName         hdr
  dirGroupName        = hdrGroupName        hdr

data SpecialFIFOFile = SpecialFIFOFile
     { fifoFileName         :: FilePath
     , fifoMode             :: Word
     , fifoOwner            :: Word
     , fifoGroup            :: Word
     , fifoModificationTime :: UTCTime
     , fifoUserName         :: String
     , fifoGroupName        :: String
     }
 deriving (Show)

convertFIFO :: ArchiveHeader -> Get (FilePath, ArchiveMember)
convertFIFO hdr = return (fifoFileName, SpecialFIFOFileMember fifo)
 where
  fifo                 = SpecialFIFOFile{..}
  fifoFileName         = hdrPrefix           hdr ++ hdrFileName hdr
  fifoMode             = hdrMode             hdr
  fifoOwner            = hdrOwner            hdr
  fifoGroup            = hdrGroup            hdr
  fifoModificationTime = hdrModificationTime hdr
  fifoUserName         = hdrUserName         hdr
  fifoGroupName        = hdrGroupName        hdr


getPaddedBody :: Word -> Get ByteString
getPaddedBody sizeb =
  do let blocks  = (sizeb + 511) `div` 512
         padded  = blocks * 512
         skipAmt = padded - sizeb
     res <- getByteString (fromIntegral sizeb)
     unless (skipAmt == 0) $ skip (fromIntegral skipAmt)
     return res

getProperHeader :: Get ArchiveHeader
getProperHeader =
  do checksum1 <- lookAhead (label "checksum computation" getChecksum)
     header    <- label "archive header" getArchiveHeader
     let checksum2 = hdrChecksum header
     unless (checksum1 == checksum2) $
       fail ("Checksum mismatch: "++show checksum1++" /= "++show checksum2)
     return header

getChecksum :: Get Word
getChecksum =
  do acc0  <- addSum 0     `fmap` getByteString 100 -- name of file
     acc1  <- addSum acc0  `fmap` getByteString 8   -- file mode
     acc2  <- addSum acc1  `fmap` getByteString 8   -- user ID
     acc3  <- addSum acc2  `fmap` getByteString 8   -- group ID
     acc4  <- addSum acc3  `fmap` getByteString 12  -- length in bytes
     acc5  <- addSum acc4  `fmap` getByteString 12  -- modify time
     skip 8 -- the actual checksum
     let acc6 = addSum acc5 (BC.replicate 8 ' ')
     acc7  <- addSum acc6  `fmap` getByteString 1   -- type of file
     acc8  <- addSum acc7  `fmap` getByteString 100 -- name of linked file
     acc9  <- addSum acc8  `fmap` getByteString 6   -- USTAR indicator
     acc10 <- addSum acc9  `fmap` getByteString 2   -- USTAR version
     acc11 <- addSum acc10 `fmap` getByteString 32  -- user name
     acc12 <- addSum acc11 `fmap` getByteString 32  -- user group
     acc13 <- addSum acc12 `fmap` getByteString 8   -- device major number
     acc14 <- addSum acc13 `fmap` getByteString 8   -- device minor number
     acc15 <- addSum acc14 `fmap` getByteString 155 -- prefix file name
     skip 12 -- trailing bytes
     return acc15
 where
  addSum = B.foldl' (\ acc x -> acc + fromIntegral x)
 
getArchiveHeader :: Get ArchiveHeader
getArchiveHeader =
  do hdrFileName         <- toStringLike    `fmap` getByteString 100
     hdrMode             <- toNumeric       `fmap` getByteString 8
     hdrOwner            <- toNumeric       `fmap` getByteString 8
     hdrGroup            <- toNumeric       `fmap` getByteString 8
     hdrSize             <- toNumeric       `fmap` getByteString 12
     hdrModificationTime <- toUTCTime       `fmap` getByteString 12
     hdrChecksum         <- toNumeric       `fmap` getByteString 8
     hdrTypeFlag         <- toRecordTypeFlag =<<   getByteString 1
     hdrLinkName         <- toStringLike    `fmap` getByteString 100
     hdrMagic            <- toStringLike    `fmap` getByteString 6
     hdrVersion          <-                        getByteString 2
     hdrUserName         <- toStringLike    `fmap` getByteString 32
     hdrGroupName        <- toStringLike    `fmap` getByteString 32
     hdrDeviceMajor      <- toNumeric       `fmap` getByteString 8
     hdrDeviceMinor      <- toNumeric       `fmap` getByteString 8
     hdrPrefix           <- toStringLike    `fmap` getByteString 155
     skip 12 -- trailing bytes
     unless ("ustar" `isPrefixOf` hdrMagic) $
       fail ("Bad magic value: " ++ hdrMagic ++ " (fname: " ++ show hdrFileName)
     return ArchiveHeader{..}

data RecordTypeFlag = FlagRegularFile
                    | FlagLink
                    | FlagSymbolicLink
                    | FlagCharacterDevice
                    | FlagBlockDevice
                    | FlagDirectory
                    | FlagSpecialFIFOFile
                    | FlagReserved
                    | FlagCustom Char
 deriving (Show)

toRecordTypeFlag :: ByteString -> Get RecordTypeFlag
toRecordTypeFlag x =
  case BC.uncons x of
    Nothing   -> fail "The world went insane."
    Just ('\0', _) -> return FlagRegularFile
    Just ('0',  _) -> return FlagRegularFile
    Just ('1',  _) -> return FlagLink
    Just ('2',  _) -> return FlagSymbolicLink
    Just ('3',  _) -> return FlagCharacterDevice
    Just ('4',  _) -> return FlagBlockDevice
    Just ('5',  _) -> return FlagDirectory
    Just ('6',  _) -> return FlagSpecialFIFOFile
    Just ('7',  _) -> return FlagReserved
    Just (c,    _) | c `elem` (['A'..'Z']++['a'..'z']) ->
      return (FlagCustom c)
    Just _ ->
      fail ("Unexpected record type: " ++ show x)

toStringLike :: IsString a => ByteString -> a
toStringLike = fromString . BC.unpack . stripTrailingNulls
 where
  stripTrailingNulls x =
    case B.unsnoc x of
      Nothing      -> B.empty
      Just (x', 0) -> stripTrailingNulls x'
      Just (_,  _) -> x

toNumeric :: Num a => ByteString -> a
toNumeric = parseOctal 0
 where
  parseOctal acc x =
    case BC.uncons x of
      Nothing        -> acc
      Just (' ',  _) -> acc
      Just ('\0', _) -> acc
      Just (c, rest) -> parseOctal ((acc * 8) + digitToInt' c) rest
  digitToInt' = fromIntegral . digitToInt

toUTCTime :: ByteString -> UTCTime
toUTCTime = posixSecondsToUTCTime . toNumeric