module Codec.Archive.Tar.Read (readTarArchive) where

import Codec.Archive.Tar.Types
import Codec.Archive.Tar.Util

import Data.Binary.Get

import Data.Char (chr,ord)
import Data.Int (Int64)
import Control.Monad (liftM)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as L
import Data.Int (Int8)
import Numeric (readOct)

-- | Reads a TAR archive from a lazy ByteString.
readTarArchive :: L.ByteString -> TarArchive
readTarArchive = runGet getTarArchive

getTarArchive :: Get TarArchive
getTarArchive = liftM TarArchive $ unfoldM getTarEntry

-- | Returns 'Nothing' if the entry is an end block.
getTarEntry :: Get (Maybe TarEntry)
getTarEntry =
    do mhdr <- getTarHeader
       case mhdr of
         Nothing -> return Nothing
         Just hdr -> do let size = contentSize hdr
                        cnt <- if size == 0 
                                then return L.empty
                                else let padding = (512 - size) `mod` 512
                                in liftM (L.take size) $ getLazyByteString $ size + padding
                        return $ Just $ TarEntry hdr cnt

-- | Get the size of the content for the given header. This can sometimes
-- be different from 'tarFileSize'. I have seen hints that some platforms
-- may set the size to non-zero values for directories.
contentSize :: TarHeader -> Int64
contentSize hdr = if hasContent hdr then tarFileSize hdr else 0

hasContent :: TarHeader -> Bool
hasContent hdr = case tarFileType hdr of
                    TarNormalFile -> True
                    TarOther _    -> True
                    _             -> False

getTarHeader :: Get (Maybe TarHeader)
getTarHeader =
    do -- FIXME: warn and return nothing on EOF
       block <- liftM B.copy $ getBytes 512
       return $ 
        if B.head block == '\NUL'
          then Nothing
          else let (hdr,chkSum) = 
                       runGet getHeaderAndChkSum $ L.fromChunks [block]
                in if checkChkSum block chkSum
                     then Just hdr
                     else error $ "TAR header checksum failure." 

checkChkSum :: B.ByteString -> Int -> Bool
checkChkSum block s = s == chkSum block' || s == signedChkSum block'
  where 
    block' = B.concat [B.take 148 block, B.replicate 8 ' ', B.drop 156 block]
    -- tar.info says that Sun tar is buggy and 
    -- calculates the checksum using signed chars
    chkSum = B.foldl' (\x y -> x + ord y) 0
    signedChkSum = B.foldl' (\x y -> x + (ordSigned y)) 0

ordSigned :: Char -> Int
ordSigned c = fromIntegral (fromIntegral (ord c) :: Int8)

getHeaderAndChkSum :: Get (TarHeader, Int)
getHeaderAndChkSum =
    do fileSuffix <- getString  100
       mode       <- getOct       8
       uid        <- getOct       8
       gid        <- getOct       8
       size       <- getOct      12
       time       <- getOct      12
       chkSum     <- getOct       8
       typ        <- getTarFileType
       target     <- getString  100
       _ustar     <- skip         6
       _version   <- skip         2
       uname      <- getString   32
       gname      <- getString   32
       major      <- getOct       8
       minor      <- getOct       8
       filePrefix <- getString  155
       _          <- skip        12      
       let hdr = TarHeader {
                            tarFileName    = filePrefix ++ fileSuffix,
                            tarFileMode    = mode,
                            tarOwnerID     = uid,
                            tarGroupID     = gid,
                            tarFileSize    = size,
                            tarModTime     = fromInteger time,
                            tarFileType    = typ,
                            tarLinkTarget  = target,
                            tarOwnerName   = uname,
                            tarGroupName   = gname,
                            tarDeviceMajor = major,
                            tarDeviceMinor = minor
                           }
       return (hdr,chkSum)

getTarFileType :: Get TarFileType
getTarFileType = 
    do c <- getChar8
       return $ case c of
                  '\0'-> TarNormalFile
                  '0' -> TarNormalFile
                  '1' -> TarHardLink
                  '2' -> TarSymbolicLink
                  '3' -> TarCharacterDevice
                  '4' -> TarBlockDevice
                  '5' -> TarDirectory
                  '6' -> TarFIFO
                  _   -> TarOther c

-- * TAR format primitive input

getOct :: Integral a => Int -> Get a
getOct n = getBytes n >>= parseOct . takeWhile (/='\0') . B.unpack
  where parseOct "" = return 0
        parseOct s = case readOct s of
                       [(x,_)] -> return x
                       _       -> fail $ "Number format error: " ++ show s

getString :: Int -> Get String
getString n = liftM (takeWhile (/='\NUL') . B.unpack) $ getBytes n

getChar8 :: Get Char
getChar8 = fmap (chr . fromIntegral) getWord8