{-# LANGUAGE Safe #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{- arch-tag: GZip file support in Haskell
Copyright (c) 2004-2011 John Goerzen <jgoerzen@complete.org>

All rights reserved.

For license and copyright information, see the file LICENSE
-}

{- |
   Module     : System.FileArchive.GZip
   Copyright  : Copyright (C) 2004-2011 John Goerzen
   SPDX-License-Identifier: BSD-3-Clause

   Stability  : stable
   Portability: portable

GZip file decompression

Copyright (c) 2004 John Goerzen, jgoerzen\@complete.org

The GZip format is described in RFC1952.
-}
module System.FileArchive.GZip (
                                  -- * GZip Files
                                  -- $gzipfiles

                                  -- * Types
                                  Header(..), Section, GZipError(..),
                                  Footer(..),
                                  -- * Whole-File Processing
                                  decompress,
                                  hDecompress,
                                  read_sections,
                                  -- * Section Processing
                                  read_header,
                                  read_section
                                 )
    where

import Control.Monad.Except     (MonadError(..))
import Data.Bits                ((.&.))
import Data.Bits.Utils          (fromBytes)
import Data.Char                (ord)
import Data.Compression.Inflate (inflate_string_remainder)
import Data.Hash.CRC32.GZip     (update_crc)
import Data.Word                (Word32)
import System.IO                (Handle, hGetContents, hPutStr)

data GZipError = CRCError               -- ^ CRC-32 check failed
               | NotGZIPFile            -- ^ Couldn't find a GZip header
               | UnknownMethod          -- ^ Compressed with something other than method 8 (deflate)
               | UnknownError String    -- ^ Other problem arose
               deriving (GZipError -> GZipError -> Bool
(GZipError -> GZipError -> Bool)
-> (GZipError -> GZipError -> Bool) -> Eq GZipError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: GZipError -> GZipError -> Bool
$c/= :: GZipError -> GZipError -> Bool
== :: GZipError -> GZipError -> Bool
$c== :: GZipError -> GZipError -> Bool
Eq, Int -> GZipError -> ShowS
[GZipError] -> ShowS
GZipError -> String
(Int -> GZipError -> ShowS)
-> (GZipError -> String)
-> ([GZipError] -> ShowS)
-> Show GZipError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [GZipError] -> ShowS
$cshowList :: [GZipError] -> ShowS
show :: GZipError -> String
$cshow :: GZipError -> String
showsPrec :: Int -> GZipError -> ShowS
$cshowsPrec :: Int -> GZipError -> ShowS
Show)

-- | First two bytes of file
magic :: String
magic :: String
magic = String
"\x1f\x8b"

-- | Flags
fFHCRC, fFEXTRA, fFNAME, fFCOMMENT :: Int
-- fFTEXT = 1 :: Int
fFHCRC :: Int
fFHCRC = Int
2
fFEXTRA :: Int
fFEXTRA = Int
4
fFNAME :: Int
fFNAME = Int
8
fFCOMMENT :: Int
fFCOMMENT = Int
16

{- | The data structure representing the GZip header.  This occurs
at the beginning of each 'Section' on disk. -}
data Header = Header {
                      Header -> Int
method   :: Int,    -- ^ Compression method.  Only 8 is defined at present.
                      Header -> Int
flags    :: Int,
                      Header -> Maybe String
extra    :: Maybe String,
                      Header -> Maybe String
filename :: Maybe String,
                      Header -> Maybe String
comment  :: Maybe String,
                      Header -> Word32
mtime    :: Word32,  -- ^ Modification time of the original file
                      Header -> Int
xfl      :: Int,       -- ^ Extra flags
                      Header -> Int
os       :: Int         -- ^ Creating operating system
                     } deriving (Header -> Header -> Bool
(Header -> Header -> Bool)
-> (Header -> Header -> Bool) -> Eq Header
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Header -> Header -> Bool
$c/= :: Header -> Header -> Bool
== :: Header -> Header -> Bool
$c== :: Header -> Header -> Bool
Eq, Int -> Header -> ShowS
[Header] -> ShowS
Header -> String
(Int -> Header -> ShowS)
-> (Header -> String) -> ([Header] -> ShowS) -> Show Header
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Header] -> ShowS
$cshowList :: [Header] -> ShowS
show :: Header -> String
$cshow :: Header -> String
showsPrec :: Int -> Header -> ShowS
$cshowsPrec :: Int -> Header -> ShowS
Show)

{- | Stored on-disk at the end of each section. -}
data Footer = Footer {
                      Footer -> Word32
size       :: Word32,   -- ^ The size of the original, decompressed data
                      Footer -> Word32
crc32      :: Word32,  -- ^ The stored GZip CRC-32 of the original, decompressed data
                      Footer -> Bool
crc32valid :: Bool -- ^ Whether or not the stored CRC-32 matches the calculated CRC-32 of the data
                     }

{- | A section represents a compressed component in a GZip file.
Every GZip file has at least one. -}
type Section = (Header, String, Footer)

split1 :: String -> (Char, String)
split1 :: String -> (Char, String)
split1 String
s = (String -> Char
forall a. [a] -> a
head String
s, ShowS
forall a. [a] -> [a]
tail String
s)

{- | Read a GZip file, decompressing all sections found.

Writes the decompressed data stream to the given output handle.

Returns Nothing if the action was successful, or Just GZipError if there
was a problem.  If there was a problem, the data written to the output
handle should be discarded.
-}

hDecompress :: Handle                   -- ^ Input handle
            -> Handle                   -- ^ Output handle
            -> IO (Maybe GZipError)
hDecompress :: Handle -> Handle -> IO (Maybe GZipError)
hDecompress Handle
infd Handle
outfd =
    do String
inc <- Handle -> IO String
hGetContents Handle
infd
       let (String
outstr, Maybe GZipError
err) = String -> (String, Maybe GZipError)
decompress String
inc
       Handle -> String -> IO ()
hPutStr Handle
outfd String
outstr
       Maybe GZipError -> IO (Maybe GZipError)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe GZipError
err

{- | Read a GZip file, decompressing all sections that are found.

Returns a decompresed data stream and Nothing, or an unreliable string
and Just (error).  If you get anything other than Nothing, the String
returned should be discarded.
-}
decompress :: String -> (String, Maybe GZipError)
{-
decompress s =
    do x <- read_header s
       let rem = snd x
       return $ inflate_string rem
-}
decompress :: String -> (String, Maybe GZipError)
decompress String
s =
    let procs :: [Section] -> (String, Bool)
        procs :: [Section] -> (String, Bool)
procs [] = ([], Bool
True)
        procs ((Header
_, String
content, Footer
foot):[Section]
xs) =
            let (String
nexth, Bool
nextb) = [Section] -> (String, Bool)
procs [Section]
xs in
                (String
content String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
nexth, (Footer -> Bool
crc32valid Footer
foot) Bool -> Bool -> Bool
&& Bool
nextb)
        in case String -> Either GZipError [Section]
read_sections String
s of
           Left GZipError
x -> (String
"", GZipError -> Maybe GZipError
forall a. a -> Maybe a
Just GZipError
x)
           Right [Section]
x -> let (String
decomp, Bool
iscrcok) = [Section] -> (String, Bool)
procs [Section]
x
                          in (String
decomp, if Bool
iscrcok then Maybe GZipError
forall a. Maybe a
Nothing else GZipError -> Maybe GZipError
forall a. a -> Maybe a
Just GZipError
CRCError)

{-
decompress s = do x <- read_sections s
                  return $ concatMap (\(_, x, _) -> x) x
-}

-- | Read all sections.
read_sections :: String -> Either GZipError [Section]
read_sections :: String -> Either GZipError [Section]
read_sections [] = [Section] -> Either GZipError [Section]
forall a b. b -> Either a b
Right []
read_sections String
s =
    do (Section, String)
x <- String -> Either GZipError (Section, String)
read_section String
s
       case (Section, String)
x of
           (Section
sect, String
remain) ->
               do [Section]
next <- String -> Either GZipError [Section]
read_sections String
remain
                  [Section] -> Either GZipError [Section]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Section] -> Either GZipError [Section])
-> [Section] -> Either GZipError [Section]
forall a b. (a -> b) -> a -> b
$ Section
sect Section -> [Section] -> [Section]
forall a. a -> [a] -> [a]
: [Section]
next

parseword :: String -> Word32
parseword :: String -> Word32
parseword String
s = [Word32] -> Word32
forall a. (Bits a, Num a) => [a] -> a
fromBytes ([Word32] -> Word32) -> [Word32] -> Word32
forall a b. (a -> b) -> a -> b
$ (Char -> Word32) -> String -> [Word32]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> (Char -> Int) -> Char -> Word32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord) (String -> [Word32]) -> String -> [Word32]
forall a b. (a -> b) -> a -> b
$ ShowS
forall a. [a] -> [a]
reverse String
s

-- | Read one section, returning (ThisSection, Remainder)
read_section :: String -> Either GZipError (Section, String)
read_section :: String -> Either GZipError (Section, String)
read_section String
s =
        do (Header, String)
x <- String -> Either GZipError (Header, String)
read_header String
s
           let headerrem :: String
headerrem = (Header, String) -> String
forall a b. (a, b) -> b
snd (Header, String)
x
           let (String
decompressed, Word32
crc, String
remainder) = String -> (String, Word32, String)
read_data String
headerrem
           let (String
crc32str, String
rm) = Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
4 String
remainder
           let (String
sizestr, String
rem2) = Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
4 String
rm
           let filecrc32 :: Word32
filecrc32 = String -> Word32
parseword String
crc32str
           let filesize :: Word32
filesize = String -> Word32
parseword String
sizestr
           (Section, String) -> Either GZipError (Section, String)
forall (m :: * -> *) a. Monad m => a -> m a
return (((Header, String) -> Header
forall a b. (a, b) -> a
fst (Header, String)
x, String
decompressed,
                   Footer :: Word32 -> Word32 -> Bool -> Footer
Footer {size :: Word32
size = Word32
filesize, crc32 :: Word32
crc32 = Word32
filecrc32,
                           crc32valid :: Bool
crc32valid = Word32
filecrc32 Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
crc})
                   ,String
rem2)

-- | Read the file's compressed data, returning
-- (Decompressed, Calculated CRC32, Remainder)
read_data :: String -> (String, Word32, String)
read_data :: String -> (String, Word32, String)
read_data String
x =
    let (String
decompressed1, String
remainder) = String -> (String, String)
inflate_string_remainder String
x
        (String
decompressed, Word32
crc32) = String -> Word32 -> (String, Word32)
read_data_internal String
decompressed1 Word32
0
        in
          (String
decompressed, Word32
crc32, String
remainder)
    where
      read_data_internal :: String -> Word32 -> (String, Word32)
read_data_internal [] Word32
ck = ([], Word32
ck)
      read_data_internal (Char
y:String
ys) Word32
ck =
        let newcrc :: Word32
newcrc = Word32 -> Char -> Word32
update_crc Word32
ck Char
y
            n :: (String, Word32)
n = Word32
newcrc Word32 -> (String, Word32) -> (String, Word32)
`seq` String -> Word32 -> (String, Word32)
read_data_internal String
ys Word32
newcrc
            in
            (Char
y Char -> ShowS
forall a. a -> [a] -> [a]
: (String, Word32) -> String
forall a b. (a, b) -> a
fst (String, Word32)
n, (String, Word32) -> Word32
forall a b. (a, b) -> b
snd (String, Word32)
n)



{- | Read the GZip header.  Return (Header, Remainder).
-}
read_header :: String -> Either GZipError (Header, String)
read_header :: String -> Either GZipError (Header, String)
read_header String
s =
    let ok :: Either a String
ok = String -> Either a String
forall a b. b -> Either a b
Right String
"ok" in
    do let (String
mag, String
rem1) = Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
2 String
s
       String
_ <- if String
mag String -> String -> Bool
forall a. Eq a => a -> a -> Bool
/= String
magic
          then GZipError -> Either GZipError String
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError GZipError
NotGZIPFile
          else Either GZipError String
forall a. Either a String
ok
       let (Char
method, String
rem2) = String -> (Char, String)
split1 String
rem1
       String
_ <- if (Char -> Int
ord(Char
method) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
8)
          then GZipError -> Either GZipError String
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError GZipError
UnknownMethod
          else Either GZipError String
forall a. Either a String
ok
       let (Char
flag_S, String
rem3) = String -> (Char, String)
split1 String
rem2
       let flag :: Int
flag = Char -> Int
ord Char
flag_S
       let (String
mtimea, String
rem3a) = Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
4 String
rem3
       let mtime :: Word32
mtime = String -> Word32
parseword String
mtimea
       let (Char
xfla, String
rem3b) = String -> (Char, String)
split1 String
rem3a
       let xfl :: Int
xfl = Char -> Int
ord Char
xfla
       let (Char
osa, String
_) = String -> (Char, String)
split1 String
rem3b
       let os :: Int
os = Char -> Int
ord Char
osa
       -- skip modtime (4), extraflag (1), and os (1)
       let rem4 :: String
rem4 = Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
6 String
rem3

       let (Maybe String
extra, String
rem5) =
               if (Int
flag Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
fFEXTRA Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0)
               -- Skip past the extra field if we have it.
                  then let (Char
xlen_S, String
_) = String -> (Char, String)
split1 String
rem4
                           (Char
xlen2_S, String
rem4b) = String -> (Char, String)
split1 String
rem4
                           xlen :: Int
xlen = (Char -> Int
ord Char
xlen_S) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
256 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Char -> Int
ord Char
xlen2_S)
                           (String
ex, String
rrem) = Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
xlen String
rem4b
                           in (String -> Maybe String
forall a. a -> Maybe a
Just String
ex, String
rrem)
                  else (Maybe String
forall a. Maybe a
Nothing, String
rem4)

       let (Maybe String
filename, String
rem6) =
               if (Int
flag Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
fFNAME Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0)
               -- Skip past the null-terminated filename
                  then let fn :: String
fn = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\x00') String
rem5
                                in (String -> Maybe String
forall a. a -> Maybe a
Just String
fn, Int -> ShowS
forall a. Int -> [a] -> [a]
drop ((String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
fn) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) String
rem5)
                  else (Maybe String
forall a. Maybe a
Nothing, String
rem5)

       let (Maybe String
comment, String
rem7) =
               if (Int
flag Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
fFCOMMENT Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0)
                  -- Skip past the null-terminated comment
                  then let cm :: String
cm = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\x00') String
rem6
                           in (String -> Maybe String
forall a. a -> Maybe a
Just String
cm, Int -> ShowS
forall a. Int -> [a] -> [a]
drop ((String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
cm) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) String
rem6)
                  else (Maybe String
forall a. Maybe a
Nothing, String
rem6)

       String
rem8 <- if (Int
flag Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
fFHCRC Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0)
                  -- Skip past the header CRC
                  then String -> Either GZipError String
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Either GZipError String)
-> String -> Either GZipError String
forall a b. (a -> b) -> a -> b
$ Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
2 String
rem7
                  else String -> Either GZipError String
forall (m :: * -> *) a. Monad m => a -> m a
return String
rem7

       (Header, String) -> Either GZipError (Header, String)
forall (m :: * -> *) a. Monad m => a -> m a
return (Header :: Int
-> Int
-> Maybe String
-> Maybe String
-> Maybe String
-> Word32
-> Int
-> Int
-> Header
Header {method :: Int
method = Char -> Int
ord Char
method,
                      flags :: Int
flags = Int
flag,
                      extra :: Maybe String
extra = Maybe String
extra,
                      filename :: Maybe String
filename = Maybe String
filename,
                      comment :: Maybe String
comment = Maybe String
comment,
                      mtime :: Word32
mtime = Word32
mtime,
                      xfl :: Int
xfl = Int
xfl,
                      os :: Int
os = Int
os}, String
rem8)

----------------------------------------------------------------------
-- Documentation
----------------------------------------------------------------------

{- $gzipfiles

GZip files contain one or more 'Section's.  Each 'Section', on disk, begins
with a GZip 'Header', then stores the compressed data itself, and finally
stores a GZip 'Footer'.

The 'Header' identifies the file as a GZip file, records the original
modification date and time, and, in some cases, also records the original
filename and comments.

The 'Footer' contains a GZip CRC32 checksum over the decompressed data as
well as a 32-bit length of the decompressed data.  The module
'Data.Hash.CRC32.GZip' is used to validate stored CRC32 values.

The vast majority of GZip files contain only one 'Section'.  Standard tools
that work with GZip files create single-section files by default.

Multi-section files can be created by simply concatenating two existing
GZip files together.  The standard gunzip and zcat tools will simply
concatenate the decompressed data when reading these files back.  The
'decompress' function in this module will do the same.

When reading data from this module, please use caution regarding how you access
it.  For instance, if you are wanting to write the decompressed stream
to disk and validate its CRC32 value, you could use the 'decompress'
function.  However, you should process the entire stream before you check
the value of the Bool it returns.  Otherwise, you will force Haskell to buffer
the entire file in memory just so it can check the CRC32.
-}