#!/usr/bin/env runhaskell

{-# LANGUAGE DeriveDataTypeable #-}

module Main where

-- una.hs, version 2.0 (2010-07-02)
--
-- by John Wiegley
--
-- A simple universal unarchiving utility.  Just point it at any archive or
-- compressed file, and it spits out a single file or directory in the
-- current directory with its contents.  Use -d to delete the original
-- archive on success.  Use -f to overwrite any existing file or directory
-- which might be in the way.
--
-- To handle all the supported formats on Mac OS X, you must first install:
--
--   sudo port install cabextract unarj unrar lha p7zip

import Data.Char
import Data.List
import Data.Function
import Data.Maybe
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as Char8
import System.Environment
import System.Console.CmdArgs
import System.FilePath
import System.Process
import System.Directory
import System.IO
import System.IO.Storage
import System.Exit
import Control.Monad
import Control.Concurrent
import qualified Control.Exception as C

-- This script takes a series of pathnames to compressed files and/or
-- archives, and uncompresses/decodes/unarchives them.
--
-- What's especially useful about this script is that it guarantees that the
-- result of this process is a single new entry in the current directory,
-- whether that be a file or a directory.  That is:
--
--   1. If it's simply a compressed file, it uncompresses in the current
--      directory.
--   2. If it's an archive containing a single file or directory, the result
--      is the same as if it had been compressed: the file or directory ends
--      up in the current directory.
--   3. If the archive contains multiple items, they are unarchived in a
--      directory named after the original file.

version    = "2.0.0"
copyright  = "2009-2012"

unaSummary = "una v" ++ version ++ ", (C) John Wiegley " ++ copyright

data Una = Una
    { delete_  :: Bool
    , force    :: Bool
    , temp     :: FilePath
    , output   :: FilePath
    , sys_temp :: Bool
    , test     :: Bool
    , files    :: [FilePath]
    }
    deriving (Data, Typeable, Show, Eq)

una = Una
    { delete_  = def &= help "Delete the original archive if successful"
    , force    = def &= name "f"
                     &= help "Overwrite any existing file/dir"
    , temp     = def &= typDir
                     &= help "Use DIR as a temp directory, instead of current"
    , output   = def &= typDir &= name "o"
                     &= help "Unarchive to DIR instead of archive's directory"
    , sys_temp = def &= name "T"
                     &= help "Use the system's temp directory (typically /tmp)"
    , test     = def &= explicit &= name "test"
                     &= help "Extract, throw away resulting file(s), set error code"
    , files    = def &= args &= typ "FILE..."
    } &= summary unaSummary
      &= program "una"
      &= helpArg [explicit, name "h"]
      &= help "Universal recursive unarchiver/decoder/decompressor tool"
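-- An illustrative session (the archive names are hypothetical), showing how
-- the options above combine on the command line:
--
--   una foo.tar.gz          # leaves a single ./foo file or directory behind
--   una -d bar.zip          # extract bar.zip, deleting it on success
--   una -f -o /tmp baz.7z   # extract into /tmp, overwriting anything in the way
--   una --test quux.rar     # extract and discard, setting only the exit code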
main :: IO ()
main = do
  opts <- cmdArgs una
  --when (null (files opts)) $ cmdArgsApply cmdArgsHelp

  -- Extract each archive given on the command-line.  If it's not
  -- recognizable as an archive, the resulting pathname will be identical to
  -- the input, in which case nothing has been done.  If an error occurs for
  -- a given archive, stop then.
  forM_ (files opts) $ \path -> do
    result <- withStore "main" $ do
      putValue "main" "opts" opts
      extract path (force opts)

    if test opts
      then case result of
             ArchiveError err -> exitWith $ ExitFailure 1
             FileName fp      -> if fp /= path
                                 then removeFilePath fp
                                 else exitWith $ ExitFailure 1
             DirectoryName dp -> removeFilePath dp
      else case result of
             ArchiveError err -> error err
             FileName fp      -> if fp /= path
                                 then success path fp "file" (delete_ opts)
                                 else putStrLn $ "Archive unrecognized: " ++ fp
             DirectoryName dp -> success path dp "directory" (delete_ opts)

  -- In case of success, print the final product's path and delete the
  -- original archive if -d was used.
  where success path f kind delete = do
          rf <- makeRelativeToCurrentDirectory f
          putStrLn $ "Extracted " ++ kind ++ ": " ++ rf
          when delete $ removeFile path

getOption :: Data a => (a -> b) -> IO b
getOption opt = do
  opts <- getValue "main" "opts"
  case opts of
    Just o -> return $ opt o

-- Determine which "type" an archive is by examining its extension.  It may
-- not be an archive at all, but just a compressed file.  It could even be a
-- compressed archive containing just a single file!  There are too many
-- possible combinations to know at this point.

exts = [ (".tar",  [tarballExtractor])
       , (".taz",  [compressExtractor, tarballExtractor])
       , (".tgz",  [gzipExtractor, tarballExtractor])
       , (".tbz",  [bzip2Extractor, tarballExtractor])
       , (".tz2",  [bzip2Extractor, tarballExtractor])
       , (".txz",  [xzipExtractor, tarballExtractor])
       , (".shar", [sharExtractor])
       , (".z",    [compressExtractor])
       , (".gz",   [gzipExtractor])
       , (".bz2",  [bzip2Extractor])
       , (".xz",   [xzipExtractor])
       , (".7z",   [p7zipExtractor])
       , (".zip",  [zipExtractor])
       , (".jar",  [zipExtractor])
       , (".arj",  [arjExtractor])
       , (".lha",  [lhaExtractor])
       , (".lzh",  [lhaExtractor])
       , (".rar",  [rarExtractor])
       , (".uu",   [uuExtractor])
       , (".a",    [arExtractor])
       , (".cab",  [cabExtractor])
       , (".cpio", [cpioExtractor])
       , (".gpg",  [gpgExtractor])
       , (".asc",  [gpgExtractor])
       , (".dmg",  [diskImageExtractor])
       , (".iso",  [diskImageExtractor])
       , (".cdr",  [diskImageExtractor])
       , (".sparseimage",  [diskImageExtractor])
       , (".sparsebundle", [diskImageExtractor])
       , (".sit",  [stuffItExtractor True])
       , (".sea",  [stuffItExtractor True])
       , (".bin",  [stuffItExtractor False])
       , (".hqx",  [stuffItExtractor False])
       , (".sdk",  [shrinkItExtractor])
       , (".shk",  [shrinkItExtractor])
       , (".bxy",  [shrinkItExtractor])
       , (".bny",  [shrinkItExtractor])
       , (".bqy",  [shrinkItExtractor])
       ]

-- Gzip and family are very simple compressors that can handle streaming data
-- quite easily.

returnStream :: B.ByteString -> IO ExtractionResult
returnStream out = return (DataStream out, noCleanup)

simpleExtractor :: String -> [String] -> Extraction -> IO ExtractionResult
simpleExtractor cmd args item =
  case item of
    DataStream d -> performExtract cmd args d returnStream
    FileName f   -> performExtract cmd (args ++ [f]) B.empty returnStream

gzipExtractor     = Extractor False $ simpleExtractor "gzip" ["-qdc"]
compressExtractor = gzipExtractor
bzip2Extractor    = Extractor False $ simpleExtractor "bzip2" ["-qdc"]
xzipExtractor     = Extractor False $ simpleExtractor "xz" ["-qdc"]
uuExtractor       = Extractor False $ simpleExtractor "uudecode" []
gpgExtractor      = Extractor False $ simpleExtractor "gpg" ["-d"]
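-- Other streaming decompressors would follow the same pattern.  As a hedged
-- sketch (hypothetical: zstd is not among the formats handled above, and
-- this definition is not registered in the `exts` table), a zstd entry could
-- be written as:

zstdExtractor = Extractor False $ simpleExtractor "zstd" ["-qdc"]

-- together with rows such as (".zst", [zstdExtractor]) and
-- (".tzst", [zstdExtractor, tarballExtractor]) added to `exts`.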
-- Tarballs and 7-zip are both archive formats that can accept their input on
-- stdin.  It's not likely that someone will compress a 7zip archive, but
-- it's handled anyway.

returnContents :: FilePath -> B.ByteString -> IO ExtractionResult
returnContents dir _ = examineContents dir True

tarballExtractor = Extractor True $ \item -> do
  dir <- createTempDirectory
  case item of
    DataStream d -> performExtract "tar" ["xCf", dir, "-"] d
                                   (returnContents dir)
    FileName f   -> performExtract "tar" ["xCf", dir, f] B.empty
                                   (returnContents dir)

sharExtractor = Extractor True $ \item -> do
  dir <- createTempDirectory
  case item of
    DataStream d -> performExtract "unshar" ["-d", dir] d
                                   (returnContents dir)
    FileName f   -> performExtract "unshar" ["-d", dir, f] B.empty
                                   (returnContents dir)

p7zipExtractor = Extractor True $ \item -> do
  dir <- createTempDirectory
  case item of
    DataStream d -> performExtract "7za" ["x", "-bd", "-o" ++ dir, "-si"] d
                                   (returnContents dir)
    FileName f   -> performExtract "7za" ["x", f, "-bd", "-o" ++ dir] B.empty
                                   (returnContents dir)

-- Zip and CAB are not quite as flexible as tar and 7-zip, in that they
-- cannot accept the archive via stdin.  If there is a stream from an earlier
-- decoding, it must be written to a temp file and then extracted from that.

zipExtractor = Extractor True $ fix $ \fn item ->
  case item of
    DataStream d -> extractByTemp d ".zip" fn
    FileName f   -> do
      dir <- createTempDirectory
      performExtract "unzip" ["-q", "-d", dir, f] B.empty (returnContents dir)

cabExtractor = Extractor True $ fix $ \fn item ->
  case item of
    DataStream d -> extractByTemp d ".cab" fn
    FileName f   -> do
      dir <- createTempDirectory
      performExtract "cabextract" ["-q", "-d", dir, f] B.empty
                     (returnContents dir)

-- cpio doesn't know how to extract its contents to a particular directory,
-- so a temporary must be created.  It can, however, read input from a
-- stream.

extractInTempDir :: String -> [String] -> B.ByteString -> IO ExtractionResult
extractInTempDir cmd args inp = do
  dir  <- createTempDirectory
  cdir <- canonicalizePath dir
  cwd  <- getCurrentDirectory
  ccwd <- canonicalizePath cwd
  C.bracket (setCurrentDirectory cdir)
            (\_ -> setCurrentDirectory ccwd)
            (\_ -> performExtract cmd args inp (returnContents cdir))

cpioExtractor = Extractor True $ fix $ \fn item ->
  case item of
    DataStream d -> extractInTempDir "cpio" ["-id"] d
    FileName f   -> extractInTempDir "cpio" ["-idF", f] B.empty

-- The next set of formats can't handle streaming input, nor can they extract
-- to a specified directory.  This is why I call them the "dumb" extractors.
-- Everything must be setup for them in advance.

dumbExtractor :: String -> String -> String -> Extraction -> IO ExtractionResult
dumbExtractor cmd arg ext item =
  case item of
    DataStream d -> extractByTemp d ext (dumbExtractor cmd arg ext)
    FileName f   -> extractInTempDir cmd [arg, f] B.empty

arjExtractor      = Extractor True $ dumbExtractor "unarj" "x" ".arj"
lhaExtractor      = Extractor True $ dumbExtractor "lha" "x" ".lha"
rarExtractor      = Extractor True $ dumbExtractor "unrar" "x" ".rar"
arExtractor       = Extractor True $ dumbExtractor "ar" "x" ".ar"
shrinkItExtractor = Extractor True $ dumbExtractor "nulib2" "-x" ".shk"
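-- Tracing one "dumb" extraction, purely as an illustration: given a stream
-- of .arj data from an earlier decoding stage, extractByTemp writes it to a
-- temporary *.arj file, dumbExtractor then re-runs itself on that FileName,
-- and that second pass invokes `unarj x <tempfile>` inside a fresh temporary
-- directory, leaving examineContents to decide what the archive produced.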
-- Disk images are mountable archives, which means the data must be copied
-- out in order to "extract" it.  jww (2010-07-09): We should handle Linux
-- loopback mounts too.

diskImageExtractor = Extractor True $ fix $ \fn item ->
  case item of
    DataStream d -> extractByTemp d ".dmg" fn
    FileName f   -> do
      let args = ["attach", "-readonly", "-mountrandom", "/tmp",
                  "-noverify", "-noautofsck", f]
      loud <- isLoud
      when loud $ putStrLn $ "! hdiutil " ++ unwords args
      (exit, out, _) <- readProcessWithExitCode "hdiutil" args []
      case exit of
        ExitFailure _ ->
          return (ArchiveError "Failed to attach disk image", noCleanup)
        ExitSuccess   -> do
          let mountPoint = case find (isInfixOf "/tmp/dmg") (lines out) of
                             Just line -> (Just . last . words) line
                             Nothing   -> Nothing
          case mountPoint of
            Nothing  ->
              return (ArchiveError "Failed to attach disk image", noCleanup)
            Just dir -> do
              tmpDir <- createTempDirectory
              when loud $ putStrLn $ "! ditto " ++ unwords [dir, tmpDir]
              readProcessWithExitCode "ditto" [dir, tmpDir] []
              when loud $ putStrLn $
                "! hdiutil " ++ unwords ["detach", dir, "-force"]
              readProcessWithExitCode "hdiutil" ["detach", dir, "-force"] []
              examineContents tmpDir True

-- StuffIt Expander is its own creature.  We talk to it via AppleScript, as I
-- know of no better way.

stuffItExtractor archivep = Extractor archivep $ fix $ \fn item ->
  case item of
    DataStream d -> extractByTemp d ".sit" fn
    FileName f   -> do
      tmpDir   <- createTempDirectory
      canonTmp <- canonicalizePath tmpDir

      let script =  "  tell application \"StuffIt Expander\"\n"
                 ++ "    run\n"
                 ++ "    expand {POSIX file \"" ++ f ++ "\"}"
                 -- ++ " with delete originals"
                 ++ " to POSIX file \"" ++ canonTmp ++ "\"\n"
                 ++ "  end tell"

      loud <- isLoud
      when loud $ putStrLn "! invoke StuffIt Expander"

      (exit, _, err) <- readProcessWithExitCode "osascript" [] script
      case exit of
        ExitSuccess   ->
          if archivep
          then examineContents canonTmp True
          else do
            contents <- getDirectoryContents canonTmp
            let elems = delete "." $ delete ".." contents
                file  = C.assert (length elems == 1)
                                 (canonTmp </> head elems)
            exists <- doesFileExist file
            return (FileName $ C.assert exists file,
                    removeDirectoryRecursive canonTmp)
        ExitFailure _ ->
          return (ArchiveError $ "Failed to invoke StuffIt Expander: " ++ err,
                  removeDirectoryRecursive canonTmp)

-- Types used by this script.

data Extractor = Extractor
    { isArchive :: Bool
    , extractor :: Extraction -> IO ExtractionResult
    }

data Extraction = DataStream B.ByteString
                | FileName FilePath
                | DirectoryName FilePath
                | ArchiveError String
                deriving (Show)

type ExtractionResult = (Extraction, IO ())

noCleanup :: IO ()
noCleanup = return ()
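-- To make these types concrete (an illustration of the code below, not
-- additional behavior): for a "foo.zip.gz" input the extractor chain is
-- [gzipExtractor, zipExtractor].  gzip yields a DataStream, zipExtractor
-- writes that stream to a temporary *.zip file and unpacks it into a
-- temporary directory, and examineContents reduces that directory to the
-- final FileName or DirectoryName, each stage pairing its result with the
-- IO action that cleans up its temporaries.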
-- Given a file path, determine its type and extract its contents.
-- Depending on the type of the file, and what command-line options the
-- extraction command supports, the result may be one of several types,
-- described by the type Extraction.

extract :: FilePath -> Bool -> IO Extraction
extract rpath overwrite = do
  path    <- canonicalizePath rpath
  pexists <- doesFileExist path
  unless pexists $ error $ "File does not exist: " ++ path

  destination <- getDestination
  fexists <- doesFileExist destination
  dexists <- doesDirectoryExist destination
  when (fexists || dexists) $
    if overwrite
    then if fexists
         then removeFile destination
         else removeDirectoryRecursive destination
    else unless (null typs) $
         error $ "Destination already exists: " ++ destination

  extract' typs (FileName path, return ())

  where
    (basename, typs) = findExtractors [] rpath

    getDestination = do
      destpath <- getOption output
      cwd      <- getCurrentDirectory
      return $ (if null destpath then cwd else destpath)
                 </> takeFileName basename

    -- The variations of extract' receive a list of archive types yet to
    -- be "unwrapped" from the previous extraction, plus a cleanup action
    -- which must be executed before the final result is returned.  The
    -- end result is that only the final extraction remains, indicated by
    -- the return value, with all temporaries having been properly cleaned
    -- up.

    extract' _ (x@(ArchiveError _), m) = do m; return x

    extract' [] (DataStream d, m) = do
      -- If we reach the final step of the unarchiving process, and the
      -- result is a data stream, write it to disk at the desired basename.
      destination <- getDestination
      B.writeFile destination d
      m; return $ FileName destination

    extract' [] (FileName f, m) = do
      destination <- getDestination
      -- Don't rename the file
      let realdest = dropFileName destination </> takeFileName f
      renameFile f realdest
      m; return $ FileName realdest

    extract' [] (DirectoryName f, m) = do
      destination <- getDestination
      renameDirectory f destination
      m; return $ DirectoryName destination

    extract' (t:ts) (x, m) = do
      y <- extractor t x
      -- If t is an archive extractor, we'll let examineContents decide if
      -- it contains items needing further extraction.  Otherwise, if there
      -- are successive compression or encoding stages, process them now
      -- recursively.
      let ts' = if isArchive t then [] else ts
      result <- extract' ts' y
      m; return result

findExtractors :: [Extractor] -> FilePath -> (FilePath, [Extractor])
findExtractors acc f = apply $ lookup (map toLower (takeExtension f)) exts
  where apply (Just types) = findExtractors (acc ++ types) (dropExtension f)
        apply Nothing      = (f, acc)
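-- For example (conceptual notation only, since Extractor has no Eq or Show
-- instances):
--
--   findExtractors [] "foo.tar.gz"  ~>  ("foo", [gzipExtractor, tarballExtractor])
--   findExtractors [] "notes.txt"   ~>  ("notes.txt", [])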
performExtract :: String           -- command to execute
               -> [String]         -- command arguments
               -> B.ByteString     -- standard input
               -> (B.ByteString -> IO ExtractionResult)
                                   -- function to process output
               -> IO ExtractionResult
performExtract cmd args ds fn = do
  (exit, out, err) <- bReadProcessWithExitCode cmd args ds
  if exit == ExitSuccess
    then fn out
    else return (ArchiveError err, noCleanup)

examineContents :: FilePath -> Bool -> IO ExtractionResult
examineContents dir cleanup = do
  -- Examine the contents of a populated directory:
  --  - if it's a single file, return the name and a cleanup action to
  --    remove the temp directory
  --  - if it's a single directory, recurse this step
  --  - if it's many files and/or directories, return the name of the temp
  --    directory, and no cleanup action
  canon    <- canonicalizePath dir
  contents <- getDirectoryContents canon
  case delete "." $ delete ".." contents of
    []     -> return (ArchiveError "Empty archive",
                      removeDirectoryRecursive canon)
    (x:[]) -> do
      let path = canon </> x
      isDir <- doesDirectoryExist path
      if isDir
        then do
          (x', m) <- examineContents path False
          return (x', do when cleanup $ removeDirectoryRecursive canon; m)
        else do
          x' <- extract path False
          return (x', when cleanup $ removeDirectoryRecursive canon)
    _      -> return (DirectoryName canon, noCleanup)

extractByTemp :: B.ByteString      -- output to write to temp
              -> String            -- the temporary file extension
              -> (Extraction -> IO ExtractionResult)
                                   -- function to handle the new temp file
              -> IO ExtractionResult
extractByTemp ds ext fn = do
  dir <- workingDirectory
  (path, handle) <- openBinaryTempFile dir ("file" ++ ext)
  loud <- isLoud
  when loud $ putStrLn $ "> " ++ path
  B.hPut handle ds
  hFlush handle
  (x, m) <- fn (FileName path)
  hClose handle
  return (x, do removeFile path; m)

createTempDirectory :: IO FilePath
createTempDirectory = do
  dir <- workingDirectory
  (path, handle) <- openBinaryTempFile dir "dir.tmp"
  hClose handle
  removeFile path
  createDirectory path
  return path

workingDirectory :: IO FilePath
workingDirectory = do
  sysp <- getOption sys_temp
  if sysp
    then getTemporaryDirectory
    else do dir <- getOption temp
            if null dir
              then return "."
              else return dir

removeFilePath :: FilePath -> IO ()
removeFilePath path = do
  fexists <- doesFileExist path
  dexists <- doesDirectoryExist path
  when (fexists || dexists) $
    if fexists
    then removeFile path
    else removeDirectoryRecursive path

-- The following function was copied from System.Process, because I needed a
-- variant which operates on lazy ByteStrings.  The regular version attempts
-- to decode Unicode in the binary output from the decompressor.

bReadProcessWithExitCode :: FilePath            -- ^ command to run
                         -> [String]            -- ^ any arguments
                         -> B.ByteString        -- ^ standard input
                         -> IO (ExitCode, B.ByteString, String)
                                                -- ^ exitcode, stdout, stderr
bReadProcessWithExitCode cmd args input = do
  (Just inh, Just outh, Just errh, pid) <-
    createProcess (proc cmd args) { std_in  = CreatePipe
                                  , std_out = CreatePipe
                                  , std_err = CreatePipe }

  outMVar <- newEmptyMVar

  -- fork off a thread to start consuming stdout
  hSetBinaryMode outh True
  out <- B.hGetContents outh
  _ <- forkIO $ C.evaluate (B.length out) >> putMVar outMVar ()

  -- fork off a thread to start consuming stderr
  hSetBinaryMode errh False
  err <- hGetContents errh
  _ <- forkIO $ C.evaluate (length err) >> putMVar outMVar ()

  -- now write and flush any input
  loud <- isLoud
  if loud
    then if B.null input
         then putStrLn $ "! " ++ cmd ++ " " ++ unwords args
         else do putStrLn $ "| " ++ cmd ++ " " ++ unwords args
                 B.hPutStr inh input; hFlush inh
    else unless (B.null input) $ do
           B.hPutStr inh input; hFlush inh
  hClose inh -- done with stdin

  -- wait on the output
  takeMVar outMVar
  takeMVar outMVar
  hClose outh
  hClose errh

  -- wait on the process
  ex <- waitForProcess pid

  return (ex, out, err)

-- una.hs ends here