{-# LANGUAGE DeriveDataTypeable, OverloadedStrings, ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-name-shadowing -fno-warn-orphans #-} module Debian.Apt.Index ( update , Fetcher , CheckSums(..) , Compression(..) , FileTuple , Size , controlFromIndex , controlFromIndex' , findContentsFiles , findIndexes , indexesInRelease , tupleFromFilePath ) where import Control.Monad import qualified Codec.Compression.GZip as GZip import qualified Codec.Compression.BZip as BZip import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as L import qualified Data.Digest.Pure.MD5 as MD5 import Data.Function import Data.List import qualified Data.Map as M import Data.Monoid ((<>)) import qualified Data.Text as T (Text, unpack, concat, lines, null, words) import Data.Time import Debian.Apt.Methods import Debian.Control (formatControl) import Debian.Control.ByteString import Debian.Control.Common import Debian.Control.Text (decodeControl) --import Debian.Repo.Types import Debian.Release import Debian.Sources import Debian.URI import System.Directory import System.FilePath (()) import System.Posix.Files import System.FilePath (takeBaseName) --import qualified System.Unix.Misc as Misc import Text.ParserCombinators.Parsec.Error import Text.PrettyPrint.ANSI.Leijen (pretty) -- |Package indexes on the server are uncompressed or compressed with -- gzip or bzip2. We do not know what will exist on the server until we -- actually look. This type is used to mark the compression status of -- what was actually found. data Compression = BZ2 | GZ | Uncompressed deriving (Read, Show, Eq, Ord, Enum, Bounded) data CheckSums = CheckSums { md5sum :: Maybe String , sha1 :: Maybe String , sha256 :: Maybe String } deriving (Read, Show, Eq) -- |function-type for a function that downloads a file -- The timestamp is optional. If the local file is as new or newer -- than the remote copy, the download may be skipped. -- -- A good choice might be a partially parameterized call to -- 'Debian.Apt.Methods.fetch' type Fetcher = URI -> -- remote URI FilePath -> -- local file name Maybe UTCTime -> -- optional time stamp for local file IO Bool -- True on success, False on failure -- |update - similar to apt-get update -- downloads the index files associated with a sources.list. The -- downloaded index files will have the same basenames that apt-get uses -- in \/var\/lib\/apt\/lists. You can almost use this function instead of -- calling apt-get update. However there are a few key differences: -- 1. apt-get update also updates the binary cache files -- 2. apt-get update uses the partial directory and lock file in\ /var\/lib\/apt\/lists -- 3. apt-get update downloads the Release and Release.gpg files update :: Fetcher -- ^ function that will do actually downloading -> FilePath -- ^ download indexes to the directory (must already exist) -> String -- ^ binary architecture -> [DebSource] -- ^ sources.list -> IO [Maybe (FilePath, Compression)] -- ^ (basename of index file, compression status) update fetcher basePath arch sourcesList = mapM (uncurry $ fetchIndex fetcher) (map (\(uri, fp, _) -> (uri, (basePath fp))) (concatMap (indexURIs arch) sourcesList)) -- | download possibly compressed files -- NOTE: index uri must not include the .bz2 or .gz extension fetchIndex :: Fetcher -- ^ function that will do the actual fetch -> URI -- ^ remote URI of package index, without .bz2 or .gz extension -> FilePath -- ^ name to save downloaded file as, without .bz2 or .gz extension -> IO (Maybe (FilePath, Compression)) -- ^ (downloaded file name + extension, compression status) fetchIndex fetcher uri localPath = do let localPath' = localPath ++ ".bz2" --lm <- getLastModified localPath' res <- fetcher (uri { uriPath = (uriPath uri) ++ ".bz2" }) localPath' Nothing if res then return $ Just (localPath', BZ2) else do let localPath' = localPath ++ ".gz" lm <- getLastModified localPath' res <- fetcher (uri { uriPath = (uriPath uri) ++ ".gz" }) localPath' lm if res then return $ Just (localPath', GZ) else do lm <- getLastModified localPath res <- fetcher (uri { uriPath = (uriPath uri) }) localPath lm if res then return (Just (localPath, Uncompressed)) else return Nothing -- |examine a DebSource line, and calculate for each section: -- - the URI to the uncompressed index file -- - the basename that apt-get would name the downloaded index -- FIXME: ExactPath dist will fail with error at runtime :( indexURIs :: String -- ^ which binary architecture -> DebSource -- ^ line from sources.list -> [(URI, FilePath, DebSource)] -- ^ (remote uri, local name, deb source for just this section) indexURIs arch debSource = map (\ section -> let (uri, fp) = calcPath (sourceType debSource) arch baseURI release section in (uri,fp, debSource { sourceDist = (Right (release, [section])) }) ) sections where baseURI = sourceUri debSource (release, sections) = either (error $ "indexURIs: support not implemented for exact path: " ++ show (pretty debSource)) id (sourceDist debSource) -- |return a tuple for the section -- - the URI to the uncompressed index file -- - the basename that apt-get uses for the downloaded index -- FIXME: support for Release and Release.gpg calcPath :: SourceType -- ^ do we want Packages or Sources -> String -- ^ The binary architecture to use for Packages -> URI -- ^ base URI as it appears in sources.list -> ReleaseName -- ^ the release (e.g., unstable, testing, stable, sid, etc) -> Section -- ^ the section (main, contrib, non-free, etc) -> (URI, [Char]) -- ^ (uri to index file, basename for the downloaded file) calcPath srcType arch baseURI release section = let indexPath = case srcType of DebSrc -> "source/Sources" Deb -> "binary-" ++ arch "Packages" path = (uriPath baseURI) "dists" (releaseName' release) sectionName' section indexPath in (baseURI { uriPath = path }, addPrefix . escapePath $ path) where addPrefix s = prefix scheme user' pass' reg port ++ {- "_" ++ -} s prefix "http:" (Just user) Nothing (Just host) port = user ++ host ++ port prefix "http:" _ _ (Just host) port = host ++ port prefix "ftp:" _ _ (Just host) _ = host prefix "file:" Nothing Nothing Nothing "" = "" prefix "ssh:" (Just user) Nothing (Just host) port = user ++ host ++ port prefix "ssh:" _ _ (Just host) port = host ++ port prefix _ _ _ _ _ = error ("calcPath: unsupported uri: " ++ uriToString' baseURI) user' = maybeOfString user pass' = maybeOfString pass (user, pass) = break (== ':') userpass userpass = maybe "" uriUserInfo auth reg = maybeOfString $ maybe "" uriRegName auth port = maybe "" uriPort auth scheme = uriScheme baseURI auth = uriAuthority baseURI --path = uriPath baseURI escapePath :: String -> String escapePath s = intercalate "_" $ wordsBy (== '/') s maybeOfString :: String -> Maybe String maybeOfString "" = Nothing maybeOfString s = Just s wordsBy :: Eq a => (a -> Bool) -> [a] -> [[a]] wordsBy p s = case (break p s) of (s, []) -> [s] (h, t) -> h : wordsBy p (drop 1 t) -- |Parse a possibly compressed index file. controlFromIndex :: Compression -> FilePath -> L.ByteString -> Either ParseError (Control' T.Text) controlFromIndex GZ path s = either Left (Right . decodeControl) . parseControl path . B.concat . L.toChunks . GZip.decompress $ s controlFromIndex BZ2 path s = either Left (Right . decodeControl) . parseControl path . B.concat . L.toChunks . BZip.decompress $ s controlFromIndex Uncompressed path s = either Left (Right . decodeControl) . parseControl path . B.concat . L.toChunks $ s -- |parse an index possibly compressed file controlFromIndex' :: Compression -> FilePath -> IO (Either ParseError (Control' T.Text)) controlFromIndex' compression path = L.readFile path >>= return . controlFromIndex compression path type Size = Integer type FileTuple = (CheckSums, Size, FilePath) -- |A release file contains a list of indexes (Packages\/Sources). Each -- Package or Source index may appear multiple times because it may be -- compressed several different ways. This function will return an -- assoc list where the key is the name of the uncompressed package -- index name and the value is the list of (file, compression) which -- decompress to the key. groupIndexes :: [FileTuple] -> [(FilePath, [(FileTuple, Compression)])] groupIndexes indexFiles = M.toList $ M.fromListWith combine $ map makeKV indexFiles where makeKV fileTuple@(_,_,fp) = let (name, compressionMethod) = uncompressedName fp in (name, [(fileTuple, compressionMethod)]) combine = (\x y -> sortBy (compare `on` snd) (x ++ y)) {- with t@(_,_,fp) m = let (un, compression) = in M.insertWith -} {- groupIndexes' :: String ->[FileTuple] -> [(FilePath, [(FileTuple, Compression)])] groupIndexes' iType indexFiles = M.toList (foldr (insertType iType) M.empty indexFiles) where insertType iType t@(_,_,fp) m = case uncompressedName' iType fp of Nothing -> m (Just (un, compression)) -> M.insertWith (\x y -> sortBy (compare `on` snd) (x ++ y)) un [(t, compression)] m -} -- |The release file contains the checksums for the uncompressed -- package indexes, even if the uncompressed package indexes are not -- stored on the server. This function returns the list of files that -- actually exist. filterExists :: FilePath -> (FilePath, [(FileTuple, Compression)]) -> IO (FilePath, [(FileTuple, Compression)]) filterExists distDir (fp, alternatives) = do e <- filterM ( \((_,_,fp),_) -> fileExist (distDir fp)) alternatives -- when (null e) (error $ "None of these files exist: " ++ show alternatives) return (fp, e) findIndexes :: FilePath -> String -> [FileTuple] -> IO [(FileTuple, Compression)] findIndexes distDir iType controlFiles = let indexes = groupIndexes controlFiles in do indexes' <- mapM (filterExists distDir) (filter (isType iType) indexes) return $ map (head . snd) (filter (not . null . snd) indexes') where isType iType (fp, _) = iType `isSuffixOf` fp {- findIndexes' :: FilePath -> String -> [FileTuple] -> IO [(FileTuple, Compression)] findIndexes' distDir iType controlFiles = let m = groupIndexes' iType controlFiles in do m' <- mapM (filterExists distDir) m return $ map (head . snd) (filter (not . null . snd) m') -} -- insertType :: String -> (CheckSums, Integer, FilePath) -> M.Map FilePath ((CheckSums, Integer, FilePath), Compression) -> M.Map FilePath ((CheckSums, Integer, FilePath), Compression) {- uncompressedName' :: String -> FilePath -> Maybe (FilePath, Compression) uncompressedName' iType fp | isSuffixOf iType fp = Just (fp, Uncompressed) | isSuffixOf (iType ++".gz") fp = Just (reverse . (drop 3) . reverse $ fp, GZ) | isSuffixOf (iType ++".bz2") fp = Just (reverse . (drop 4) . reverse $ fp, BZ2) | otherwise = Nothing -} uncompressedName :: FilePath -> (FilePath, Compression) uncompressedName fp | isSuffixOf ".gz" fp = (reverse . (drop 3) . reverse $ fp, GZ) | isSuffixOf ".bz2" fp = (reverse . (drop 4) . reverse $ fp, BZ2) | otherwise = (fp, Uncompressed) indexesInRelease :: (FilePath -> Bool) -> Control' T.Text -- ^ A release file -> [(CheckSums, Integer, FilePath)] -- ^ indexesInRelease filterp (Control [p]) = let md5sums = case md5sumField p of (Just md5) -> md5 Nothing -> error $ "Did not find MD5Sum field." in filter (\(_,_,fp) -> filterp fp) $ map (makeTuple . T.words) $ filter (not . T.null) (T.lines md5sums) where makeTuple :: [T.Text] -> (CheckSums, Integer, FilePath) makeTuple [md5sum, size, fp] = (CheckSums { md5sum = Just (T.unpack md5sum), sha1 = Nothing, sha256 = Nothing }, read (T.unpack size), T.unpack fp) makeTuple x = error $ "Invalid line in release file: " ++ show x indexesInRelease _ x = error $ "Invalid release file: " <> T.unpack (T.concat (formatControl x)) -- |make a FileTuple for a file found on the local disk -- returns 'Nothing' if the file does not exist. tupleFromFilePath :: FilePath -> FilePath -> IO (Maybe FileTuple) tupleFromFilePath basePath fp = do e <- fileExist (basePath fp) if not e then return Nothing else do size <- getFileStatus (basePath fp) >>= return . fromIntegral . fileSize md5 <- L.readFile (basePath fp) >>= return . show . MD5.md5 return $ Just (CheckSums { md5sum = Just md5, sha1 = Nothing, sha256 = Nothing }, size, fp) -- |find the Contents-* files. These are not listed in the Release file findContentsFiles :: (FilePath -> Bool) -> FilePath -> IO [FilePath] findContentsFiles filterP distDir = do files <- getDirectoryContents distDir return $ filter filterP $ filter (isPrefixOf "Contents-" . takeBaseName) files