{-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts, OverloadedStrings, ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-name-shadowing -fno-warn-orphans #-} module Debian.Apt.Index ( update , Fetcher , CheckSums(..) , Compression(..) , FileTuple , Size , controlFromIndex , controlFromIndex' , findContentsFiles , findIndexes , indexesInRelease , tupleFromFilePath ) where import qualified Codec.Compression.GZip as GZip import qualified Codec.Compression.BZip as BZip import Control.Lens (over, to, view) import Control.Monad import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as L import qualified Data.Digest.Pure.MD5 as MD5 import qualified Data.Digest.Pure.SHA as SHA import Data.Either (partitionEithers) import Data.Function import Data.List as List (null, intercalate, sortBy, isSuffixOf, isPrefixOf) import qualified Data.Map as M import Data.Monoid ((<>)) import Data.Text as Text (Text, unpack, concat, lines, words) import Data.Time import Debian.Apt.Methods import Debian.Codename (Codename, codename) import Debian.Control (formatControl) import Debian.Control.ByteString --import Debian.Control.Common import Debian.Control.Text (decodeControl) import Debian.Release import Debian.Sources import Debian.URI (uriPathLens, uriToString') import Debian.VendorURI (VendorURI, vendorURI) import Network.URI import System.Directory import System.FilePath (()) import System.Posix.Files import System.FilePath (takeBaseName) --import qualified System.Unix.Misc as Misc import Text.ParserCombinators.Parsec.Error import Text.PrettyPrint (render) import Distribution.Pretty (pretty) import Text.Read (readMaybe) #if !MIN_VERSION_base(4,8,0) import Control.Applicative (pure, (<$>), (<*>)) #endif -- |Package indexes on the server are uncompressed or compressed with -- gzip or bzip2. We do not know what will exist on the server until we -- actually look. This type is used to mark the compression status of -- what was actually found. data Compression = BZ2 | GZ | Uncompressed deriving (Read, Show, Eq, Ord, Enum, Bounded) data CheckSums = CheckSums { md5sum :: Maybe String , sha1 :: Maybe String , sha256 :: Maybe String } deriving (Read, Show, Eq) -- |function-type for a function that downloads a file -- The timestamp is optional. If the local file is as new or newer -- than the remote copy, the download may be skipped. -- -- A good choice might be a partially parameterized call to -- 'Debian.Apt.Methods.fetch' type Fetcher = URI -> -- remote URI FilePath -> -- local file name Maybe UTCTime -> -- optional time stamp for local file IO Bool -- True on success, False on failure -- |update - similar to apt-get update -- downloads the index files associated with a sources.list. The -- downloaded index files will have the same basenames that apt-get uses -- in \/var\/lib\/apt\/lists. You can almost use this function instead of -- calling apt-get update. However there are a few key differences: -- 1. apt-get update also updates the binary cache files -- 2. apt-get update uses the partial directory and lock file in\ /var\/lib\/apt\/lists -- 3. apt-get update downloads the Release and Release.gpg files update :: Fetcher -- ^ function that will do actually downloading -> FilePath -- ^ download indexes to the directory (must already exist) -> String -- ^ binary architecture -> [DebSource] -- ^ sources.list -> IO [Maybe (FilePath, Compression)] -- ^ (basename of index file, compression status) update fetcher basePath arch sourcesList = mapM (uncurry $ fetchIndex fetcher) (map (\(uri, fp, _) -> (uri, (basePath fp))) (concatMap (indexURIs arch) sourcesList)) -- | download possibly compressed files -- NOTE: index uri must not include the .bz2 or .gz extension fetchIndex :: Fetcher -- ^ function that will do the actual fetch -> URI -- ^ remote URI of package index, without .bz2 or .gz extension -> FilePath -- ^ name to save downloaded file as, without .bz2 or .gz extension -> IO (Maybe (FilePath, Compression)) -- ^ (downloaded file name + extension, compression status) fetchIndex fetcher uri localPath = do let localPath' = localPath ++ ".bz2" --lm <- getLastModified localPath' res <- fetcher (uri { uriPath = (uriPath uri) ++ ".bz2" }) localPath' Nothing if res then return $ Just (localPath', BZ2) else do let localPath' = localPath ++ ".gz" lm <- getLastModified localPath' res <- fetcher (uri { uriPath = (uriPath uri) ++ ".gz" }) localPath' lm if res then return $ Just (localPath', GZ) else do lm <- getLastModified localPath res <- fetcher (uri { uriPath = (uriPath uri) }) localPath lm if res then return (Just (localPath, Uncompressed)) else return Nothing -- |examine a DebSource line, and calculate for each section: -- - the URI to the uncompressed index file -- - the basename that apt-get would name the downloaded index -- FIXME: ExactPath dist will fail with error at runtime :( indexURIs :: String -- ^ which binary architecture -> DebSource -- ^ line from sources.list -> [(URI, FilePath, DebSource)] -- ^ (remote uri, local name, deb source for just this section) indexURIs arch debSource = map (\ section -> let (uri, fp) = calcPath (view sourceType debSource) arch baseURI release section in (uri,fp, debSource { _sourceDist = (Right (release, [section])) }) ) sections where baseURI = view sourceUri debSource (release, sections) = either (error $ "indexURIs: support not implemented for exact path: " ++ render (pretty debSource)) id (view sourceDist debSource) -- |return a tuple for the section -- - the URI to the uncompressed index file -- - the basename that apt-get uses for the downloaded index -- FIXME: support for Release and Release.gpg calcPath :: SourceType -- ^ do we want Packages or Sources -> String -- ^ The binary architecture to use for Packages -> VendorURI -- ^ base URI as it appears in sources.list -> Codename -- ^ the release (e.g., unstable, testing, stable, sid, etc) -> Section -- ^ the section (main, contrib, non-free, etc) -> (URI, [Char]) -- ^ (uri to index file, basename for the downloaded file) calcPath srcType arch baseURI release section = let indexPath = case srcType of DebSrc -> "source/Sources" Deb -> "binary-" ++ arch "Packages" uri' = over uriPathLens (\path -> path "dists" codename release sectionName' section indexPath) (view vendorURI baseURI) path = view uriPathLens uri' in (uri', addPrefix (escapePath path)) where addPrefix s = prefix scheme user' pass' reg port ++ {- "_" ++ -} s prefix "http:" (Just user) Nothing (Just host) port = user ++ host ++ port prefix "http:" _ _ (Just host) port = host ++ port prefix "ftp:" _ _ (Just host) _ = host prefix "file:" Nothing Nothing Nothing "" = "" prefix "ssh:" (Just user) Nothing (Just host) port = user ++ host ++ port prefix "ssh:" _ _ (Just host) port = host ++ port prefix _ _ _ _ _ = error ("calcPath: unsupported uri: " ++ view (vendorURI . to uriToString') baseURI) user' = maybeOfString user pass' = maybeOfString pass (user, pass) = break (== ':') userpass userpass = maybe "" uriUserInfo auth reg = maybeOfString $ maybe "" uriRegName auth port = maybe "" uriPort auth scheme = view (vendorURI . to uriScheme) baseURI auth = view (vendorURI . to uriAuthority) baseURI --path = uriPath baseURI escapePath :: String -> String escapePath s = intercalate "_" $ wordsBy (== '/') s maybeOfString :: String -> Maybe String maybeOfString "" = Nothing maybeOfString s = Just s wordsBy :: Eq a => (a -> Bool) -> [a] -> [[a]] wordsBy p s = case (break p s) of (s, []) -> [s] (h, t) -> h : wordsBy p (drop 1 t) -- |Parse a possibly compressed index file. controlFromIndex :: Compression -> FilePath -> L.ByteString -> Either ParseError (Control' Text) controlFromIndex GZ path s = either Left (Right . decodeControl) . parseControl path . B.concat . L.toChunks . GZip.decompress $ s controlFromIndex BZ2 path s = either Left (Right . decodeControl) . parseControl path . B.concat . L.toChunks . BZip.decompress $ s controlFromIndex Uncompressed path s = either Left (Right . decodeControl) . parseControl path . B.concat . L.toChunks $ s -- |parse an index possibly compressed file controlFromIndex' :: Compression -> FilePath -> IO (Either ParseError (Control' Text)) controlFromIndex' compression path = L.readFile path >>= return . controlFromIndex compression path type Size = Integer type FileTuple = (CheckSums, Size, FilePath) -- |A release file contains a list of indexes (Packages\/Sources). Each -- Package or Source index may appear multiple times because it may be -- compressed several different ways. This function will return an -- assoc list where the key is the name of the uncompressed package -- index name and the value is the list of (file, compression) which -- decompress to the key. groupIndexes :: [FileTuple] -> [(FilePath, [(FileTuple, Compression)])] groupIndexes indexFiles = M.toList $ M.fromListWith combine $ map makeKV indexFiles where makeKV fileTuple@(_,_,fp) = let (name, compressionMethod) = uncompressedName fp in (name, [(fileTuple, compressionMethod)]) combine = (\x y -> sortBy (compare `on` snd) (x ++ y)) {- with t@(_,_,fp) m = let (un, compression) = in M.insertWith -} {- groupIndexes' :: String ->[FileTuple] -> [(FilePath, [(FileTuple, Compression)])] groupIndexes' iType indexFiles = M.toList (foldr (insertType iType) M.empty indexFiles) where insertType iType t@(_,_,fp) m = case uncompressedName' iType fp of Nothing -> m (Just (un, compression)) -> M.insertWith (\x y -> sortBy (compare `on` snd) (x ++ y)) un [(t, compression)] m -} -- |The release file contains the checksums for the uncompressed -- package indexes, even if the uncompressed package indexes are not -- stored on the server. This function returns the list of files that -- actually exist. filterExists :: FilePath -> (FilePath, [(FileTuple, Compression)]) -> IO (FilePath, [(FileTuple, Compression)]) filterExists distDir (fp, alternatives) = do e <- filterM ( \((_,_,fp),_) -> fileExist (distDir fp)) alternatives -- when (null e) (error $ "None of these files exist: " ++ show alternatives) return (fp, e) findIndexes :: FilePath -> String -> [FileTuple] -> IO [(FileTuple, Compression)] findIndexes distDir iType controlFiles = let indexes = groupIndexes controlFiles in do indexes' <- mapM (filterExists distDir) (filter (isType iType) indexes) return $ map (head . snd) (filter (not . List.null . snd) indexes') where isType iType (fp, _) = iType `isSuffixOf` fp {- findIndexes' :: FilePath -> String -> [FileTuple] -> IO [(FileTuple, Compression)] findIndexes' distDir iType controlFiles = let m = groupIndexes' iType controlFiles in do m' <- mapM (filterExists distDir) m return $ map (head . snd) (filter (not . null . snd) m') -} -- insertType :: String -> (CheckSums, Integer, FilePath) -> M.Map FilePath ((CheckSums, Integer, FilePath), Compression) -> M.Map FilePath ((CheckSums, Integer, FilePath), Compression) {- uncompressedName' :: String -> FilePath -> Maybe (FilePath, Compression) uncompressedName' iType fp | isSuffixOf iType fp = Just (fp, Uncompressed) | isSuffixOf (iType ++".gz") fp = Just (reverse . (drop 3) . reverse $ fp, GZ) | isSuffixOf (iType ++".bz2") fp = Just (reverse . (drop 4) . reverse $ fp, BZ2) | otherwise = Nothing -} uncompressedName :: FilePath -> (FilePath, Compression) uncompressedName fp | isSuffixOf ".gz" fp = (reverse . (drop 3) . reverse $ fp, GZ) | isSuffixOf ".bz2" fp = (reverse . (drop 4) . reverse $ fp, BZ2) | otherwise = (fp, Uncompressed) indexesInRelease :: (FilePath -> Bool) -> Control' Text -- ^ A release file -> [(CheckSums, Integer, FilePath)] -- ^ indexesInRelease filterp (Control [p]) = -- In a release file we should find one or more of the fields -- "SHA256", "SHA1", or "MD5Sum", each containing a list of triples either error (filter (\(_,_,fp) -> filterp fp)) $ msum [either Left (makeTuples makeSHA256) (maybe (Left "No SHA256 Field") makeTriples $ fieldValue "SHA256" p), either Left (makeTuples makeSHA1) (maybe (Left "No SHA1 Field") makeTriples $ fieldValue "SHA1" p), either Left (makeTuples makeMD5) (maybe (Left "No MD5Sum Field") makeTriples $ msum [fieldValue "MD5Sum" p, fieldValue "Md5Sum" p, fieldValue "MD5sum" p])] where makeSHA256 s = CheckSums {md5sum = Nothing, sha1 = Nothing, sha256 = Just s} makeSHA1 s = CheckSums {md5sum = Nothing, sha1 = Just s, sha256 = Nothing} makeMD5 s = CheckSums {md5sum = Just s, sha1 = Nothing, sha256 = Nothing} makeTuples :: (String -> CheckSums) -> [(Text, Text, Text)] -> Either String [(CheckSums, Integer, FilePath)] makeTuples mk triples = case partitionEithers (fmap (makeTuple mk) triples) of ([], tuples) -> Right tuples (s : _, _) -> Left s makeTuple :: (String -> CheckSums) -> (Text, Text, Text) -> Either String (CheckSums, Integer, FilePath) makeTuple mk (sum, size, fp) = (,,) <$> pure (mk (Text.unpack sum)) <*> maybe (Left ("Invalid size field: " ++ show size)) Right (readMaybe (Text.unpack size)) <*> pure (Text.unpack fp) makeTriples :: Text -> Either String [(Text, Text, Text)] makeTriples t = case partitionEithers (map makeTriple (Text.lines t)) of ([], xs) -> Right xs (s : _, _) -> Left s makeTriple :: Text -> Either String (Text, Text, Text) makeTriple t = case Text.words t of [a, b, c] -> Right (a, b, c) _ -> Left ("Invalid checksum line: " ++ show t) indexesInRelease _ x = error $ "Invalid release file: " <> Text.unpack (Text.concat (formatControl x)) -- |make a FileTuple for a file found on the local disk -- returns 'Nothing' if the file does not exist. tupleFromFilePath :: FilePath -> FilePath -> IO (Maybe FileTuple) tupleFromFilePath basePath fp = do e <- fileExist (basePath fp) if not e then return Nothing else do size <- getFileStatus (basePath fp) >>= return . fromIntegral . fileSize md5 <- L.readFile (basePath fp) >>= return . show . MD5.md5 sha1 <- L.readFile (basePath fp) >>= return . show . SHA.sha1 sha256 <- L.readFile (basePath fp) >>= return . show . SHA.sha256 return $ Just (CheckSums { md5sum = Just md5, sha1 = Just sha1, sha256 = Just sha256 }, size, fp) -- |find the Contents-* files. These are not listed in the Release file findContentsFiles :: (FilePath -> Bool) -> FilePath -> IO [FilePath] findContentsFiles filterP distDir = do files <- getDirectoryContents distDir return $ filter filterP $ filter (isPrefixOf "Contents-" . takeBaseName) files