{-# LANGUAGE DeriveDataTypeable #-}
module Debian.Apt.Index
    ( update
    , Fetcher(..)
    , CheckSums(..)
    , Compression(..)
    , FileTuple
    , Size
    , controlFromIndex
    , controlFromIndex'
    , findContentsFiles
    , findIndexes
    , indexesInRelease
    , tupleFromFilePath
    ) where

import Control.Monad
import qualified Codec.Compression.GZip as GZip
import qualified Codec.Compression.BZip as BZip
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as L
import Data.Function
import Data.List
import qualified Data.Map as M
import Data.Time
import Data.Generics
import Debian.Apt.Methods
import Debian.Control (formatControl)
import Debian.Control.ByteString
import Debian.Control.Common
import Debian.Repo.Types
import Debian.URI
import System.Directory
import System.FilePath ((</>))
import System.Posix.Files
import System.Unix.FilePath
import qualified System.Unix.Misc as Misc
import Text.ParserCombinators.Parsec.Error


-- |Package indexes on the server are uncompressed or compressed with
-- gzip or bzip2. We do not know what will exist on the server until we
-- actually look. This type is used to mark the compression status of
-- what was actually found.
data Compression
    = BZ2 | GZ | Uncompressed
      deriving (Read, Show, Eq, Ord, Data, Typeable, Enum, Bounded)

data CheckSums 
    = CheckSums { md5sum :: Maybe String
                , sha1   :: Maybe String
                , sha256 :: Maybe String
                }
      deriving (Read, Show, Eq)

-- |The type of a function that downloads a file.
-- The timestamp is optional. If the local file is as new as or newer
-- than the remote copy, the download may be skipped.
--
-- A good choice might be a partially parameterized call to
-- 'Debian.Apt.Methods.fetch'
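--
-- A minimal sketch (an assumption, not part of this module: it presumes
-- 'Debian.Apt.Methods.fetch' takes its callbacks and configuration before
-- the URI, local path, and time stamp, and that 'emptyFetchCallbacks' is
-- available):
--
-- > fetcher :: Fetcher
-- > fetcher = fetch emptyFetchCallbacks []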
type Fetcher =
    URI ->		-- remote URI
    FilePath ->		-- local file name
    Maybe UTCTime ->	-- optional time stamp for local file
    IO Bool		-- True on success, False on failure

-- |update - similar to apt-get update
--
-- Downloads the index files associated with a sources.list. The
-- downloaded index files will have the same basenames that apt-get uses
-- in \/var\/lib\/apt\/lists. You can almost use this function instead of
-- calling apt-get update. However, there are a few key differences:
--
--  1. apt-get update also updates the binary cache files
--
--  2. apt-get update uses the partial directory and lock file in \/var\/lib\/apt\/lists
--
--  3. apt-get update downloads the Release and Release.gpg files
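--
-- A usage sketch (hypothetical directory; @sources@ is a parsed
-- sources.list and @fetcher@ is a 'Fetcher' such as the sketch above):
--
-- > do results <- update fetcher "/var/cache/myapt/lists" "i386" sources
-- >    mapM_ print results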
update :: Fetcher -- ^ function that will do actually downloading
       -> FilePath -- ^ download indexes to the directory (must already exist)
       -> String -- ^ binary architecture
       -> [DebSource] -- ^ sources.list 
       -> IO [Maybe (FilePath, Compression)] -- ^ (basename of index file, compression status)
update fetcher basePath arch sourcesList =
    mapM (uncurry $ fetchIndex fetcher) (map (\(uri, fp, _) -> (uri, (basePath </> fp))) (concatMap (indexURIs arch) sourcesList))

-- |Download a possibly compressed package index, trying .bz2 first,
-- then .gz, then the uncompressed file.
-- NOTE: the index URI must not include the .bz2 or .gz extension
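--
-- For example, if only the bzip2 variant exists on the server
-- (hypothetical paths):
--
-- > fetchIndex fetcher uri "lists/example_Packages"
-- >   ==> Just ("lists/example_Packages.bz2", BZ2)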
fetchIndex :: Fetcher -- ^ function that will do the actual fetch
           -> URI -- ^ remote URI of package index, without .bz2 or .gz extension
           -> FilePath -- ^ name to save downloaded file as, without .bz2 or .gz extension
           -> IO (Maybe (FilePath, Compression)) -- ^ (downloaded file name + extension, compression status)
fetchIndex fetcher uri localPath =
    do let localPath' = localPath ++ ".bz2"
       lm <- getLastModified localPath'
       res <- fetcher (uri { uriPath = (uriPath uri) ++ ".bz2" }) localPath' lm
       if res
          then return $ Just (localPath', BZ2)
          else do let localPath' = localPath ++ ".gz"
                  lm <- getLastModified localPath'
                  res <- fetcher (uri { uriPath = (uriPath uri) ++ ".gz" }) localPath' lm
                  if res
                     then return $ Just (localPath', GZ)
                     else do lm <- getLastModified localPath
                             res <- fetcher (uri { uriPath = (uriPath uri) }) localPath lm
                             if res
                                then return (Just (localPath, Uncompressed))
                                else return Nothing

-- |examine a DebSource line, and calculate for each section:
--  - the URI to the uncompressed index file
--  - the basename that apt-get would name the downloaded index
-- FIXME: ExactPath dist will fail with error at runtime :(
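--
-- For example, the line @deb http:\/\/ftp.debian.org\/debian sid main@
-- with architecture @i386@ yields, for the @main@ section, the remote URI
--
-- > http://ftp.debian.org/debian/dists/sid/main/binary-i386/Packages
--
-- and the local basename
--
-- > ftp.debian.org_debian_dists_sid_main_binary-i386_Packages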
indexURIs :: String -- ^ which binary architecture
          -> DebSource -- ^ line from sources.list
          -> [(URI, FilePath, DebSource)] -- ^ (remote uri, local name, deb source for just this section)
indexURIs arch debSource =
    map (\ section -> let (uri, fp) = calcPath (sourceType debSource) arch baseURI release section
                      in (uri,fp, debSource { sourceDist = (Right (release, [section])) }) ) sections
    where
      baseURI = sourceUri debSource
      (release, sections) =
          either (error $ "indexURIs: support not implemented for exact path: " ++ show debSource) id (sourceDist debSource)

-- |return a tuple for the section 
--  - the URI to the uncompressed index file
--  - the basename that apt-get uses for the downloaded index
-- FIXME: support for Release and Release.gpg
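--
-- For example, for 'DebSrc' with base URI @http:\/\/ftp.debian.org\/debian@,
-- release @sid@ and section @main@, the result is
--
-- > ( http://ftp.debian.org/debian/dists/sid/main/source/Sources
-- > , "ftp.debian.org_debian_dists_sid_main_source_Sources" )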
calcPath :: SourceType -- ^ do we want Packages or Sources
         -> String  -- ^ The binary architecture to use for Packages
         -> URI -- ^ base URI as it appears in sources.list
         -> ReleaseName -- ^ the release (e.g., unstable, testing, stable, sid, etc)
         -> Section -- ^ the section (main, contrib, non-free, etc)
         -> (URI, [Char]) -- ^ (uri to index file, basename for the downloaded file)
calcPath srcType arch baseURI release section =
          let indexPath = case srcType of
                      DebSrc -> "source/Sources"
                      Deb -> "binary-" ++ arch </> "Packages"
              path = (uriPath baseURI) </> "dists" </> (releaseName' release) </> sectionName' section </> indexPath
          in (baseURI { uriPath = path }, addPrefix . escapePath $ path)
          where
            addPrefix s = prefix scheme user' pass' reg port ++ s
            prefix "http:" (Just user) Nothing (Just host) port = user ++ host ++ port
            prefix "http:" _ _ (Just host) port = host ++ port
            prefix "ftp:" _ _ (Just host) _ = host
            prefix "file:" Nothing Nothing Nothing "" = ""
            prefix "ssh:" (Just user) Nothing (Just host) port = user ++ host ++ port
            prefix "ssh:" _ _ (Just host) port = host ++ port
            prefix _ _ _ _ _ = error ("calcPath: unsupported uri: " ++ uriToString' baseURI)
            user' = maybeOfString user
            pass' = maybeOfString pass
            (user, pass) = break (== ':') userpass
            userpass = maybe "" uriUserInfo auth
            reg = maybeOfString $ maybe "" uriRegName auth
            port = maybe "" uriPort auth
            scheme = uriScheme baseURI
            auth = uriAuthority baseURI

            escapePath :: String -> String
            escapePath s = intercalate "_" $ wordsBy (== '/') s

            maybeOfString :: String -> Maybe String
            maybeOfString "" = Nothing
            maybeOfString s = Just s

            wordsBy :: Eq a => (a -> Bool) -> [a] -> [[a]]
            wordsBy p s = 
                case (break p s) of
                  (s, []) -> [s]
                  (h, t) -> h : wordsBy p (drop 1 t)

-- |Parse a possibly compressed index file.
controlFromIndex :: Compression -> FilePath -> L.ByteString -> Either ParseError (Control' B.ByteString)
controlFromIndex GZ path s = parseControl path . B.concat . L.toChunks . GZip.decompress $ s
controlFromIndex BZ2 path s = parseControl path . B.concat . L.toChunks . BZip.decompress $ s
controlFromIndex Uncompressed path s = parseControl path . B.concat . L.toChunks $ s

-- |Read a possibly compressed index file from disk and parse it.
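--
-- A usage sketch (hypothetical file name):
--
-- > do result <- controlFromIndex' GZ "example_Packages.gz"
-- >    case result of
-- >      Left err -> print err
-- >      Right (Control paragraphs) -> print (length paragraphs)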
controlFromIndex' :: Compression -> FilePath -> IO (Either ParseError (Control' B.ByteString))
controlFromIndex' compression path = fmap (controlFromIndex compression path) (L.readFile path)

type Size = Integer
type FileTuple = (CheckSums, Size, FilePath) 

-- |A release file contains a list of indexes (Packages\/Sources). Each
-- Packages or Sources index may appear multiple times because it may be
-- compressed in several different ways. This function returns an
-- assoc list where the key is the name of the uncompressed package
-- index and the value is the list of (file, compression) pairs which
-- decompress to the key.
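--
-- For example, entries for @main\/binary-i386\/Packages@,
-- @main\/binary-i386\/Packages.gz@, and @main\/binary-i386\/Packages.bz2@
-- are all grouped under the key @main\/binary-i386\/Packages@.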
groupIndexes :: [FileTuple] -> [(FilePath, [(FileTuple, Compression)])]
groupIndexes indexFiles =
    M.toList $ M.fromListWith combine $ map makeKV indexFiles
    where
      makeKV fileTuple@(_,_,fp) =
          let (name, compressionMethod) = uncompressedName fp
          in
            (name, [(fileTuple, compressionMethod)])
      combine = (\x y -> sortBy (compare `on` snd) (x ++ y))

groupIndexes' :: String -> [FileTuple] -> [(FilePath, [(FileTuple, Compression)])]
groupIndexes' iType indexFiles =
    M.toList (foldr (insertType iType) M.empty indexFiles)
    where
      insertType iType t@(_,_,fp) m =
          case uncompressedName' iType fp of
            Nothing -> m
            (Just (un, compression)) ->
                M.insertWith (\x y -> sortBy (compare `on` snd) (x ++ y)) un [(t, compression)] m

-- |The release file contains the checksums for the uncompressed
-- package indexes, even if the uncompressed package indexes are not
-- stored on the server. This function returns the list of files that
-- actually exist.
filterExists :: FilePath -> (FilePath, [(FileTuple, Compression)]) -> IO (FilePath, [(FileTuple, Compression)])
filterExists distDir (fp, alternatives) =
          do e <- filterM ( \((_,_,fp),_) -> fileExist (distDir </> fp)) alternatives
             -- when (null e) (error $ "None of these files exist: " ++ show alternatives)
             return (fp, e)

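-- |Pick, for each index of the given type (e.g. @Packages@ or
-- @Sources@) listed in the release file, the variant that actually
-- exists in the dist directory, preferring the most compressed one.
--
-- A usage sketch (hypothetical paths):
--
-- > do Right release <- controlFromIndex' Uncompressed "dists/sid/Release"
-- >    let tuples = indexesInRelease (const True) release
-- >    findIndexes "dists/sid" "Packages" tuples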
findIndexes :: FilePath -> String -> [FileTuple] -> IO [(FileTuple, Compression)]
findIndexes distDir iType controlFiles =
    let indexes = groupIndexes controlFiles
    in
      do indexes' <- mapM (filterExists distDir) (filter (isType iType) indexes)
         return $ map (head . snd) (filter (not . null . snd) indexes')
    where
      isType iType (fp, _) = iType `isSuffixOf` fp 

findIndexes' :: FilePath -> String -> [FileTuple] -> IO [(FileTuple, Compression)]
findIndexes' distDir iType controlFiles =
    let m = groupIndexes' iType controlFiles
    in
      do m' <- mapM (filterExists distDir) m
         return $ map (head . snd) (filter (not . null . snd) m')

uncompressedName' :: String -> FilePath -> Maybe (FilePath, Compression)
uncompressedName' iType fp
          | isSuffixOf iType fp = Just (fp, Uncompressed)
          | isSuffixOf (iType ++".gz") fp = Just (reverse . (drop 3) . reverse $ fp, GZ)
          | isSuffixOf (iType ++".bz2") fp = Just (reverse . (drop 4) . reverse $ fp, BZ2)
          | otherwise = Nothing

uncompressedName :: FilePath -> (FilePath, Compression)
uncompressedName fp
          | isSuffixOf ".gz"  fp = (reverse . (drop 3) . reverse $ fp, GZ)
          | isSuffixOf ".bz2" fp = (reverse . (drop 4) . reverse $ fp, BZ2)
          | otherwise            = (fp, Uncompressed)

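-- |Extract the (checksums, size, path) tuples from the MD5Sum field of
-- a Release file, keeping only the paths accepted by the predicate.
-- Each line of the field is expected to look like (hypothetical values):
--
-- > 30bdcc2b0443a7a930ba9d42adfd9ccc   1234567 main/binary-i386/Packages.gz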
indexesInRelease :: (FilePath -> Bool)
                 -> Control -- ^ A release file
                 -> [(CheckSums, Integer, FilePath)] -- ^ (checksums, size, path) for each index accepted by the predicate
indexesInRelease filterp (Control [p]) =
    let md5sums =
            case md5sumField p of
              (Just md5) -> md5
              Nothing -> error $ "Did not find MD5Sum field."
    in
      filter (\(_,_,fp) -> filterp fp) $ map (makeTuple . B.words) $ filter (not . B.null) (B.lines md5sums)
    where
      makeTuple :: [B.ByteString] -> (CheckSums, Integer, FilePath)
      makeTuple [md5sum, size, fp] = (CheckSums { md5sum = Just (B.unpack md5sum), sha1 = Nothing, sha256 = Nothing }, read (B.unpack size), B.unpack fp)
      makeTuple x = error $ "Invalid line in release file: " ++ show x
indexesInRelease _ x = error $ "Invalid release file: " ++ B.unpack (B.concat (formatControl x))

-- |make a FileTuple for a file found on the local disk.
-- Returns 'Nothing' if the file does not exist.
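--
-- A usage sketch (hypothetical file name):
--
-- > tupleFromFilePath "/var/lib/apt/lists" "example_Packages"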
tupleFromFilePath :: FilePath -> FilePath -> IO (Maybe FileTuple)
tupleFromFilePath basePath fp =
          do e <- fileExist (basePath </> fp)
             if not e
              then return Nothing
              else do size <- getFileStatus (basePath </> fp) >>= return . fromIntegral . fileSize
                      md5 <- Misc.md5sum (basePath </> fp)
                      return $ Just (CheckSums { md5sum = Just md5, sha1 = Nothing, sha256 = Nothing }, size, fp)

-- |find the Contents-* files in a dists directory. These are not listed in the Release file.
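--
-- For example (hypothetical directory contents):
--
-- > findContentsFiles (isSuffixOf ".gz") "dists/sid"
-- >   ==> ["Contents-i386.gz", "Contents-amd64.gz"]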
findContentsFiles :: (FilePath -> Bool) -> FilePath -> IO [FilePath]
findContentsFiles filterP distDir =
          do files <- getDirectoryContents distDir
             return $ filter filterP $ filter (isPrefixOf "Contents-" . baseName) files