{-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts, OverloadedStrings, ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing -fno-warn-orphans #-}
module Debian.Apt.Index
    ( update
    , Fetcher
    , CheckSums(..)
    , Compression(..)
    , FileTuple
    , Size
    , controlFromIndex
    , controlFromIndex'
    , findContentsFiles
    , findIndexes
    , indexesInRelease
    , tupleFromFilePath
    ) where

import Control.Monad
import qualified Codec.Compression.GZip as GZip
import qualified Codec.Compression.BZip as BZip
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as L
import qualified Data.Digest.Pure.MD5 as MD5
import qualified Data.Digest.Pure.SHA as SHA
import Data.Either (partitionEithers)
import Data.Function
import Data.List as List (null, intercalate, sortBy, isSuffixOf, isPrefixOf)
import qualified Data.Map as M
import Data.Monoid ((<>))
import Data.Text as Text (Text, unpack, concat, lines, null, words)
import Data.Time
import Debian.Apt.Methods
import Debian.Control (formatControl)
import Debian.Control.ByteString
import Debian.Control.Common
import Debian.Control.Text (decodeControl)
import Debian.Release
import Debian.Sources
import Debian.URI
import System.Directory
import System.FilePath ((</>))
import System.Posix.Files
import System.FilePath (takeBaseName)
--import qualified System.Unix.Misc as Misc
import Text.ParserCombinators.Parsec.Error
import Text.PrettyPrint (render)
import Text.PrettyPrint.HughesPJClass (pPrint)
import Text.Read (readMaybe)
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative (pure, (<$>), (<*>))
#endif

-- |Package indexes on the server are uncompressed or compressed with
-- gzip or bzip2. We do not know what will exist on the server until we
-- actually look. This type is used to mark the compression status of
-- what was actually found.
data Compression
    = BZ2 | GZ | Uncompressed
      deriving (Read, Show, Eq, Ord, Enum, Bounded)

data CheckSums
    = CheckSums { md5sum :: Maybe String
                , sha1   :: Maybe String
                , sha256 :: Maybe String
                }
      deriving (Read, Show, Eq)

-- |function-type for a function that downloads a file
-- The timestamp is optional. If the local file is as new or newer
-- than the remote copy, the download may be skipped.
--
-- A good choice might be a partially parameterized call to
-- 'Debian.Apt.Methods.fetch'
type Fetcher =
    URI ->              -- remote URI
    FilePath ->         -- local file name
    Maybe UTCTime ->    -- optional time stamp for local file
    IO Bool             -- True on success, False on failure

-- |update - similar to apt-get update

-- downloads the index files associated with a sources.list. The
-- downloaded index files will have the same basenames that apt-get uses
-- in \/var\/lib\/apt\/lists. You can almost use this function instead of
-- calling apt-get update. However there are a few key differences:
--  1. apt-get update also updates the binary cache files
--  2. apt-get update uses the partial directory and lock file in\ /var\/lib\/apt\/lists
--  3. apt-get update downloads the Release and Release.gpg files
update :: Fetcher -- ^ function that will do actually downloading
       -> FilePath -- ^ download indexes to the directory (must already exist)
       -> String -- ^ binary architecture
       -> [DebSource] -- ^ sources.list
       -> IO [Maybe (FilePath, Compression)] -- ^ (basename of index file, compression status)
update fetcher basePath arch sourcesList =
    mapM (uncurry $ fetchIndex fetcher) (map (\(uri, fp, _) -> (uri, (basePath </> fp))) (concatMap (indexURIs arch) sourcesList))

-- | download possibly compressed files
-- NOTE: index uri must not include the .bz2 or .gz extension
fetchIndex :: Fetcher -- ^ function that will do the actual fetch
           -> URI -- ^ remote URI of package index, without .bz2 or .gz extension
           -> FilePath -- ^ name to save downloaded file as, without .bz2 or .gz extension
           -> IO (Maybe (FilePath, Compression)) -- ^ (downloaded file name + extension, compression status)
fetchIndex fetcher uri localPath =
    do let localPath' = localPath ++ ".bz2"
       --lm <- getLastModified localPath'
       res <- fetcher (uri { uriPath = (uriPath uri) ++ ".bz2" }) localPath' Nothing
       if res
          then return $ Just (localPath', BZ2)
          else do let localPath' = localPath ++ ".gz"
                  lm <- getLastModified localPath'
                  res <- fetcher (uri { uriPath = (uriPath uri) ++ ".gz" }) localPath' lm
                  if res
                     then return $ Just (localPath', GZ)
                     else do lm <- getLastModified localPath
                             res <- fetcher (uri { uriPath = (uriPath uri) }) localPath lm
                             if res
                                then return (Just (localPath, Uncompressed))
                                else return Nothing

-- |examine a DebSource line, and calculate for each section:
--  - the URI to the uncompressed index file
--  - the basename that apt-get would name the downloaded index
-- FIXME: ExactPath dist will fail with error at runtime :(
indexURIs :: String -- ^ which binary architecture
          -> DebSource -- ^ line from sources.list
          -> [(URI, FilePath, DebSource)] -- ^ (remote uri, local name, deb source for just this section)
indexURIs arch debSource =
    map (\ section -> let (uri, fp) = calcPath (sourceType debSource) arch baseURI release section
                      in (uri,fp, debSource { sourceDist = (Right (release, [section])) }) ) sections
    where
      baseURI = sourceUri debSource
      (release, sections) =
          either (error $ "indexURIs: support not implemented for exact path: " ++ render (pPrint debSource)) id (sourceDist debSource)

-- |return a tuple for the section
--  - the URI to the uncompressed index file
--  - the basename that apt-get uses for the downloaded index
-- FIXME: support for Release and Release.gpg
calcPath :: SourceType -- ^ do we want Packages or Sources
         -> String  -- ^ The binary architecture to use for Packages
         -> URI -- ^ base URI as it appears in sources.list
         -> ReleaseName -- ^ the release (e.g., unstable, testing, stable, sid, etc)
         -> Section -- ^ the section (main, contrib, non-free, etc)
         -> (URI, [Char]) -- ^ (uri to index file, basename for the downloaded file)
calcPath srcType arch baseURI release section =
          let indexPath = case srcType of
                      DebSrc -> "source/Sources"
                      Deb -> "binary-" ++ arch </> "Packages"
              path = (uriPath baseURI) </> "dists" </> (releaseName' release) </> sectionName' section </> indexPath
          in (baseURI { uriPath = path }, addPrefix . escapePath $ path)
          where
            addPrefix s = prefix scheme user' pass' reg port ++ {- "_" ++ -} s
            prefix "http:" (Just user) Nothing (Just host) port = user ++ host ++ port
            prefix "http:" _ _ (Just host) port = host ++ port
            prefix "ftp:" _ _ (Just host) _ = host
            prefix "file:" Nothing Nothing Nothing "" = ""
            prefix "ssh:" (Just user) Nothing (Just host) port = user ++ host ++ port
            prefix "ssh:" _ _ (Just host) port = host ++ port
            prefix _ _ _ _ _ = error ("calcPath: unsupported uri: " ++ uriToString' baseURI)
            user' = maybeOfString user
            pass' = maybeOfString pass
            (user, pass) = break (== ':') userpass
            userpass = maybe "" uriUserInfo auth
            reg = maybeOfString $ maybe "" uriRegName auth
            port = maybe "" uriPort auth
            scheme = uriScheme baseURI
            auth = uriAuthority baseURI
            --path = uriPath baseURI

            escapePath :: String -> String
            escapePath s = intercalate "_" $ wordsBy (== '/') s

            maybeOfString :: String -> Maybe String
            maybeOfString "" = Nothing
            maybeOfString s = Just s

            wordsBy :: Eq a => (a -> Bool) -> [a] -> [[a]]
            wordsBy p s =
                case (break p s) of
                  (s, []) -> [s]
                  (h, t) -> h : wordsBy p (drop 1 t)

-- |Parse a possibly compressed index file.
controlFromIndex :: Compression -> FilePath -> L.ByteString -> Either ParseError (Control' Text)
controlFromIndex GZ path s = either Left (Right . decodeControl) . parseControl path . B.concat . L.toChunks . GZip.decompress $ s
controlFromIndex BZ2 path s = either Left (Right . decodeControl) . parseControl path . B.concat . L.toChunks . BZip.decompress $ s
controlFromIndex Uncompressed path s = either Left (Right . decodeControl) . parseControl path . B.concat . L.toChunks $ s

-- |parse an index possibly compressed file
controlFromIndex' :: Compression -> FilePath -> IO (Either ParseError (Control' Text))
controlFromIndex' compression path = L.readFile path >>= return . controlFromIndex compression path

type Size = Integer
type FileTuple = (CheckSums, Size, FilePath)

-- |A release file contains a list of indexes (Packages\/Sources). Each
-- Package or Source index may appear multiple times because it may be
-- compressed several different ways. This function will return an
-- assoc list where the key is the name of the uncompressed package
-- index name and the value is the list of (file, compression) which
-- decompress to the key.
groupIndexes :: [FileTuple] -> [(FilePath, [(FileTuple, Compression)])]
groupIndexes indexFiles =
    M.toList $ M.fromListWith combine $ map makeKV indexFiles
    where
      makeKV fileTuple@(_,_,fp) =
          let (name, compressionMethod) = uncompressedName fp
          in
            (name, [(fileTuple, compressionMethod)])
      combine = (\x y -> sortBy (compare `on` snd) (x ++ y))
{-
      with t@(_,_,fp) m =
          let (un, compression) =
          in
            M.insertWith
-}

{-
groupIndexes' :: String ->[FileTuple] -> [(FilePath, [(FileTuple, Compression)])]
groupIndexes' iType indexFiles =
    M.toList (foldr (insertType iType) M.empty indexFiles)
    where
      insertType iType t@(_,_,fp) m =
          case uncompressedName' iType fp of
            Nothing -> m
            (Just (un, compression)) ->
                M.insertWith (\x y -> sortBy (compare `on` snd) (x ++ y)) un [(t, compression)] m
-}

-- |The release file contains the checksums for the uncompressed
-- package indexes, even if the uncompressed package indexes are not
-- stored on the server. This function returns the list of files that
-- actually exist.
filterExists :: FilePath -> (FilePath, [(FileTuple, Compression)]) -> IO (FilePath, [(FileTuple, Compression)])
filterExists distDir (fp, alternatives) =
          do e <- filterM ( \((_,_,fp),_) -> fileExist (distDir </> fp)) alternatives
             -- when (null e) (error $ "None of these files exist: " ++ show alternatives)
             return (fp, e)

findIndexes :: FilePath -> String -> [FileTuple] -> IO [(FileTuple, Compression)]
findIndexes distDir iType controlFiles =
    let indexes = groupIndexes controlFiles
    in
      do indexes' <- mapM (filterExists distDir) (filter (isType iType) indexes)
         return $ map (head . snd) (filter (not . List.null . snd) indexes')
    where
      isType iType (fp, _) = iType `isSuffixOf` fp

{-
findIndexes' :: FilePath -> String -> [FileTuple] -> IO [(FileTuple, Compression)]
findIndexes' distDir iType controlFiles =
    let m = groupIndexes' iType controlFiles
    in
      do m' <- mapM (filterExists distDir) m
         return $ map (head . snd) (filter (not . null . snd) m')
-}

      -- insertType :: String -> (CheckSums, Integer, FilePath) -> M.Map FilePath ((CheckSums, Integer, FilePath), Compression) -> M.Map FilePath ((CheckSums, Integer, FilePath), Compression)

{-
uncompressedName' :: String -> FilePath -> Maybe (FilePath, Compression)
uncompressedName' iType fp
          | isSuffixOf iType fp = Just (fp, Uncompressed)
          | isSuffixOf (iType ++".gz") fp = Just (reverse . (drop 3) . reverse $ fp, GZ)
          | isSuffixOf (iType ++".bz2") fp = Just (reverse . (drop 4) . reverse $ fp, BZ2)
          | otherwise = Nothing
-}

uncompressedName :: FilePath -> (FilePath, Compression)
uncompressedName fp
          | isSuffixOf ".gz"  fp = (reverse . (drop 3) . reverse $ fp, GZ)
          | isSuffixOf ".bz2" fp = (reverse . (drop 4) . reverse $ fp, BZ2)
          | otherwise            = (fp, Uncompressed)

indexesInRelease :: (FilePath -> Bool)
                 -> Control' Text -- ^ A release file
                 -> [(CheckSums, Integer, FilePath)] -- ^
indexesInRelease filterp (Control [p]) =
    -- In a release file we should find one or more of the fields
    -- "SHA256", "SHA1", or "MD5Sum", each containing a list of triples
    either error (filter (\(_,_,fp) -> filterp fp)) $
           msum [either Left (makeTuples makeSHA256) (maybe (Left "No SHA256 Field") makeTriples $ fieldValue "SHA256" p),
                 either Left (makeTuples makeSHA1) (maybe (Left "No SHA1 Field") makeTriples $ fieldValue "SHA1" p),
                 either Left (makeTuples makeMD5) (maybe (Left "No MD5Sum Field") makeTriples $ msum [fieldValue "MD5Sum" p,
                                                                                                      fieldValue "Md5Sum" p,
                                                                                                      fieldValue "MD5sum" p])]
    where
      makeSHA256 s = CheckSums {md5sum = Nothing, sha1 = Nothing, sha256 = Just s}
      makeSHA1 s = CheckSums {md5sum = Nothing, sha1 = Just s, sha256 = Nothing}
      makeMD5 s = CheckSums {md5sum = Just s, sha1 = Nothing, sha256 = Nothing}

      makeTuples :: (String -> CheckSums) -> [(Text, Text, Text)] -> Either String [(CheckSums, Integer, FilePath)]
      makeTuples mk triples =
          case partitionEithers (fmap (makeTuple mk) triples) of
            ([], tuples) -> Right tuples
            (s : _, _) -> Left s

      makeTuple :: (String -> CheckSums) -> (Text, Text, Text) -> Either String (CheckSums, Integer, FilePath)
      makeTuple mk (sum, size, fp) =
          (,,) <$> pure (mk (Text.unpack sum))
               <*> maybe (Left ("Invalid size field: " ++ show size)) Right (readMaybe (Text.unpack size))
               <*> pure (Text.unpack fp)

      makeTriples :: Text -> Either String [(Text, Text, Text)]
      makeTriples t = case partitionEithers (map makeTriple (Text.lines t)) of
                        ([], xs) -> Right xs
                        (s : _, _) -> Left s

      makeTriple :: Text -> Either String (Text, Text, Text)
      makeTriple t = case Text.words t of
                       [a, b, c] -> Right (a, b, c)
                       _ -> Left ("Invalid checksum line: " ++ show t)
indexesInRelease _ x = error $ "Invalid release file: " <> Text.unpack (Text.concat (formatControl x))

-- |make a FileTuple for a file found on the local disk
-- returns 'Nothing' if the file does not exist.
tupleFromFilePath :: FilePath -> FilePath -> IO (Maybe FileTuple)
tupleFromFilePath basePath fp =
          do e <- fileExist (basePath </> fp)
             if not e
              then return Nothing
              else do size <- getFileStatus (basePath </> fp) >>= return . fromIntegral . fileSize
                      md5 <- L.readFile (basePath </> fp) >>= return . show . MD5.md5
                      sha1 <- L.readFile (basePath </> fp) >>= return . show . SHA.sha1
                      sha256 <- L.readFile (basePath </> fp) >>= return . show . SHA.sha256
                      return $ Just (CheckSums { md5sum = Just md5, sha1 = Just sha1, sha256 = Just sha256 }, size, fp)

-- |find the Contents-* files. These are not listed in the Release file
findContentsFiles :: (FilePath -> Bool) -> FilePath -> IO [FilePath]
findContentsFiles filterP distDir =
          do files <- getDirectoryContents distDir
             return $ filter filterP $ filter (isPrefixOf "Contents-" . takeBaseName) files