----------------------------------------------------------------------------- -- -- Module : Network.Google.Storage.Sync -- Copyright : (c) 2012-13 Brian W Bush -- License : MIT -- -- Maintainer : Brian W Bush -- Stability : Stable -- Portability : POSIX -- -- | Synchronization of filesystem directories with Google Storage buckets. -- ----------------------------------------------------------------------------- {-# LANGUAGE BangPatterns #-} module Network.Google.Storage.Sync ( RegexExclusion , sync ) where import Control.Exception (SomeException, finally, handle) import Control.Monad (filterM, liftM, when) import Crypto.GnuPG (Recipient) import Crypto.MD5 (MD5Info, md5Base64) import qualified Data.ByteString.Lazy as LBS (ByteString, readFile) import qualified Data.Digest.Pure.MD5 as MD5 (md5) import Data.List ((\\), deleteFirstsBy, intersectBy) import Data.Maybe (fromJust, mapMaybe) import Data.Time.Clock (UTCTime, addUTCTime, getCurrentTime) import Data.Time.Clock.POSIX (posixSecondsToUTCTime) import Data.Time.Format (parseTime) import Network.Google (AccessToken, ProjectId, toAccessToken) import Network.Google.OAuth2 (OAuth2Client(..), OAuth2Tokens(..), refreshTokens, validateTokens) import Network.Google.Storage (BucketName, KeyName, MIMEType, StorageAcl, deleteObjectUsingManager, getBucketUsingManager, putObjectUsingManager) import Network.Google.Storage.Encrypted (putEncryptedObject, putEncryptedObjectUsingManager) import Network.HTTP.Conduit (closeManager, def, newManager) import System.Directory (doesDirectoryExist, getDirectoryContents) import System.FilePath (pathSeparator) import System.IO (hFlush, stdout) import System.Locale (defaultTimeLocale) import System.Posix.Files (fileSize, getFileStatus, modificationTime) import Text.Regex.Posix ((=~)) import Text.XML.Light (Element, QName(qName), filterChildrenName, filterChildName, ppTopElement, strContent) -- | A regular expression used for excluding files from synchronization. type RegexExclusion = String -- | Synchronize a filesystem directory with a Google Storage bucket. sync :: ProjectId -- ^ The Google project ID. -> StorageAcl -- ^ The pre-defined access control. -> BucketName -- ^ The bucket name. -> OAuth2Client -- ^ The OAuth 2.0 client information. -> OAuth2Tokens -- ^ The OAuth 2.0 tokens. -> FilePath -- ^ The directory to be synchronized. -> [Recipient] -- ^ The recipients for GnuPG encryption of the uploaded files. -> [RegexExclusion] -- ^ The regular expressions used for excluding files from synchronization. -> Bool -- ^ Whether to write a file \".md5sum\" of MD5 sums of synchronized files into the root directory. -> Bool -- ^ Whether to delete keys from the bucket that do not correspond to files on the filesystem. -> IO () -- ^ The IO action for the synchronization. sync projectId acl bucket client tokens directory recipients exclusions md5sums purge = do manager <- newManager def putStrLn $ "DIRECTORY " ++ directory putStrLn $ "PROJECT " ++ projectId putStrLn $ "BUCKET " ++ bucket putStrLn $ "ACCESS " ++ show acl finally ( sync' (getBucketUsingManager manager projectId bucket) ((if null recipients then putObjectUsingManager manager else putEncryptedObjectUsingManager manager recipients) projectId acl bucket) (deleteObjectUsingManager manager projectId bucket) client tokens directory (null recipients) (makeExcluder exclusions) md5sums purge )( closeManager manager ) -- | A function for listing a bucket. type Lister = AccessToken -- ^ The OAuth 2.0 access token. -> IO Element -- ^ The action returning the XML with the metadata for the objects. -- | A function for putting an object into a bucket. type Putter = KeyName -- ^ The object's key. -> Maybe MIMEType -- ^ The object's MIME type. -> LBS.ByteString -- ^ The object's data. -> Maybe MD5Info -- ^ The MD5 checksum. -> AccessToken -- ^ The OAuth 2.0 access token. -> IO [(String, String)] -- ^ The action to put the object and return the response header. -- | A function for deleting an object from a bucket. type Deleter = KeyName -- ^ The object's key. -> AccessToken -- ^ The OAuth 2.0 access token. -> IO [(String, String)] -- ^ The action to put the object and return the response header. -- | A function for determining whether to exclude an object from synchronization. type Excluder = ObjectMetadata -- ^ The object's metadata. -> Bool -- ^ Whether to exclude the object from synchronization. -- | An expiration time and the tokens which expire then. type TokenClock = (UTCTime, OAuth2Tokens) -- | Check whether a token has expired and refresh it if necessary. checkExpiration :: OAuth2Client -- ^ The OAuth 2.0 client information. -> TokenClock -- ^ The token and its expiration. -> IO TokenClock -- ^ The action to update the token and its expiration. checkExpiration client (expirationTime, tokens) = do now <- getCurrentTime if now > expirationTime then do tokens' <- refreshTokens client tokens start <- getCurrentTime let expirationTime' = addUTCTime (fromRational (expiresIn tokens') - 60) start putStrLn $ "REFRESH " ++ show expirationTime' return (expirationTime', tokens') else return (expirationTime, tokens) -- | Make a function to exclude objects based on regular expressions for filenames. makeExcluder :: [RegexExclusion] -- ^ The regular expressions. -> Excluder -- ^ The function for excluding objects. makeExcluder exclusions (ObjectMetadata candidate _ _ _) = let match :: String -> Bool match exclusion = candidate =~ exclusion in not $ any match exclusions -- | Synchronize a filesystem directory with a Google Storage bucket. sync' :: Lister -- ^ The bucket listing function. -> Putter -- ^ The object putting function. -> Deleter -- ^ The object deletion function. -> OAuth2Client -- ^ The OAuth 2.0 client information. -> OAuth2Tokens -- ^ The OAuth 2.0 tokens. -> FilePath -- ^ The directory to be synchronized. -> Bool -- ^ Whether to use ETags in comparing object metadata. -> Excluder -- ^ The function for excluding objects. -> Bool -- ^ Whether to write a file \".md5sum\" of MD5 sums of synchronized files into the root directory. -> Bool -- ^ Whether to delete keys from the bucket that do not correspond to files on the filesystem. -> IO () -- ^ The IO action for the synchronization. sync' lister putter deleter client tokens directory byETag excluder md5sums purge = do putStr "LOCAL " hFlush stdout local <- walkDirectories directory print $ length local now <- getCurrentTime tokenClock@(_, tokens') <- checkExpiration client (addUTCTime (-60) now, tokens) putStr "REMOTE " hFlush stdout remote' <- lister $ toAccessToken $ accessToken tokens' let remote = parseMetadata remote' print $ length remote let tolerance = 300 sameKey :: ObjectMetadata -> ObjectMetadata -> Bool sameKey (ObjectMetadata key _ _ _) (ObjectMetadata key' _ _ _) = key == key' sameETag :: ObjectMetadata -> ObjectMetadata -> Bool sameETag (ObjectMetadata key eTag _ _) (ObjectMetadata key' eTag' _ _) = key == key' && fst eTag == fst eTag' earlierTime :: ObjectMetadata -> ObjectMetadata -> Bool earlierTime (ObjectMetadata key _ _ time) (ObjectMetadata key' eTag' _ time') = key == key' && time > addUTCTime tolerance time' putStr "EXCLUDED " hFlush stdout let local' = filter excluder local print $ length local - length local' putStr "PUTS " hFlush stdout let changedObjects = deleteFirstsBy (if byETag then sameETag else earlierTime) local' remote print $ length changedObjects putStr "DELETES " hFlush stdout let deletedObjects = deleteFirstsBy sameKey remote local' print $ length deletedObjects tokenClock' <- walkPutter client tokenClock directory putter changedObjects tokenClock'' <- if purge then walkDeleter client tokenClock' deleter deletedObjects else return tokenClock' when md5sums $ writeFile (directory ++ "/.md5sum") $ unlines $ map (\x -> (fst . eTag) x ++ " ./" ++ key x) local' -- | Put a list of objects. walkPutter :: OAuth2Client -- ^ The OAuth 2.0 client information. -> TokenClock -- ^ The token and its expiration. -> FilePath -- ^ The directory to be synchronized. -> Putter -- ^ The object putting function. -> [ObjectMetadata] -- ^ Description of the objects to be put. -> IO TokenClock -- ^ The action to update the token and its expiration. walkPutter _ tokenClock _ _ [] = return tokenClock walkPutter client tokenClock directory putter (x : xs) = do let key' = key x tokenClock'@(_, tokens') <- checkExpiration client tokenClock putStrLn $ "PUT " ++ key' handle handler ( do bytes <- LBS.readFile $ directory ++ [pathSeparator] ++ key' putter key' Nothing bytes (Just $ eTag x) (toAccessToken $ accessToken tokens') return () ) walkPutter client tokenClock' directory putter xs -- | Delete a list of objects. walkDeleter :: OAuth2Client -- ^ The OAuth 2.0 client information. -> TokenClock -- ^ The token and its expiration. -> Deleter -- ^ The object deletion function. -> [ObjectMetadata] -- ^ Description of the objects to be deleted. -> IO TokenClock -- ^ The action to update the token and its expiration. walkDeleter _ tokenClock _ [] = return tokenClock walkDeleter client tokenClock deleter (x : xs) = do let key' = key x tokenClock'@(_, tokens') <- checkExpiration client tokenClock putStrLn $ "DELETE " ++ key' handle handler ( do deleter key' (toAccessToken $ accessToken tokens') return () ) walkDeleter client tokenClock' deleter xs -- | handler :: SomeException -> IO () handler exception = putStrLn $ " FAIL " ++ show exception -- | Object metadata. data ObjectMetadata = ObjectMetadata { key :: String -- ^ The object's key. , eTag :: MD5Info -- ^ The object's MD5 sum. , size :: Int -- ^ The object's size, in bytes. , lastModified :: UTCTime -- ^ The object's modification time. } deriving (Show) -- | Parse XML metadata into object descriptions. parseMetadata :: Element -- ^ The XML metadata. -> [ObjectMetadata] -- ^ The object descriptions. parseMetadata root = let makeMetadata :: Element -> Maybe ObjectMetadata makeMetadata element = do let finder :: String -> Element -> Maybe String finder name = liftM strContent . filterChildName ((name ==) . qName) key <- finder "Key" element eTag <- finder "ETag" element size <- finder "Size" element lastModified <- finder "LastModified" element return $ ObjectMetadata key (tail . init $ eTag, undefined) (read size) (fromJust $ parseTime defaultTimeLocale "%FT%T%QZ" lastModified) in mapMaybe makeMetadata $ filterChildrenName (("Contents" ==) . qName) root -- | Gather file metadata from the file system. walkDirectories :: FilePath -- ^ The directory to be synchronized. -> IO [ObjectMetadata] -- ^ Action returning file descriptions. walkDirectories directory = walkDirectories' (directory ++ [pathSeparator]) [""] -- | Gather file metadata from the file system. walkDirectories' :: FilePath -- ^ The directory to be synchronized. -> [FilePath] -- ^ The subdirectories still remaining to be described. -> IO [ObjectMetadata] -- ^ Action returning file descriptions. walkDirectories' _ [] = return [] walkDirectories' directory (y : ys) = handle (( \exception -> do putStrLn $ " LIST " ++ y putStrLn $ " FAIL " ++ show exception walkDirectories' directory ys ) :: SomeException -> IO [ObjectMetadata]) ( do let makeMetadata :: FilePath -> IO ObjectMetadata makeMetadata file = do let key = tail file path = directory ++ key bytes <- LBS.readFile path status <- getFileStatus path let lastTime :: UTCTime lastTime = posixSecondsToUTCTime $ realToFrac $ modificationTime status size :: Int size = fromIntegral $ fileSize status let !eTag = md5Base64 bytes let !x = fst eTag let !y = snd eTag return $ ObjectMetadata key eTag size lastTime files <- liftM (map ((y ++ [pathSeparator]) ++) . ( \\ [".", ".."])) $ getDirectoryContents (directory ++ y) y' <- filterM (doesDirectoryExist . (directory ++)) files x' <- mapM makeMetadata $ files \\ y' liftM (x' ++) $ walkDirectories' directory (y' ++ ys) )