{- git-annex file matching - - Copyright 2012-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} {-# LANGUAGE CPP #-} module Annex.FileMatcher ( GetFileMatcher, checkFileMatcher, checkFileMatcher', checkMatcher, checkMatcher', matchAll, PreferredContentData(..), preferredContentTokens, preferredContentKeylessTokens, preferredContentParser, ParseToken, parsedToMatcher, mkLargeFilesParser, largeFilesMatcher, ) where import qualified Data.Map as M import Annex.Common import Limit import Utility.Matcher import Types.Group import qualified Annex import Types.FileMatcher import Git.FilePath import Types.Remote (RemoteConfig) import Annex.CheckAttr import Annex.Magic import Git.CheckAttr (unspecifiedAttr) import Data.Either import qualified Data.Set as S type GetFileMatcher = FilePath -> Annex (FileMatcher Annex) checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool checkFileMatcher getmatcher file = checkFileMatcher' getmatcher file (return True) -- | Allows running an action when no matcher is configured for the file. checkFileMatcher' :: GetFileMatcher -> FilePath -> Annex Bool -> Annex Bool checkFileMatcher' getmatcher file notconfigured = do matcher <- getmatcher file checkMatcher matcher Nothing afile S.empty notconfigured d where afile = AssociatedFile (Just file) -- checkMatcher will never use this, because afile is provided. d = return True checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool checkMatcher matcher mkey afile notpresent notconfigured d | isEmpty matcher = notconfigured | otherwise = case (mkey, afile) of (_, AssociatedFile (Just file)) -> go =<< fileMatchInfo file (Just key, _) -> go (MatchingKey key afile) _ -> d where go mi = checkMatcher' matcher mi notpresent checkMatcher' :: FileMatcher Annex -> MatchInfo -> AssumeNotPresent -> Annex Bool checkMatcher' matcher mi notpresent = matchMrun matcher $ \a -> a notpresent mi fileMatchInfo :: FilePath -> Annex MatchInfo fileMatchInfo file = do matchfile <- getTopFilePath <$> inRepo (toTopFilePath file) return $ MatchingFile FileInfo { matchFile = matchfile , currFile = file } matchAll :: FileMatcher Annex matchAll = generate [] parsedToMatcher :: [ParseResult (MatchFiles Annex)] -> Either String (FileMatcher Annex) parsedToMatcher parsed = case partitionEithers parsed of ([], vs) -> Right $ generate vs (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es data ParseToken t = SimpleToken String (ParseResult t) | ValueToken String (String -> ParseResult t) type ParseResult t = Either String (Token t) parseToken :: [ParseToken t] -> String -> ParseResult t parseToken l t = case syntaxToken t of Right st -> Right st Left _ -> go l where go [] = Left $ "near " ++ show t go (SimpleToken s r : _) | s == t = r go (ValueToken s mkr : _) | s == k = mkr v go (_ : ps) = go ps (k, v) = separate (== '=') t {- This is really dumb tokenization; there's no support for quoted values. - Open and close parens are always treated as standalone tokens; - otherwise tokens must be separated by whitespace. -} tokenizeMatcher :: String -> [String] tokenizeMatcher = filter (not . null) . concatMap splitparens . words where splitparens = segmentDelim (`elem` "()") commonKeylessTokens :: [ParseToken (MatchFiles Annex)] commonKeylessTokens = [ SimpleToken "anything" (simply limitAnything) , SimpleToken "nothing" (simply limitNothing) , ValueToken "include" (usev limitInclude) , ValueToken "exclude" (usev limitExclude) , ValueToken "largerthan" (usev $ limitSize (>)) , ValueToken "smallerthan" (usev $ limitSize (<)) ] commonKeyedTokens :: [ParseToken (MatchFiles Annex)] commonKeyedTokens = [ SimpleToken "unused" (simply limitUnused) ] data PreferredContentData = PCD { matchStandard :: Either String (FileMatcher Annex) , matchGroupWanted :: Either String (FileMatcher Annex) , getGroupMap :: Annex GroupMap , configMap :: M.Map UUID RemoteConfig , repoUUID :: Maybe UUID } -- Tokens of preferred content expressions that do not need a Key to be -- known. -- -- When importing from a special remote, this is used to match -- some preferred content expressions before the content is downloaded, -- so the Key is not known. preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)] preferredContentKeylessTokens pcd = [ SimpleToken "standard" (call $ matchStandard pcd) , SimpleToken "groupwanted" (call $ matchGroupWanted pcd) , SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir) ] ++ commonKeylessTokens where preferreddir = fromMaybe "public" $ M.lookup "preferreddir" =<< (`M.lookup` configMap pcd) =<< repoUUID pcd preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)] preferredContentKeyedTokens pcd = [ SimpleToken "present" (simply $ limitPresent $ repoUUID pcd) , SimpleToken "securehash" (simply limitSecureHash) , ValueToken "copies" (usev limitCopies) , ValueToken "lackingcopies" (usev $ limitLackingCopies False) , ValueToken "approxlackingcopies" (usev $ limitLackingCopies True) , ValueToken "inbacked" (usev limitInBackend) , ValueToken "metadata" (usev limitMetaData) , ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd) ] ++ commonKeyedTokens preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)] preferredContentTokens pcd = concat [ preferredContentKeylessTokens pcd , preferredContentKeyedTokens pcd ] preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)] preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)]) mkLargeFilesParser = do magicmime <- liftIO initMagicMime #ifdef WITH_MAGICMIME let mimer n f = ValueToken n (usev $ f magicmime) #else let mimer n = ValueToken n $ const $ Left $ "\""++n++"\" not supported; not built with MagicMime support" #endif let parse = parseToken $ commonKeyedTokens ++ commonKeylessTokens ++ #ifdef WITH_MAGICMIME [ mimer "mimetype" $ matchMagic "mimetype" getMagicMimeType providedMimeType , mimer "mimeencoding" $ matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding ] #else [ mimer "mimetype" , mimer "mimeencoding" ] #endif return $ map parse . tokenizeMatcher where {- Generates a matcher for files large enough (or meeting other criteria) - to be added to the annex, rather than directly to git. -} largeFilesMatcher :: Annex GetFileMatcher largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig where go (Just expr) = do matcher <- mkmatcher expr return $ const $ return matcher go Nothing = return $ \file -> do expr <- checkAttr "annex.largefiles" file if null expr || expr == unspecifiedAttr then return matchAll else mkmatcher expr mkmatcher expr = do parser <- mkLargeFilesParser either badexpr return $ parsedToMatcher $ parser expr badexpr e = giveup $ "bad annex.largefiles configuration: " ++ e simply :: MatchFiles Annex -> ParseResult (MatchFiles Annex) simply = Right . Operation usev :: MkLimit Annex -> String -> ParseResult (MatchFiles Annex) usev a v = Operation <$> a v call :: Either String (FileMatcher Annex) -> ParseResult (MatchFiles Annex) call (Right sub) = Right $ Operation $ \notpresent mi -> matchMrun sub $ \a -> a notpresent mi call (Left err) = Left err