-- Copyright (C) 2007 Eric Kow -- Copyright (C) 2010 Petr Rockai -- -- Permission is hereby granted, free of charge, to any person -- obtaining a copy of this software and associated documentation -- files (the "Software"), to deal in the Software without -- restriction, including without limitation the rights to use, copy, -- modify, merge, publish, distribute, sublicense, and/or sell copies -- of the Software, and to permit persons to whom the Software is -- furnished to do so, subject to the following conditions: -- -- The above copyright notice and this permission notice shall be -- included in all copies or substantial portions of the Software. -- -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -- SOFTWARE. {-# LANGUAGE CPP #-} module Darcs.Util.Path ( encodeWhite , decodeWhite , encodeWhiteName , decodeWhiteName -- * AbsolutePath , AbsolutePath , makeAbsolute , ioAbsolute -- * AbsolutePathOrStd , AbsolutePathOrStd , makeAbsoluteOrStd , ioAbsoluteOrStd , useAbsoluteOrStd , stdOut -- * AbsoluteOrRemotePath , AbsoluteOrRemotePath , ioAbsoluteOrRemote , isRemote -- * SubPath , SubPath , makeSubPathOf , simpleSubPath , floatSubPath -- * Miscellaneous , FilePathOrURL(..) , FilePathLike(toFilePath) , getCurrentDirectory , setCurrentDirectory , getUniquePathName , doesPathExist -- * Check for malicious paths , isMaliciousSubPath -- * Tree filtering. , filterPaths -- * AnchoredPaths: relative paths within a Tree. All paths are -- anchored at a certain root (this is usually the Tree root). They are -- represented by a list of Names (these are just strict bytestrings). , Name , name2fp , makeName , rawMakeName , eqAnycase , AnchoredPath(..) , anchoredRoot , appendPath , anchorPath , isPrefix , breakOnDir , movedirfilename , parent , parents , replaceParent , catPaths , flatten , inDarcsdir , displayPath , realPath , isRoot , darcsdirName -- * Unsafe AnchoredPath functions. , floatPath ) where import Darcs.Prelude import Data.List ( isPrefixOf , isSuffixOf , stripPrefix , intersect , inits ) import Data.Char ( isSpace, chr, ord, toLower ) import Data.Typeable ( Typeable ) import Control.Exception ( tryJust, bracket_, throw, Exception ) import Control.Monad ( when ) import System.IO.Error ( isDoesNotExistError ) import qualified Darcs.Util.Workaround as Workaround ( getCurrentDirectory ) import qualified System.Directory ( setCurrentDirectory ) import System.Directory ( doesDirectoryExist, doesFileExist ) import qualified System.FilePath.Posix as FilePath ( (), normalise, isRelative ) import qualified System.FilePath as NativeFilePath ( takeFileName, takeDirectory ) import System.FilePath( splitDirectories, normalise, dropTrailingPathSeparator ) import System.Posix.Files ( isDirectory, getSymbolicLinkStatus ) import Darcs.Util.ByteString ( encodeLocale, decodeLocale ) import qualified Data.ByteString.Char8 as BC import qualified Data.ByteString as B import Data.Binary import Darcs.Util.Global ( darcsdir ) import Darcs.Util.URL ( isAbsolute, isRelative, isSshNopath ) -- Utilities for use by command implementations -- | For displaying paths to the user. It should never be used -- for on-disk patch storage. This adds the "./" for consistency -- with how repo paths are displayed by 'showPatch' and friends, -- except for the root path which is displayed as plain ".". displayPath :: AnchoredPath -> FilePath displayPath p | isRoot p = "." | otherwise = anchorPath "." p -- | Interpret an 'AnchoredPath' as relative the current working -- directory. Intended for IO operations in the file system. -- Use with care! realPath :: AnchoredPath -> FilePath realPath = anchorPath "" -- | 'encodeWhite' translates whitespace in filenames to a darcs-specific -- format (numerical representation according to 'ord' surrounded by -- backslashes). Note that backslashes are also escaped since they are used -- in the encoding. -- -- > encodeWhite "hello there" == "hello\32\there" -- > encodeWhite "hello\there" == "hello\92\there" encodeWhite :: FilePath -> String encodeWhite (c:cs) | isSpace c || c == '\\' = '\\' : show (ord c) ++ "\\" ++ encodeWhite cs encodeWhite (c:cs) = c : encodeWhite cs encodeWhite [] = [] -- | 'decodeWhite' interprets the Darcs-specific \"encoded\" filenames -- produced by 'encodeWhite' -- -- > decodeWhite "hello\32\there" == "hello there" -- > decodeWhite "hello\92\there" == "hello\there" -- > decodeWhite "hello\there" == error "malformed filename" decodeWhite :: String -> FilePath decodeWhite cs_ = go cs_ [] False where go "" acc True = reverse acc -- if there was a replace, use new string go "" _ False = cs_ -- if not, use input string go ('\\':cs) acc _ = case break (=='\\') cs of (theord, '\\':rest) -> go rest (chr (read theord) :acc) True _ -> error "malformed filename" go (c:cs) acc modified = go cs (c:acc) modified class FilePathOrURL a where toPath :: a -> String class FilePathOrURL a => FilePathLike a where toFilePath :: a -> FilePath -- | Paths which are relative to the local darcs repository and normalized. -- Note: These are understood not to have the dot in front. newtype SubPath = SubPath FilePath deriving (Eq, Ord) newtype AbsolutePath = AbsolutePath FilePath deriving (Eq, Ord) -- | This is for situations where a string (e.g. a command line argument) -- may take the value \"-\" to mean stdin or stdout (which one depends on -- context) instead of a normal file path. data AbsolutePathOrStd = AP AbsolutePath | APStd deriving (Eq, Ord) data AbsoluteOrRemotePath = AbsP AbsolutePath | RmtP String deriving (Eq, Ord) instance FilePathOrURL AbsolutePath where toPath (AbsolutePath x) = x instance FilePathOrURL SubPath where toPath (SubPath x) = x instance CharLike c => FilePathOrURL [c] where toPath = toFilePath instance FilePathOrURL AbsoluteOrRemotePath where toPath (AbsP a) = toPath a toPath (RmtP r) = r instance FilePathLike AbsolutePath where toFilePath (AbsolutePath x) = x instance FilePathLike SubPath where toFilePath (SubPath x) = x class CharLike c where toChar :: c -> Char instance CharLike Char where toChar = id instance CharLike c => FilePathLike [c] where toFilePath = map toChar -- | Make the second path relative to the first, if possible makeSubPathOf :: AbsolutePath -> AbsolutePath -> Maybe SubPath makeSubPathOf (AbsolutePath p1) (AbsolutePath p2) = -- The slash prevents "foobar" from being treated as relative to "foo" if p1 == p2 || (p1 ++ "/") `isPrefixOf` p2 then Just $ SubPath $ drop (length p1 + 1) p2 else Nothing simpleSubPath :: FilePath -> Maybe SubPath simpleSubPath x | null x = error "simpleSubPath called with empty path" | isRelative x = Just $ SubPath $ FilePath.normalise $ pathToPosix x | otherwise = Nothing -- | Ensure directory exists and is not a symbolic link. doesDirectoryReallyExist :: FilePath -> IO Bool doesDirectoryReallyExist f = do x <- tryJust (\x -> if isDoesNotExistError x then Just () else Nothing) $ isDirectory <$> getSymbolicLinkStatus f return $ case x of Left () -> False Right y -> y doesPathExist :: FilePath -> IO Bool doesPathExist p = do dir_exists <- doesDirectoryExist p file_exists <- doesFileExist p return $ dir_exists || file_exists -- | Interpret a possibly relative path wrt the current working directory. ioAbsolute :: FilePath -> IO AbsolutePath ioAbsolute dir = do isdir <- doesDirectoryReallyExist dir here <- getCurrentDirectory if isdir then bracket_ (setCurrentDirectory dir) (setCurrentDirectory $ toFilePath here) getCurrentDirectory else let super_dir = case NativeFilePath.takeDirectory dir of "" -> "." d -> d file = NativeFilePath.takeFileName dir in do abs_dir <- if dir == super_dir then return $ AbsolutePath dir else ioAbsolute super_dir return $ makeAbsolute abs_dir file -- | Take an absolute path and a string representing a (possibly relative) -- path and combine them into an absolute path. If the second argument is -- already absolute, then the first argument gets ignored. This function also -- takes care that the result is converted to Posix convention and -- normalized. Also, parent directories (\"..\") at the front of the string -- argument get canceled out against trailing directory parts of the -- absolute path argument. -- -- Regarding the last point, someone more familiar with how these functions -- are used should verify that this is indeed necessary or at least useful. makeAbsolute :: AbsolutePath -> FilePath -> AbsolutePath makeAbsolute a dir = if not (null dir) && isAbsolute dir then AbsolutePath (normSlashes dir') else ma a dir' where dir' = FilePath.normalise $ pathToPosix dir -- Why do we care to reduce ".." here? -- Why not do this throughout the whole path, i.e. "x/y/../z" -> "x/z" ? ma here ('.':'.':'/':r) = ma (takeDirectory here) r ma here ".." = takeDirectory here ma here "." = here ma here "" = here ma here r = here /- ('/':r) (/-) :: AbsolutePath -> String -> AbsolutePath x /- ('/':r) = x /- r (AbsolutePath "/") /- r = AbsolutePath ('/':simpleClean r) (AbsolutePath x) /- r = AbsolutePath (x++'/':simpleClean r) -- | Convert to posix, remove trailing slashes, and (under Posix) -- reduce multiple leading slashes to one. simpleClean :: String -> String simpleClean = normSlashes . reverse . dropWhile (=='/') . reverse . pathToPosix makeAbsoluteOrStd :: AbsolutePath -> String -> AbsolutePathOrStd makeAbsoluteOrStd _ "-" = APStd makeAbsoluteOrStd a p = AP $ makeAbsolute a p stdOut :: AbsolutePathOrStd stdOut = APStd ioAbsoluteOrStd :: String -> IO AbsolutePathOrStd ioAbsoluteOrStd "-" = return APStd ioAbsoluteOrStd p = AP `fmap` ioAbsolute p -- | Execute either the first or the second argument action, depending on -- whether the given path is an 'AbsolutePath' or stdin/stdout. useAbsoluteOrStd :: (AbsolutePath -> a) -> a -> AbsolutePathOrStd -> a useAbsoluteOrStd _ f APStd = f useAbsoluteOrStd f _ (AP x) = f x ioAbsoluteOrRemote :: String -> IO AbsoluteOrRemotePath ioAbsoluteOrRemote p = do isdir <- doesDirectoryExist p if not isdir then return $ RmtP $ case () of _ | isSshNopath p -> p++"." | "/" `isSuffixOf` p -> init p | otherwise -> p else AbsP `fmap` ioAbsolute p isRemote :: AbsoluteOrRemotePath -> Bool isRemote (RmtP _) = True isRemote _ = False takeDirectory :: AbsolutePath -> AbsolutePath takeDirectory (AbsolutePath x) = case reverse $ drop 1 $ dropWhile (/='/') $ reverse x of "" -> AbsolutePath "/" x' -> AbsolutePath x' instance Show AbsolutePath where show = show . toFilePath instance Show SubPath where show = show . toFilePath instance Show AbsolutePathOrStd where show (AP a) = show a show APStd = "standard input/output" instance Show AbsoluteOrRemotePath where show (AbsP a) = show a show (RmtP r) = show r -- | Normalize the path separator to Posix style (slash, not backslash). -- This only affects Windows systems. pathToPosix :: FilePath -> FilePath pathToPosix = map convert where #ifdef WIN32 convert '\\' = '/' #endif convert c = c -- | Reduce multiple leading slashes to one. This only affects Posix systems. normSlashes :: FilePath -> FilePath #ifndef WIN32 -- multiple slashes in front are ignored under Posix normSlashes ('/':p) = '/' : dropWhile (== '/') p #endif normSlashes p = p getCurrentDirectory :: IO AbsolutePath getCurrentDirectory = AbsolutePath `fmap` Workaround.getCurrentDirectory setCurrentDirectory :: FilePathLike p => p -> IO () setCurrentDirectory = System.Directory.setCurrentDirectory . toFilePath {-| What is a malicious path? A spoofed path is a malicious path. 1. Darcs only creates explicitly relative paths (beginning with @\".\/\"@), so any not explicitly relative path is surely spoofed. 2. Darcs normalizes paths so they never contain @\"\/..\/\"@, so paths with @\"\/..\/\"@ are surely spoofed. A path to a darcs repository's meta data can modify \"trusted\" patches or change safety defaults in that repository, so we check for paths containing @\"\/_darcs\/\"@ which is the entry to darcs meta data. To do? * How about get repositories? * Would it be worth adding a --semi-safe-paths option for allowing changes to certain preference files (_darcs\/prefs\/) in sub repositories'? TODO: Properly review the way we handle paths on Windows - it's not enough to just use the OS native concept of path separator. Windows often accepts both path separators, and repositories always use the UNIX separator anyway. -} isMaliciousSubPath :: String -> Bool isMaliciousSubPath fp = not (FilePath.isRelative fp) || isGenerallyMalicious fp isGenerallyMalicious :: String -> Bool isGenerallyMalicious fp = splitDirectories fp `contains_any` [ "..", darcsdir ] where contains_any a b = not . null $ intersect a b -- | Iteratively tries find first non-existing path generated by -- buildName, it feeds to buildName the number starting with -1. When -- it generates non-existing path and it isn't first, it displays the -- message created with buildMsg. Usually used for generation of the -- name like _ when already exist -- (e.g. darcs.net_0). getUniquePathName :: Bool -> (FilePath -> String) -> (Int -> FilePath) -> IO FilePath getUniquePathName talkative buildMsg buildName = go (-1) where go :: Int -> IO FilePath go i = do exists <- doesPathExist thename if not exists then do when (i /= -1 && talkative) $ putStrLn $ buildMsg thename return thename else go $ i+1 where thename = buildName i ------------------------------- -- AnchoredPath utilities -- newtype Name = Name { unName :: B.ByteString } deriving (Binary, Eq, Show, Ord) -- | This is a type of "sane" file paths. These are always canonic in the sense -- that there are no stray slashes, no ".." components and similar. They are -- usually used to refer to a location within a Tree, but a relative filesystem -- path works just as well. These are either constructed from individual name -- components (using "appendPath", "catPaths" and "makeName"), or converted -- from a FilePath ("floatPath" -- but take care when doing that). newtype AnchoredPath = AnchoredPath [Name] deriving (Binary, Eq, Show, Ord) -- | Check whether a path is a prefix of another path. isPrefix :: AnchoredPath -> AnchoredPath -> Bool (AnchoredPath a) `isPrefix` (AnchoredPath b) = a `isPrefixOf` b -- | Append an element to the end of a path. appendPath :: AnchoredPath -> Name -> AnchoredPath appendPath (AnchoredPath p) n = AnchoredPath $ p ++ [n] -- | Catenate two paths together. Not very safe, but sometimes useful -- (e.g. when you are representing paths relative to a different point than a -- Tree root). catPaths :: AnchoredPath -> AnchoredPath -> AnchoredPath catPaths (AnchoredPath p) (AnchoredPath n) = AnchoredPath (p ++ n) -- | Get parent (path) of a given path. foo/bar/baz -> foo/bar parent :: AnchoredPath -> Maybe AnchoredPath parent (AnchoredPath []) = Nothing parent (AnchoredPath x) = Just (AnchoredPath (init x)) -- | List all parents of a given path. foo/bar/baz -> [.,foo, foo/bar] parents :: AnchoredPath -> [AnchoredPath] parents (AnchoredPath []) = [] -- root has no parents parents (AnchoredPath xs) = map AnchoredPath $ inits $ init xs -- | If the patch is under a directory, split into Right of the first component -- (which must be a directory name) and the rest of teh path. Otherwise -- return Left of the single component. -- This function is *undefined* on the root path (which has no components). breakOnDir :: AnchoredPath -> Either Name (Name, AnchoredPath) breakOnDir (AnchoredPath []) = error "breakOnDir called on root" breakOnDir (AnchoredPath (n:[])) = Left n breakOnDir (AnchoredPath (n:ns)) = Right (n, AnchoredPath ns) -- | Take a "root" directory and an anchored path and produce a full -- 'FilePath'. Moreover, you can use @anchorPath \"\"@ to get a relative -- 'FilePath'. anchorPath :: FilePath -> AnchoredPath -> FilePath anchorPath dir p = dir FilePath. decodeLocale (flatten p) {-# INLINE anchorPath #-} name2fp :: Name -> FilePath name2fp (Name ps) = decodeLocale ps -- FIXME returning "." for the root is wrong flatten :: AnchoredPath -> BC.ByteString flatten (AnchoredPath []) = BC.singleton '.' flatten (AnchoredPath p) = BC.intercalate (BC.singleton '/') [n | (Name n) <- p] -- | Make a 'Name' from a 'String'. If the input 'String' -- is invalid, that is, "", ".", "..", or contains a '/', return 'Left' -- with an error message. makeName :: String -> Either String Name makeName = rawMakeName . encodeLocale -- | Make a 'Name' from a 'String'. If the input 'String' -- is invalid, that is, "", ".", "..", or contains a '/', call error. internalMakeName :: String -> Name internalMakeName = either error id . rawMakeName . encodeLocale -- | Take a relative FilePath and turn it into an AnchoredPath. This is a -- partial function. Basically, by using floatPath, you are testifying that the -- argument is a path relative to some common root -- i.e. the root of the -- associated "Tree" object. In particular, the input path may not contain any -- ocurrences of "." or ".." after normalising. You should sanitize any -- FilePaths before you declare them "good" by converting into AnchoredPath -- (using this function), especially if the FilePath come from any external -- source (command line, file, environment, network, etc) floatPath :: FilePath -> AnchoredPath floatPath = AnchoredPath . map internalMakeName . filter sensible . splitDirectories . normalise . dropTrailingPathSeparator where sensible s = s `notElem` ["", "."] anchoredRoot :: AnchoredPath anchoredRoot = AnchoredPath [] -- | A view on 'AnchoredPath's. parentChild :: AnchoredPath -> Maybe (AnchoredPath, Name) parentChild (AnchoredPath []) = Nothing parentChild (AnchoredPath xs) = Just (AnchoredPath (init xs), last xs) -- | Replace the second arg's parent with the first arg. replaceParent :: AnchoredPath -> AnchoredPath -> Maybe AnchoredPath replaceParent (AnchoredPath xs) p = case parentChild p of Nothing -> Nothing Just (_,x) -> Just (AnchoredPath (xs ++ [x])) -- | Make a 'Name' from a 'B.ByteString'. rawMakeName :: B.ByteString -> Either String Name rawMakeName s | isBadName s = Left $ "'"++decodeLocale s++"' is not a valid AnchoredPath component name" | otherwise = Right (Name s) isBadName :: B.ByteString -> Bool isBadName n = hasPathSeparator n || n `elem` forbiddenNames -- It would be nice if we could add BC.pack "_darcs" to the list, however -- "_darcs" could be a valid file or dir name if not inside the top level -- directory. forbiddenNames :: [B.ByteString] forbiddenNames = [BC.empty, BC.pack ".", BC.pack ".."] hasPathSeparator :: B.ByteString -> Bool hasPathSeparator = BC.elem '/' eqAnycase :: Name -> Name -> Bool eqAnycase (Name a) (Name b) = BC.map toLower a == BC.map toLower b encodeWhiteName :: Name -> B.ByteString encodeWhiteName = encodeLocale . encodeWhite . decodeLocale . unName data CorruptPatch = CorruptPatch String deriving (Eq, Typeable) instance Exception CorruptPatch instance Show CorruptPatch where show (CorruptPatch s) = s decodeWhiteName :: B.ByteString -> Name decodeWhiteName = either (throw . CorruptPatch) id . rawMakeName . encodeLocale . decodeWhite . decodeLocale -- | The effect of renaming on paths. -- The first argument is the old path, the second is the new path, -- and the third is the possibly affected path we are interested in. movedirfilename :: AnchoredPath -> AnchoredPath -> AnchoredPath -> AnchoredPath movedirfilename (AnchoredPath old) newp@(AnchoredPath new) orig@(AnchoredPath path) = case stripPrefix old path of Just [] -> newp -- optimization to avoid allocation in this case Just rest -> AnchoredPath (new ++ rest) Nothing -> orig -- old is not a prefix => no change -- | Construct a filter from a list of AnchoredPaths, that will accept any path -- that is either a parent or a child of any of the listed paths, and discard -- everything else. filterPaths :: [AnchoredPath] -> AnchoredPath -> t -> Bool filterPaths files p _ = any (\x -> x `isPrefix` p || p `isPrefix` x) files -- | Transform a SubPath into an AnchoredPath. floatSubPath :: SubPath -> AnchoredPath floatSubPath = floatPath . toFilePath -- | Is the given path in (or equal to) the _darcs metadata directory? inDarcsdir :: AnchoredPath -> Bool inDarcsdir (AnchoredPath (x:_)) | x == darcsdirName = True inDarcsdir _ = False darcsdirName :: Name darcsdirName = internalMakeName darcsdir isRoot :: AnchoredPath -> Bool isRoot (AnchoredPath xs) = null xs