{-# LANGUAGE DeriveDataTypeable #-} {- |This module manages a file store in which a label is associated with every file and directory. The file store is grouped into directories by label. Files are stored under names like: > LabelHash/OpaqueName where LabelHash is a SHA-224 hash of the label, and OpaqueName is either a regular file (containing contents) or a directory populated exclusively by symbolic links pointing back into LabelHash directories. Each LabelHash directory also has a file called > LabelHash/LABEL which actually contains the label of all the files in that directory. There is also a symbolic link @root@, pointing to the root directory. For efficiency, @LabelHash@ actually consists of multiple directories. There are two externally-visible abstractions. The first is 'Name', which refers to a file name in a user directory, of the form: > LabelHash/OpaqueName/UserName There is also a special 'Name', 'rootDir', which refers to the root directory. Untrusted user code has access to the 'rootDir' 'Name', and can walk the tree from there using the 'lookupName' function. The "LIO.Handle" module contains functions 'mkDir' and 'mkLHandle' which permit untrusted code to make new 'Name's as well as to do handle-based IO on protected files. The second is 'Node', which refers to one of the @OpaqueName@s that 'Name's point to. Currently, any functions that operate on 'Node's are in the IO Monad so as not to be executable by untrusted code. This is important because in order to use a file, someone must have the right to know know that the file exists, and this requires read permission on the file's 'Name'. It would be insecure if untrusted code could execute openNode in the LIO Monad. Note that if a machine crashes, the code in this module could leave the filesystem in an inconsistent state. However, the code tries to maitain the invariant that any inconsistencies will either be: 1. temporary files or directories whose names end with the \"@~@\" character, or 2. dangling symbolic links. Both of these inconsistencies can be checked and cleaned up locally without examining the whole file system. The code tries to fix up these inconsistencies on-the-fly as it encounters them. However, it could possibly lieave some stranded temporary @LABEL...~@ files. You could also end up with some weirdness like a file that shows up in getDirectoryContents, but that you can't open for reading. To keep from having to examine the whole file system to fix errors, the code tries to maintain the invariant that if a 'Node'\'s file name doesn't end with @~@, then there must be a link pointing to it somewhere. This is why the code uses a separate 'NewNode' type to represent a 'Node' whose name ends @~@. The function 'linkNode' renames the 'NewNode' to a name without a trailing @~@ only after creating a 'Name' that points to the permenent 'Node' path. Assuming a file system that preserves the order of metadata operations, the code should mostly be okay to recover from any crashes. If using soft updates, which can re-order metadata operations, you could end up with symbolic links that point nowhere. In the worst case scenario if inconsistencies develop, you can manually fix up the file system by deleting all danglinng symbolic links and all files and directories ending @~@. Make sure no application is concurrently accessing the file system, however. -} module LIO.FS ( -- * The opaque name object Name -- Do not Export constructor! Names are TRUSTED , rootDir , getRootDir, mkRootDir , lookupName, mkTmpDirL -- * Initializing the file system , initFS -- * Internal data structures , Node -- * Helper functions in the IO Monad , labelOfName, labelOfNode, nodeOfName , mkNode, mkNodeDir, mkNodeReg, linkNode , lookupNode, openNode, getDirectoryContentsNode -- * Misc. utility functions , tryPred ) where import LIO.Armor import LIO.TCB import LIO.TmpFile import Prelude hiding (catch) import Control.Exception hiding (throwIO, catch, onException) import Control.Monad import qualified Data.ByteString.Lazy.Char8 as LC import Data.Typeable -- import qualified GHC import qualified GHC.IO.Exception as GHC (IOErrorType(..)) import System.Directory import System.FilePath import System.IO import System.IO.Error hiding (catch, try) -- import System.FilePath import System.Posix.Files import System.Posix.Process import Data.Digest.Pure.SHA -- -- Utility functions -- strictReadFile :: FilePath -> IO LC.ByteString strictReadFile f = withFile f ReadMode readit where readit h = do size <- hFileSize h LC.hGet h $ fromInteger size catchIO :: IO a -> IO a -> IO a catchIO a h = catch a ((const :: a -> IOException -> a) h) catchPred :: Exception e => (e -> Bool) -> IO a -> IO a -> IO a catchPred predicate a h = catchJust test a runh where test e = if predicate e then Just () else Nothing runh () = h tryPred :: Exception e => (e -> Bool) -> IO a -> IO (Either e a) tryPred predicate a = tryJust test a where test e = if predicate e then Just e else Nothing ignoreErr :: IO () -> IO () ignoreErr m = catch m ((\_ -> return ()) :: IOException -> IO ()) -- |Delete a name whether it's a file or directory, by trying both. -- This is slow, but only used for error conditions when performance -- shouldn't matter. clean :: FilePath -> IO () clean path = removeFile path `catchIO` (removeDirectory path `catchIO` return ()) -- -- Exceptions thrown by this module -- data FSErr = FSCorruptLabel FilePath -- ^ File Containing Label is Corrupt deriving (Show, Typeable) instance Exception FSErr -- -- LDir functions -- prefix :: FilePath prefix = "ls" -- | File name in which labels are stored in 'LDir's. labelFile :: FilePath labelFile = "LABEL" -- | File name of root directory for each label rootFile :: FilePath rootFile = "ROOT" -- | Type containing the pathname of a @LabelHash@ directory (which -- must contain a file named 'labelFile'). newtype LDir = LDir FilePath deriving (Show) -- | The subdirectory depth of 'LDir's. Because many file systems -- have linear lookup time in large directories, it is better to use -- the first few characters of the hash of a label as subdirectories. -- Putting all hash values into one huge directory would get slow. lDirNdirs :: Int lDirNdirs = 3 -- | Hash a label down to the directory storing all 'Node's with that -- label. lDirOfLabel :: (Label l) => l -> LDir lDirOfLabel l = LDir $ doit lDirNdirs prefix hash where hash = armor32 $ bytestringDigest $ sha224 $ LC.pack $ show l doit 0 out h = out h doit n out (c:h) = doit (n - 1) (out [c]) h doit _ _ _ = error "lDirOfLabel bad sha" {- -- | Minimally validate that an LDir is in the right part of the file -- system, or throw 'FSIllegalPath'. checkLDir :: LDir -> IO () checkLDir (LDir path) = do dirlist <- liftM splitDirectories $ checkpref path unless (length dirlist == 1 + lDirNdirs && all (all a32Valid) dirlist) bad where bad = throwIO $ FSIllegalPath path checkpref p = case stripPrefix prefix p of Just (c:r) | isPathSeparator c -> return r _ -> bad -} -- | 'LDir' that contains a 'Node' lDirOfNode :: Node -> LDir lDirOfNode (NodeTCB n) = LDir $ takeDirectory n -- | 'LDir' that contains the directory that contains a file name. lDirOfName :: (Label l) => Name l -> LDir lDirOfName (NameTCB n) = LDir $ takeDirectory $ takeDirectory n lDirOfName (RootDir l) = lDirOfLabel l -- |Takes an LDir and returns the label stored in 'labelFile' in that -- directory. May throw 'FSCorruptLabel'. labelOfLDir :: (Label l) => LDir -> IO l labelOfLDir (LDir p) = do s <- strictReadFile target `catch` diagnose parseit s where target = (p labelFile) parseit s = case reads $ LC.unpack s of (l, "\n"):_ -> return l _ -> throwIO $ FSCorruptLabel target diagnose e | isDoesNotExistError e = do exists <- doesDirectoryExist p if exists then throwIO $ FSCorruptLabel target else throwIO e | otherwise = throwIO e -- |Gets the LDir for a particular label. Creates it if it does not -- exist. May throw 'FSCorruptLabel'. getLDir :: Label l => l -> IO LDir getLDir l = try (labelOfLDir ldir) >>= handler where ldir@(LDir dir) = lDirOfLabel l handler (Right l') | l' == l = return ldir | otherwise = dumplabel >> throwIO (FSCorruptLabel dir) handler (Left e) = case fromException e of Just e' | isDoesNotExistError e' -> makedir _ -> dumplabel >> throwIO e makelabel path = withFile path WriteMode $ \h -> do hPutStr h $ shows l "\n" hSync h -- Mostly unnecessary paranoia here, but one thread could create -- the label file, then another thread could overwrite it as the -- first thread is renaming the LDir~ -> LDir. If after that -- there's a power failure, then the label file could be -- corrupted. This way we ensure that once the LABEL file is in -- place, it never gets overwritten. makesafelabel path = do pid <- getProcessID tmp <- tmpName let tpath = path ++ "." ++ show pid ++ "." ++ tmp ++ newNodeExt makelabel tpath flip finally (ignoreErr $ removeLink tpath) $ catch (createLink tpath path) $ \e -> unless (isAlreadyExistsError e) (throwIO e) makedir = do let tdir = dir ++ newNodeExt createDirectoryIfMissing True tdir makesafelabel $ tdir labelFile rename tdir dir return ldir dumplabel = ignoreErr $ makelabel $ dir (labelFile ++ ".correct") -- -- Node functions -- -- |The @Node@ type represents filenames of the form -- @LabelHash\/OpaqueName@. These names must always point to regular -- files or directories (not symbolic links). There must always exist -- a file @LabalHash\/LABEL@ specifying the label of a @Node@. newtype Node = NodeTCB FilePath deriving (Show) -- |When a @Node@ is first created, it has a file name with a \'~\' -- character at the end. This is so that in the case of a crash, a -- node that was not linked to can be easily recognized and deleted. -- The @NewNode@ type wrapper represents a node that is not yet linked -- to. newtype NewNode = NewNode Node deriving (Show) -- |String that gets appended to new file names. After a crash these -- may need to be garbage collected. newNodeExt :: String newNodeExt = "~" -- |Label protecting the contents of a node. labelOfNode :: (Label l) => Node -> IO l labelOfNode = labelOfLDir . lDirOfNode -- | Create new Node in the appropriate directory for a given label. -- The node gets created with an extra ~ appended, and wrapped in the -- type 'NewNode' to reflect this fact. mkNode :: (Label l) => l -- ^Label for the new node -> (FilePath -> String -> IO (a, FilePath)) -- ^Either 'mkTmpDir' or 'mkTmpFile' with curried 'IOMode' -> IO (a, NewNode) -- ^Returns file handle or () and destination path mkNode l f = do (LDir d) <- getLDir l (a, p) <- f d newNodeExt -- We are done, except if we got really unlucky someone else may -- have created a node with the same name at the same time. (The -- node creation is exclusive, but we append newNodeExt, so someone -- might have renamed it before we created the newNodeExt file -- exclusively.) We simply start over if someone claimed the name -- in the mean time. let p' = take (length p - length newNodeExt) p exists <- catchIO (getFileStatus p' >> return True) (return False) if not exists then return (a, NewNode $ NodeTCB p') else do hPutStrLn stderr $ "mkNode: file " ++ p' ++ " already exists." -- XXX clean p mkNode l f -- |Wrapper around mkNode to create a directory. mkNodeDir :: (Label l) => l -> IO NewNode mkNodeDir l = liftM snd (mkNode l mkTmpDir) -- |Wrapper around mkNode to create a regular file. mkNodeReg :: (Label l) => IOMode -> l -> IO (Handle, NewNode) mkNodeReg m l = mkNode l (mkTmpFile m) -- | Used when creating a symbolic link named @src@ that points to -- @dst@. If both @src@ and @dst@ are relative to the current working -- directory and in subdirectories, then the contents of the symbolic -- link cannot just be @dst@, instead it is @makeRelativeTo dst src@. makeRelativeTo :: FilePath -- ^ Destination of symbolic link -> FilePath -- ^ Name of symbolic link -> FilePath -- ^ Returns contents to put in symbolic link makeRelativeTo dest src = doit (splitDirectories dest) (init $ splitDirectories src) where doit [] [] = "." doit (d1:ds) (s1:ss) | d1 == s1 = doit ds ss doit d s = joinPath (replicate (length s) ('.':'.':pathSeparator:[]) ++ d) -- | Assign a 'Name' to a 'NewNode', turning it into a 'Node'. Note -- that unlike the Unix file system, only a single link may be created -- to each node. linkNode :: (Label l) => NewNode -> Name l -> IO Node linkNode (NewNode (NodeTCB path)) name' = do let name = pathOfName name' tpath = path ++ newNodeExt createSymbolicLink (path `makeRelativeTo` name) name `onException` clean tpath -- The next line really shouldn't fail except for some catastrophic -- IO error. See the comment in mkNode. rename tpath path `onException` removeFile name return $ NodeTCB path -- | It's possible that either a program crashed before renaming a -- 'NewNode' into a 'Node', or that another thread is calling -- 'linkNode' and for some reason is being slow betweeen the -- 'createSymbolicLink' and 'rename' calls. Either way it should be -- fine for us just to 'rename' the 'NewNode', because the 'Name' -- would not exist if the 'NewNode' were not ready to be renamed. fixNode :: Node -> (FilePath -> IO a) -> IO a fixNode (NodeTCB file) action = action file `catch` fixup where fixup e | isDoesNotExistError e = do ignoreErr $ rename (file ++ newNodeExt) file action file fixup e = throwIO e -- | Thie function just calls 'openFile' on the filename in a 'Node'. -- However, on the off chance that the file system is in an -- inconsistent state (e.g., because of a crash during a call to -- 'linkNode'), it tries to finish creating a partially created -- 'Node'. openNode :: Node -> IOMode -> IO Handle openNode node mode = fixNode node $ flip openFile mode -- | Thie function is a wrapper around 'getDirectoryContents' that -- tries to fixup errors analogously to 'openNode'. getDirectoryContentsNode :: Node -> IO [FilePath] getDirectoryContentsNode node = fixNode node getDirectoryContents -- -- Name functions -- -- |The @Name@ type represents user-chosen (non-opaque) filenames of -- symbolic links, either @\"root\"@ or pathnames of the form -- @LabelHash\/OpaqueName\/filename@. Intermediary components of the -- file name must not be symbolic links. data Name l = NameTCB FilePath | RootDir l deriving (Show) -- |Label protecting the name of a file. Note that this is the label -- of the directory containing the file name, not the label of the -- Node that the file name designates. labelOfName :: (Label l) => Name l -> IO l labelOfName (RootDir l) = return l labelOfName n = labelOfLDir $ lDirOfName n {- unlinkName :: (FilePath -> IO ()) -> Name -> IO () unlinkName f (NameTCB name) = do (Node node) <- nodeOfName (NameTCB name) f node removeFile name -- |Remove a directory by name. unlinkNameDir = unlinkName removeDirectory -- |Remove a regular file by name. unlinkNameReg = unlinkName removeFile -} -- | This function reads the contents of a symbolic link and returns -- the pathname of its destination, relative to the current working -- directory. It elides ".." components at the begining of the -- symbolic link contents, so that if the link @foo\/bar -> ..\/baz@ -- exists, @expandLink \"foo\/bar\"@ will return @\"foo\/baz\"@. -- -- /Warning:/ This function assumes no itermediary components of the -- path to the symbolic link are also symbolic links. expandLink :: FilePath -> IO FilePath expandLink path = do suffix <- catchPred (\e -> ioeGetErrorType e == GHC.InvalidArgument) (readSymbolicLink path) (return "") return $ if (isAbsolute suffix) then suffix else domerge (takeDirectory path) suffix where domerge [] suffix = suffix domerge p [] = p domerge p ('.':'.':ps:suffix) | ps == pathSeparator = domerge (takeDirectory p) suffix domerge p suffix = p suffix pathOfName :: (Label l) => Name l -> FilePath pathOfName (NameTCB n) = n pathOfName (RootDir l) = case lDirOfLabel l of (LDir ldir) -> ldir rootFile -- | 'Node' that a 'Name' is pointing to. nodeOfName :: (Label l) => Name l -> IO Node nodeOfName n = liftM NodeTCB $ expandLink $ pathOfName n -- | Gives the 'Name' of a directory entry in a directory 'Node'. nodeEntry :: (Label l) => Node -> FilePath -> Name l nodeEntry (NodeTCB node) name = NameTCB (node name) mkRootDirIO :: (Label l) => l -> IO (Name l) mkRootDirIO label = do let name = RootDir label exists <- doesDirectoryExist $ pathOfName name unless exists $ do new <- mkNodeDir label linkNode new name >> return () return name defRoot :: FilePath defRoot = prefix rootFile initFS :: (Label l) => l -> IO () initFS l = do name <- mkRootDirIO l (NodeTCB node) <- nodeOfName name let root = node `makeRelativeTo` defRoot root' <- catchIO (readSymbolicLink defRoot) $ createSymbolicLink root defRoot >> return root when (root' /= root) $ error "default root doesn't match requested label" -- -- LIO Monad function -- -- | Return the root directory for the default root label. (There is -- a root directory for each label, but only one label is the -- default.) rootDir :: (Label l) => LIO l s (Name l) rootDir = return $ NameTCB $ defRoot -- | Get the root directory for a particular label. getRootDir :: (Label l) => l -> Name l getRootDir l = RootDir l -- | Creates a root directory for a particular label. mkRootDir :: (Priv l p) => p -> l -> LIO l s (Name l) mkRootDir priv label = do wguardP priv label name <- rtioTCB $ mkRootDirIO label return name -- | Looks up a FilePath, turning it into a 'Name', and raising to -- current label to reflect all directories traversed. Note that this -- only looks up a 'Name'; it does not ensure the 'Name' actually -- exists. The intent is that you call @lookupName@ before creating -- or opening files. -- -- Note that this function will touch bad parts of the file system if -- it is supplied with a malicous 'Name'. Thus, it is important to -- keep the constructor of 'Name' private, so that the only way for -- user code to generate names is to start with 'rootDir' and call -- @lookupName@. lookupName :: (Priv l p) => p -- ^ Privileges to limit tainting -> Name l -- ^ Start point -> FilePath -- ^ Name to look up -> LIO l s (Name l) lookupName priv start path = dolookup start (stripslash $ splitDirectories path) where stripslash ((c:_):t) | c == pathSeparator = t stripslash t = t dolookup name [] = return name dolookup name (".":rest) = dolookup name rest dolookup _ ("..":_) = throwIO $ mkIOError doesNotExistErrorType "illegal filename" Nothing (Just ".." ) dolookup name@(RootDir label) (cn:rest) = do taintP priv label node <- rtioTCB $ nodeOfName name dolookup (nodeEntry node cn) rest dolookup name (cn:rest) = do -- XXX next thing should deal with partially created nodes node <- rtioTCB $ nodeOfName name -- Could fail if name deleted label <- ioTCB $ labelOfNode node -- Shouldn't fail taintP priv label dolookup (nodeEntry node cn) rest lookupNode :: (Priv l p) => p -- ^ Privileges to limit tainting -> Name l -- ^ Start point (e.g., 'rootDir') -> FilePath -- ^ Name to look up -> Bool -- ^ True if you want to write it -> LIO l s Node lookupNode priv start path write = do name <- lookupName priv start path node <- rtioTCB $ nodeOfName name label <- ioTCB $ labelOfNode node if write then wguardP priv label else taintP priv label return node -- | Creates a temporary directory in an existing directory (or -- label-specific root directory, if the 'Name' argument comes from -- 'getRootDir'). mkTmpDirL :: (Priv l p) => p -- ^ Privileges to minimize tainting -> l -- ^ Label for the new directory -> Name l -- ^ 'Name' of dir in which to create directory -> String -- ^ Suffix for name of directory -> LIO l s (FilePath, Name l) -- ^ Returns both name in directory and 'Name' of new directory mkTmpDirL priv label name suffix = do aguard label ensureRoot name (NodeTCB node) <- lookupNode priv name "" True aguard label (NewNode (NodeTCB new)) <- rtioTCB $ mkNodeDir label let tnew = new ++ newNodeExt target = new `makeRelativeTo` (node "x") (_, tname) <- rtioTCB $ mkTmp (createSymbolicLink target) node suffix `onExceptionTCB` clean tnew rtioTCB $ rename tnew new `onExceptionTCB` removeFile tname return $ (takeFileName tname, NameTCB tname) where ensureRoot (RootDir l) = mkRootDir priv l >> return () ensureRoot _ = return ()