From 6869e6023e21698038da7e4a858cbaf6f7b7bbed Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 28 Nov 2011 15:26:27 -0400 Subject: [PATCH] support .git/annex on a different disk than the rest of the repo The only fully supported thing is to have the main repository on one disk, and .git/annex on another. Only commands that move data in/out of the annex will need to copy it across devices. There is only partial support for putting arbitrary subdirectories of .git/annex on different devices. For one thing, this can require more copies to be done. For example, when .git/annex/tmp is on one device, and .git/annex/journal on another, every journal write involves a call to mv(1). Also, there are a few places that make hard links between various subdirectories of .git/annex with createLink, that are not handled. In the common case without cross-device, the new moveFile is actually faster than renameFile, avoiding an unnecessary stat to check that a file (not a directory) is being moved. Of course if a cross-device move is needed, it is as slow as mv(1) of the data. --- Annex/Branch.hs | 2 +- Annex/Content.hs | 8 +-- Command/Add.hs | 2 +- Command/Unlock.hs | 2 +- Common.hs | 1 + Utility/Directory.hs | 51 +++++++++++++++++++ debian/changelog | 2 + ...o_have_annex_on_a_separate_filesystem.mdwn | 13 +++-- ...it_on_ssd__44___annex_on_spindle_disk.mdwn | 11 +++- 9 files changed, 81 insertions(+), 11 deletions(-) create mode 100644 Utility/Directory.hs diff --git a/Annex/Branch.hs b/Annex/Branch.hs index ccc6145552..a92f05b2cc 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -312,7 +312,7 @@ setJournalFile file content = do let jfile = journalFile g file let tmpfile = gitAnnexTmpDir g takeFileName jfile writeBinaryFile tmpfile content - renameFile tmpfile jfile + moveFile tmpfile jfile {- Gets any journalled content for a file in the branch. 
-} getJournalFile :: FilePath -> Annex (Maybe String) diff --git a/Annex/Content.hs b/Annex/Content.hs index 83839ea135..f5571b54af 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -113,7 +113,7 @@ logStatus key status = do u <- getUUID logChange key u status -{- Runs an action, passing it a temporary filename to download, +{- Runs an action, passing it a temporary filename to get, - and if the action succeeds, moves the temp file into - the annex as a key's content. -} getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool @@ -221,7 +221,7 @@ moveAnnex key src = do else liftIO $ do createDirectoryIfMissing True dir allowWrite dir -- in case the directory already exists - renameFile src dest + moveFile src dest preventWrite dest preventWrite dir @@ -243,7 +243,7 @@ fromAnnex :: Key -> FilePath -> Annex () fromAnnex key dest = withObjectLoc key $ \(dir, file) -> liftIO $ do allowWrite dir allowWrite file - renameFile file dest + moveFile file dest removeDirectory dir {- Moves a key out of .git/annex/objects/ into .git/annex/bad, and @@ -256,7 +256,7 @@ moveBad key = do liftIO $ do createDirectoryIfMissing True (parentDir dest) allowWrite (parentDir src) - renameFile src dest + moveFile src dest removeDirectory (parentDir src) logStatus key InfoMissing return dest diff --git a/Command/Add.hs b/Command/Add.hs index ab104b53cc..130f5e3110 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -61,7 +61,7 @@ undo file key e = do tryharder :: IOException -> Annex () tryharder _ = do src <- fromRepo $ gitAnnexLocation key - liftIO $ renameFile src file + liftIO $ moveFile src file cleanup :: FilePath -> Key -> Bool -> CommandCleanup cleanup file key hascontent = do diff --git a/Command/Unlock.hs b/Command/Unlock.hs index 22f9ce7108..b6f39488da 100644 --- a/Command/Unlock.hs +++ b/Command/Unlock.hs @@ -46,7 +46,7 @@ perform dest key = do then do liftIO $ do removeFile dest - renameFile tmpdest dest + moveFile tmpdest dest allowWrite dest next $ return True else 
error "copy failed!" diff --git a/Common.hs b/Common.hs index e0132d9e96..a3802da5f2 100644 --- a/Common.hs +++ b/Common.hs @@ -23,3 +23,4 @@ import Utility.Misc as X import Utility.Conditional as X import Utility.SafeCommand as X import Utility.Path as X +import Utility.Directory as X diff --git a/Utility/Directory.hs b/Utility/Directory.hs new file mode 100644 index 0000000000..7f8822fca5 --- /dev/null +++ b/Utility/Directory.hs @@ -0,0 +1,51 @@ +{- directory manipulation + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.Directory where + +import System.IO.Error +import System.Posix.Files +import System.Directory +import Control.Exception (throw) + +import Utility.SafeCommand +import Utility.Conditional +import Utility.TempFile + +{- Moves one filename to another. + - First tries a rename, but falls back to moving across devices with mv(1) if needed. Permission and does-not-exist errors from the rename are rethrown without trying the fallback; the original rename error is also rethrown when dest is a directory or when the mv command fails (after removing any partial temp file). -} +moveFile :: FilePath -> FilePath -> IO () +moveFile src dest = try (rename src dest) >>= onrename + where + onrename (Right _) = return () + onrename (Left e) + | isPermissionError e = rethrow + | isDoesNotExistError e = rethrow + | otherwise = do + -- copyFile is likely not as optimised as + -- the mv command, so we'll use the latter. + -- But, mv will move into a directory if + -- dest is one, which is not desired. + whenM (isdir dest) rethrow + viaTmp mv dest undefined + where + rethrow = throw e + mv tmp _ = do + ok <- boolSystem "mv" [Param "-f", + Param src, Param tmp] + if ok + then return () + else do + -- delete any partial + _ <- try $ + removeFile tmp + rethrow + isdir f = do + r <- try (getFileStatus f) + case r of + (Left _) -> return False + (Right s) -> return $ isDirectory s diff --git a/debian/changelog b/debian/changelog index 943d1e01cd..265ba71846 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,6 +6,8 @@ git-annex (3.20111123) UNRELEASED; urgency=low doubled output. * Avoid needing haskell98 and other fixes for new ghc. 
Thanks, Mark Wright. * Bugfix: dropunused did not drop keys with two spaces in their name. + * Support for storing .git/annex on a different device than the rest of the + git repository. -- Joey Hess Tue, 22 Nov 2011 17:53:42 -0400 diff --git a/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn b/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn index e9a3ee95a4..7daf03284b 100644 --- a/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn +++ b/doc/bugs/not_possible_to_have_annex_on_a_separate_filesystem.mdwn @@ -14,12 +14,19 @@ But when trying to add files i get: I have tried both using bind-mount and with a sym-link. -> I don't think this was a reversion; the forum post doesn't really -> indicate it ever worked. -> > Grepping for `renameFile` and `createLink` will find all the places > in git-annex that assume one filesystem. These would have to be changed > to catch errors and fall back to expensive copying. > > Putting a separate repository on the file server could work better > depending on what you're trying to do. --[[Joey]] + +>> I've added support for putting `.git/annex` on a separate filesystem +>> from the rest of the git repository. +>> +>> Putting individual subdirectories like `.git/annex/objects` on separate +>> filesystems from other subdirectories is not fully supported; it may +>> work but it may be slow and a few things (like `git annex migrate`) are +>> known to fail due to using hard links. I don't think this is worth +>> supporting. 
[[done]] +>> --[[Joey]] diff --git a/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn index a04c8b040b..f70c127025 100644 --- a/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn +++ b/doc/forum/performance_improvement:_git_on_ssd__44___annex_on_spindle_disk.mdwn @@ -1,3 +1,12 @@ This works with bind-mount, I might try with softlinks as well. -Going through git's data on push/pull can take ages on a spindle disk even if the repo is rather small in size. This is especially true if you are used to ssd speeds, but ssd storage is expensive. Storing the annex objects on a cheap spindle disk and everything else on a ssd makes things a _lot_ faster. +Going through git's data on push/pull can take ages on a spindle disk even +if the repo is rather small in size. This is especially true if you are +used to ssd speeds, but ssd storage is expensive. Storing the annex objects +on a cheap spindle disk and everything else on a ssd makes things a _lot_ +faster. + +> Update: git-annex supports `.git/annex/` being moved to a different disk +> than the rest of the repository, but does *not* support individual +> subdirectories, like `.git/annex/objects/` being on a different disk +> than the main `.git/annex/` directory. --[[Joey]]