diff --git a/Command.hs b/Command.hs index 690dd20ecf..b83e640b9b 100644 --- a/Command.hs +++ b/Command.hs @@ -117,6 +117,12 @@ withAttrFilesInGit attr a params = do files' <- filterFiles files pairs <- liftIO $ Git.checkAttr repo attr files' return $ map a pairs +withBackendFilesInGit :: CommandSeekBackendFiles +withBackendFilesInGit a params = do + repo <- Annex.gitRepo + files <- liftIO $ Git.inRepo repo params + files' <- filterFiles files + backendPairs a files' withFilesMissing :: CommandSeekStrings withFilesMissing a params = do files <- liftIO $ filterM missing params diff --git a/Command/Add.hs b/Command/Add.hs index bc869a67de..c74b726e3f 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -42,11 +42,12 @@ perform (file, backend) = do stored <- Backend.storeFileKey file backend case stored of Nothing -> return Nothing - Just (key, _) -> return $ Just $ cleanup file key + Just (key, _) -> do + moveAnnex key file + return $ Just $ cleanup file key cleanup :: FilePath -> Key -> CommandCleanup cleanup file key = do - moveAnnex key file logStatus key ValuePresent link <- calcGitLink file key diff --git a/Command/Migrate.hs b/Command/Migrate.hs new file mode 100644 index 0000000000..0caded6d13 --- /dev/null +++ b/Command/Migrate.hs @@ -0,0 +1,63 @@ +{- git-annex command + - + - Copyright 2010 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Command.Migrate where + +import Control.Monad.State (liftIO) +import System.Posix.Files +import System.Directory + +import Command +import qualified Annex +import qualified Backend +import Locations +import Types +import Core +import Messages +import qualified Command.Add + +command :: [Command] +command = [Command "migrate" paramPath seek "switch data to different backend"] + +seek :: [CommandSeek] +seek = [withBackendFilesInGit start] + +start :: CommandStartBackendFile +start (_, Nothing) = return Nothing +start (file, Just newbackend) = isAnnexed file $ \(key, oldbackend) -> do + exists <- inAnnex key + if (newbackend /= oldbackend) && exists + then do + showStart "migrate" file + return $ Just $ perform file key newbackend + else + return Nothing + +perform :: FilePath -> Key -> Backend -> CommandPerform +perform file oldkey newbackend = do + g <- Annex.gitRepo + + -- Store the old backend's cached key in the new backend + -- (the file can't be stored as usual, because it's already a symlink). + -- The old backend's key is not dropped from it, because there may + -- be other files still pointing at that key. + let src = annexLocation g oldkey + stored <- Backend.storeFileKey src $ Just newbackend + case stored of + Nothing -> return Nothing + Just (newkey, _) -> do + ok <- getViaTmp newkey $ \t -> do + -- Make a hard link to the old backend's + -- cached key, to avoid wasting disk space. + liftIO $ createLink src t + return True + if ok + then do + -- Update symlink to use the new key. 
+ liftIO $ removeFile file + return $ Just $ Command.Add.cleanup file newkey + else return Nothing diff --git a/GitAnnex.hs b/GitAnnex.hs index 24c9ace0ac..d9efdad2dd 100644 --- a/GitAnnex.hs +++ b/GitAnnex.hs @@ -32,6 +32,7 @@ import qualified Command.Unlock import qualified Command.Lock import qualified Command.PreCommit import qualified Command.Find +import qualified Command.Migrate import qualified Command.Uninit import qualified Command.Trust import qualified Command.Untrust @@ -59,6 +60,7 @@ cmds = concat , Command.Unused.command , Command.DropUnused.command , Command.Find.command + , Command.Migrate.command ] options :: [Option] diff --git a/TypeInternals.hs b/TypeInternals.hs index fe6e562f95..12a9080b33 100644 --- a/TypeInternals.hs +++ b/TypeInternals.hs @@ -103,3 +103,6 @@ data Backend = Backend { instance Show Backend where show backend = "Backend { name =\"" ++ name backend ++ "\" }" + +instance Eq Backend where + a == b = name a == name b diff --git a/debian/changelog b/debian/changelog index 7ca74f9945..85878113e9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,10 +1,11 @@ git-annex (0.17) UNRELEASED; urgency=low * unannex: Now skips files whose content is not present, rather than - it being an error. This allows gradual conversion from one backend - to another by running unannex followed by add in each repository. + it being an error. + * New migrate subcommand can be used to switch files to using a different + backend, safely and with no duplication of content. - -- Joey Hess Sat, 08 Jan 2011 15:04:48 -0400 + -- Joey Hess Sat, 08 Jan 2011 13:45:06 -0400 git-annex (0.16) unstable; urgency=low diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index e99be4e409..6d106fea48 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -144,6 +144,14 @@ Many git-annex commands will stage changes for later `git commit` by you. With no parameters, defaults to finding all files in the current directory and its subdirectories. 
+* migrate [path ...] + + Changes the specified annexed files to store their content in the + default backend (or the one specified with --backend). + + Note that the content is not removed from the backend it was previously in. + Use `git annex unused` to find and remove such content. + * unannex [path ...] Use this to undo an accidental add command. This is not the command you diff --git a/doc/walkthrough.mdwn b/doc/walkthrough.mdwn index 47f05ebcf4..d2231c81e6 100644 --- a/doc/walkthrough.mdwn +++ b/doc/walkthrough.mdwn @@ -277,25 +277,32 @@ add something like this to `.gitattributes`: * annex.backend=SHA1 -## migrating between backends +## migrating data to a new backend -Perhaps you had been using the WORM backend, but now have configured -git-annex to use SHA1 for new files. Your old files are still in WORM. How -to migrate that content? A quick and dirty way is to use the unannex -subcommand, which removes a file from git-annex's control, followed by -a re-add of the file, to put it in the new backend. +Maybe you started out using the WORM backend, and have now configured +git-annex to use SHA1. But files you added to the annex before still +use the WORM backend. There is a simple command that can migrate that +data: - # git annex unannex my_cool_big_file - unannex my_cool_big_file ok - # git annex add my_cool_big_file - add my_cool_big_file (checksum ...) ok + # git annex migrate my_cool_big_file + migrate my_cool_big_file (checksum...) ok + +You can only migrate files whose content is currently available. Other +files will be skipped. + +After migrating a file to a new backend, the old content in the old backend +will still be present. That is necessary because multiple files +can point to the same content. The `git annex unused` subcommand can be +used to clear up that detritus later. Note that hard links are used, +to avoid wasting disk space. ## unused data It's possible for data to accumulate in the annex that no files point to -nymore.
One way it can happen is if you `git rm` a file without +anymore. One way it can happen is if you `git rm` a file without first calling `git annex drop`. And, when you modify an annexed file, the old -content of the file remains in the annex. +content of the file remains in the annex. Another way is when migrating +between backends. This might be historical data you want to preserve, so git-annex defaults to preserving it. So from time to time, you may want to check for such data and @@ -318,6 +325,10 @@ data anymore, you can easily remove it: # git annex dropunused 1 dropunused 1 ok +Hint: To drop a lot of unused data, use a command like this: + + # git annex dropunused `seq 1 1000` + ## fsck: verifying your data You can use the fsck subcommand to check for problems in your data.