New migrate subcommand can be used to switch files to using a different backend, safely and with no duplication of content.
This commit is contained in:
parent
32b0e10390
commit
a78b0555e1
8 changed files with 112 additions and 17 deletions
|
@ -117,6 +117,12 @@ withAttrFilesInGit attr a params = do
|
|||
files' <- filterFiles files
|
||||
pairs <- liftIO $ Git.checkAttr repo attr files'
|
||||
return $ map a pairs
|
||||
{- Asks Git.inRepo for the files matching params, passes them through
 - filterFiles, and hands the remaining files to backendPairs together
 - with the action a. -}
withBackendFilesInGit :: CommandSeekBackendFiles
withBackendFilesInGit a params =
    Annex.gitRepo
        >>= \repo -> liftIO (Git.inRepo repo params)
        >>= filterFiles
        >>= backendPairs a
|
||||
withFilesMissing :: CommandSeekStrings
|
||||
withFilesMissing a params = do
|
||||
files <- liftIO $ filterM missing params
|
||||
|
|
|
@ -42,11 +42,12 @@ perform (file, backend) = do
|
|||
stored <- Backend.storeFileKey file backend
|
||||
case stored of
|
||||
Nothing -> return Nothing
|
||||
Just (key, _) -> return $ Just $ cleanup file key
|
||||
Just (key, _) -> do
|
||||
moveAnnex key file
|
||||
return $ Just $ cleanup file key
|
||||
|
||||
cleanup :: FilePath -> Key -> CommandCleanup
|
||||
cleanup file key = do
|
||||
moveAnnex key file
|
||||
logStatus key ValuePresent
|
||||
|
||||
link <- calcGitLink file key
|
||||
|
|
63
Command/Migrate.hs
Normal file
63
Command/Migrate.hs
Normal file
|
@ -0,0 +1,63 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2010 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Command.Migrate where
|
||||
|
||||
import Control.Monad.State (liftIO)
|
||||
import System.Posix.Files
|
||||
import System.Directory
|
||||
|
||||
import Command
|
||||
import qualified Annex
|
||||
import qualified Backend
|
||||
import Locations
|
||||
import Types
|
||||
import Core
|
||||
import Messages
|
||||
import qualified Command.Add
|
||||
|
||||
{- The single Command value describing the "migrate" subcommand;
 - its seek strategy is 'seek' below. -}
command :: [Command]
command =
    [ Command "migrate" paramPath seek "switch data to different backend"
    ]
|
||||
|
||||
{- Only one seek strategy: withBackendFilesInGit, which is given 'start'
 - as the action to run. -}
seek :: [CommandSeek]
seek =
    [ withBackendFilesInGit start
    ]
|
||||
|
||||
{- Decides whether a file needs migrating.
 -
 - Nothing is done when no new backend was supplied. Otherwise, for a
 - file that isAnnexed, the migration is scheduled only when the key is
 - inAnnex and the new backend differs from the one the file uses now;
 - in every other case the file is skipped. -}
start :: CommandStartBackendFile
start (_, Nothing) = return Nothing
start (file, Just newbackend) = isAnnexed file consider
  where
    consider (key, oldbackend) = do
        present <- inAnnex key
        case (newbackend /= oldbackend, present) of
            (True, True) -> do
                showStart "migrate" file
                return $ Just $ perform file key newbackend
            _ -> return Nothing
|
||||
|
||||
{- Moves one file over to newbackend.
 -
 - The content stored under oldkey is registered with the new backend via
 - Backend.storeFileKey; if that yields no key, nothing is done. Otherwise
 - the content is hard linked into place for the new key through
 - getViaTmp, and on success the file is removed and Command.Add.cleanup
 - is returned as the cleanup stage for the new key.
 -
 - The old key's content is deliberately left alone. -}
perform :: FilePath -> Key -> Backend -> CommandPerform
perform file oldkey newbackend = do
    g <- Annex.gitRepo

    -- Store the old backend's cached key in the new backend
    -- (the file can't be stored as usual, because it's already a symlink).
    -- The old backend's key is not dropped from it, because there may
    -- be other files still pointing at that key.
    let src = annexLocation g oldkey
    stored <- Backend.storeFileKey src $ Just newbackend
    case stored of
        Nothing -> return Nothing
        Just (newkey, _) -> do
            ok <- getViaTmp newkey $ \t -> do
                -- Make a hard link to the old backend's
                -- cached key, to avoid wasting disk space.
                liftIO $ linkReplacing src t
                return True
            if ok
                then do
                    -- Update symlink to use the new key.
                    liftIO $ removeFile file
                    return $ Just $ Command.Add.cleanup file newkey
                else return Nothing
  where
    -- createLink throws when the destination already exists, so a file
    -- already sitting at the temp path is removed before linking.
    -- NOTE(review): assumes getViaTmp can hand back a path left behind
    -- by an earlier interrupted run -- confirm against getViaTmp.
    linkReplacing from to = do
        stale <- doesFileExist to
        if stale then removeFile to else return ()
        createLink from to
|
|
@ -32,6 +32,7 @@ import qualified Command.Unlock
|
|||
import qualified Command.Lock
|
||||
import qualified Command.PreCommit
|
||||
import qualified Command.Find
|
||||
import qualified Command.Migrate
|
||||
import qualified Command.Uninit
|
||||
import qualified Command.Trust
|
||||
import qualified Command.Untrust
|
||||
|
@ -59,6 +60,7 @@ cmds = concat
|
|||
, Command.Unused.command
|
||||
, Command.DropUnused.command
|
||||
, Command.Find.command
|
||||
, Command.Migrate.command
|
||||
]
|
||||
|
||||
options :: [Option]
|
||||
|
|
|
@ -103,3 +103,6 @@ data Backend = Backend {
|
|||
|
||||
{- A backend is shown by its name alone; no other field is printed. -}
instance Show Backend where
    show backend = concat ["Backend { name =\"", name backend, "\" }"]
|
||||
|
||||
{- Backends are compared by name only; no other field takes part. -}
instance Eq Backend where
    (==) x y = name x == name y
|
||||
|
|
7
debian/changelog
vendored
7
debian/changelog
vendored
|
@ -1,10 +1,11 @@
|
|||
git-annex (0.17) UNRELEASED; urgency=low
|
||||
|
||||
* unannex: Now skips files whose content is not present, rather than
|
||||
it being an error. This allows gradual conversion from one backend
|
||||
to another by running unannex followed by add in each repository.
|
||||
it being an error.
|
||||
* New migrate subcommand can be used to switch files to using a different
|
||||
backend, safely and with no duplication of content.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Sat, 08 Jan 2011 15:04:48 -0400
|
||||
-- Joey Hess <joeyh@debian.org> Sat, 08 Jan 2011 13:45:06 -0400
|
||||
|
||||
git-annex (0.16) unstable; urgency=low
|
||||
|
||||
|
|
|
@ -144,6 +144,14 @@ Many git-annex commands will stage changes for later `git commit` by you.
|
|||
With no parameters, defaults to finding all files in the current directory
|
||||
and its subdirectories.
|
||||
|
||||
* migrate [path ...]
|
||||
|
||||
Changes the specified annexed files to store their content in the
|
||||
default backend (or the one specified with --backend).
|
||||
|
||||
Note that the content is not removed from the backend it was previously in.
|
||||
Use `git annex unused` to find and remove such content.
|
||||
|
||||
* unannex [path ...]
|
||||
|
||||
Use this to undo an accidental add command. This is not the command you
|
||||
|
|
|
@ -277,25 +277,32 @@ add something like this to `.gitattributes`:
|
|||
|
||||
* annex.backend=SHA1
|
||||
|
||||
## migrating between backends
|
||||
## migrating data to a new backend
|
||||
|
||||
Perhaps you had been using the WORM backend, but now have configured
|
||||
git-annex to use SHA1 for new files. Your old files are still in WORM. How
|
||||
to migrate that content? A quick and dirty way is to use the unannex
|
||||
subcommand, which removes a file from git-annex's control, followed by
|
||||
a re-add of the file, to put it in the new backend.
|
||||
Maybe you started out using the WORM backend, and have now configured
|
||||
git-annex to use SHA1. But files you added to the annex before still
|
||||
use the WORM backend. There is a simple command that can migrate that
|
||||
data:
|
||||
|
||||
# git annex unannex my_cool_big_file
|
||||
unannex my_cool_big_file ok
|
||||
# git annex add my_cool_big_file
|
||||
add my_cool_big_file (checksum ...) ok
|
||||
# git annex migrate my_cool_big_file
|
||||
migrate my_cool_big_file (checksum...) ok
|
||||
|
||||
You can only migrate files whose content is currently available. Other
|
||||
files will be skipped.
|
||||
|
||||
After migrating a file to a new backend, the old content in the old backend
|
||||
will still be present. That is necessary because multiple files
|
||||
can point to the same content. The `git annex unused` subcommand can be
|
||||
used to clear up that detritus later. Note that hard links are used,
|
||||
to avoid wasting disk space.
|
||||
|
||||
## unused data
|
||||
|
||||
It's possible for data to accumulate in the annex that no files point to
|
||||
nymore. One way it can happen is if you `git rm` a file without
|
||||
anymore. One way it can happen is if you `git rm` a file without
|
||||
first calling `git annex drop`. And, when you modify an annexed file, the old
|
||||
content of the file remains in the annex.
|
||||
content of the file remains in the annex. Another way is when migrating
|
||||
between backends.
|
||||
|
||||
This might be historical data you want to preserve, so git-annex defaults to
|
||||
preserving it. So from time to time, you may want to check for such data and
|
||||
|
@ -318,6 +325,10 @@ data anymore, you can easily remove it:
|
|||
# git annex dropunused 1
|
||||
dropunused 1 ok
|
||||
|
||||
Hint: To drop a lot of unused data, use a command like this:
|
||||
|
||||
# git annex dropunused `seq 1 1000`
|
||||
|
||||
## fsck: verifying your data
|
||||
|
||||
You can use the fsck subcommand to check for problems in your data.
|
||||
|
|
Loading…
Reference in a new issue