New migrate subcommand can be used to switch files to using a different backend, safely and with no duplication of content.
This commit is contained in:
parent
32b0e10390
commit
a78b0555e1
8 changed files with 112 additions and 17 deletions
|
@ -117,6 +117,12 @@ withAttrFilesInGit attr a params = do
|
||||||
files' <- filterFiles files
|
files' <- filterFiles files
|
||||||
pairs <- liftIO $ Git.checkAttr repo attr files'
|
pairs <- liftIO $ Git.checkAttr repo attr files'
|
||||||
return $ map a pairs
|
return $ map a pairs
|
||||||
|
{- Seek helper for commands that need to know a file's backend.
 - Asks Git.inRepo for the files under params in the annex's git
 - repository, passes them through filterFiles, and hands the remaining
 - files to backendPairs together with the action a.
 - NOTE(review): backendPairs is defined outside this view; presumably it
 - pairs each file with its backend before applying a. Confirm. -}
withBackendFilesInGit :: CommandSeekBackendFiles
|
||||||
|
withBackendFilesInGit a params = do
|
||||||
|
repo <- Annex.gitRepo
|
||||||
|
files <- liftIO $ Git.inRepo repo params
|
||||||
|
files' <- filterFiles files
|
||||||
|
backendPairs a files'
|
||||||
withFilesMissing :: CommandSeekStrings
|
withFilesMissing :: CommandSeekStrings
|
||||||
withFilesMissing a params = do
|
withFilesMissing a params = do
|
||||||
files <- liftIO $ filterM missing params
|
files <- liftIO $ filterM missing params
|
||||||
|
|
|
@ -42,11 +42,12 @@ perform (file, backend) = do
|
||||||
stored <- Backend.storeFileKey file backend
|
stored <- Backend.storeFileKey file backend
|
||||||
case stored of
|
case stored of
|
||||||
Nothing -> return Nothing
|
Nothing -> return Nothing
|
||||||
Just (key, _) -> return $ Just $ cleanup file key
|
Just (key, _) -> do
|
||||||
|
moveAnnex key file
|
||||||
|
return $ Just $ cleanup file key
|
||||||
|
|
||||||
cleanup :: FilePath -> Key -> CommandCleanup
|
cleanup :: FilePath -> Key -> CommandCleanup
|
||||||
cleanup file key = do
|
cleanup file key = do
|
||||||
moveAnnex key file
|
|
||||||
logStatus key ValuePresent
|
logStatus key ValuePresent
|
||||||
|
|
||||||
link <- calcGitLink file key
|
link <- calcGitLink file key
|
||||||
|
|
63
Command/Migrate.hs
Normal file
63
Command/Migrate.hs
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
{- git-annex command
|
||||||
|
-
|
||||||
|
- Copyright 2010 Joey Hess <joey@kitenet.net>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Command.Migrate where
|
||||||
|
|
||||||
|
import Control.Monad.State (liftIO)
|
||||||
|
import System.Posix.Files
|
||||||
|
import System.Directory
|
||||||
|
|
||||||
|
import Command
|
||||||
|
import qualified Annex
|
||||||
|
import qualified Backend
|
||||||
|
import Locations
|
||||||
|
import Types
|
||||||
|
import Core
|
||||||
|
import Messages
|
||||||
|
import qualified Command.Add
|
||||||
|
|
||||||
|
{- The command table entry for "migrate": it takes path parameters
 - (paramPath), is driven by seek, and carries the one-line description
 - given here. -}
command :: [Command]
|
||||||
|
command = [Command "migrate" paramPath seek "switch data to different backend"]
|
||||||
|
|
||||||
|
{- Files to migrate are found with withBackendFilesInGit, which is given
 - start as the action to run. -}
seek :: [CommandSeek]
|
||||||
|
seek = [withBackendFilesInGit start]
|
||||||
|
|
||||||
|
{- Decides whether a file should be migrated.
 -
 - A pair with no backend (Nothing) is skipped. Otherwise the file is
 - looked up with isAnnexed, which supplies its current key and backend.
 - Migration goes ahead only when the requested backend differs from the
 - current one and inAnnex reports the key's content as present; in that
 - case the "migrate" start message is shown and perform is returned as
 - the next stage. In every other case Nothing is returned.
 - NOTE(review): what isAnnexed does for a file that is not annexed is
 - not visible here; presumably it yields Nothing. Confirm. -}
start :: CommandStartBackendFile
|
||||||
|
-- No backend was supplied for this file, so there is nothing to do.
start (_, Nothing) = return Nothing
|
||||||
|
start (file, Just newbackend) = isAnnexed file $ \(key, oldbackend) -> do
|
||||||
|
exists <- inAnnex key
|
||||||
|
-- Backends are compared with the Eq Backend instance.
if (newbackend /= oldbackend) && exists
|
||||||
|
then do
|
||||||
|
showStart "migrate" file
|
||||||
|
return $ Just $ perform file key newbackend
|
||||||
|
else
|
||||||
|
return Nothing
|
||||||
|
|
||||||
|
{- Migrates one file to newbackend.
 -
 - file       the working tree path being migrated
 - oldkey     the key the file currently points at
 - newbackend the backend to store the content in
 -
 - The content file of oldkey (annexLocation g oldkey) is given to
 - Backend.storeFileKey with newbackend to obtain a new key. The content
 - is then put in place for the new key through getViaTmp, using a hard
 - link to the old content. On success the working tree file is removed
 - and Command.Add.cleanup is returned as the cleanup stage for the new
 - key. Nothing is returned when storeFileKey yields no key or when
 - getViaTmp does not report success. -}
perform :: FilePath -> Key -> Backend -> CommandPerform
|
||||||
|
perform file oldkey newbackend = do
|
||||||
|
g <- Annex.gitRepo
|
||||||
|
|
||||||
|
-- Store the old backend's cached key in the new backend
|
||||||
|
-- (the file can't be stored as usual, because it's already a symlink).
|
||||||
|
-- The old backend's key is not dropped from it, because there may
|
||||||
|
-- be other files still pointing at that key.
|
||||||
|
let src = annexLocation g oldkey
|
||||||
|
stored <- Backend.storeFileKey src $ Just newbackend
|
||||||
|
case stored of
|
||||||
|
Nothing -> return Nothing
|
||||||
|
Just (newkey, _) -> do
|
||||||
|
ok <- getViaTmp newkey $ \t -> do
|
||||||
|
-- Make a hard link to the old backend's
|
||||||
|
-- cached key, to avoid wasting disk space.
|
||||||
|
liftIO $ createLink src t
|
||||||
|
return True
|
||||||
|
if ok
|
||||||
|
then do
|
||||||
|
-- Update symlink to use the new key.
|
||||||
|
-- (The old link is removed here; the rest is left to
|
||||||
|
-- Command.Add.cleanup.)
liftIO $ removeFile file
|
||||||
|
return $ Just $ Command.Add.cleanup file newkey
|
||||||
|
else return Nothing
|
|
@ -32,6 +32,7 @@ import qualified Command.Unlock
|
||||||
import qualified Command.Lock
|
import qualified Command.Lock
|
||||||
import qualified Command.PreCommit
|
import qualified Command.PreCommit
|
||||||
import qualified Command.Find
|
import qualified Command.Find
|
||||||
|
import qualified Command.Migrate
|
||||||
import qualified Command.Uninit
|
import qualified Command.Uninit
|
||||||
import qualified Command.Trust
|
import qualified Command.Trust
|
||||||
import qualified Command.Untrust
|
import qualified Command.Untrust
|
||||||
|
@ -59,6 +60,7 @@ cmds = concat
|
||||||
, Command.Unused.command
|
, Command.Unused.command
|
||||||
, Command.DropUnused.command
|
, Command.DropUnused.command
|
||||||
, Command.Find.command
|
, Command.Find.command
|
||||||
|
, Command.Migrate.command
|
||||||
]
|
]
|
||||||
|
|
||||||
options :: [Option]
|
options :: [Option]
|
||||||
|
|
|
@ -103,3 +103,6 @@ data Backend = Backend {
|
||||||
|
|
||||||
instance Show Backend where
|
instance Show Backend where
|
||||||
show backend = "Backend { name =\"" ++ name backend ++ "\" }"
|
show backend = "Backend { name =\"" ++ name backend ++ "\" }"
|
||||||
|
|
||||||
|
{- Backends are considered equal when their names are equal; no other
 - field of the record is compared. -}
instance Eq Backend where
|
||||||
|
a == b = name a == name b
|
||||||
|
|
7
debian/changelog
vendored
7
debian/changelog
vendored
|
@ -1,10 +1,11 @@
|
||||||
git-annex (0.17) UNRELEASED; urgency=low
|
git-annex (0.17) UNRELEASED; urgency=low
|
||||||
|
|
||||||
* unannex: Now skips files whose content is not present, rather than
|
* unannex: Now skips files whose content is not present, rather than
|
||||||
it being an error. This allows gradual conversion from one backend
|
it being an error.
|
||||||
to another by running unannex followed by add in each repository.
|
* New migrate subcommand can be used to switch files to using a different
|
||||||
|
backend, safely and with no duplication of content.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Sat, 08 Jan 2011 15:04:48 -0400
|
-- Joey Hess <joeyh@debian.org> Sat, 08 Jan 2011 13:45:06 -0400
|
||||||
|
|
||||||
git-annex (0.16) unstable; urgency=low
|
git-annex (0.16) unstable; urgency=low
|
||||||
|
|
||||||
|
|
|
@ -144,6 +144,14 @@ Many git-annex commands will stage changes for later `git commit` by you.
|
||||||
With no parameters, defaults to finding all files in the current directory
|
With no parameters, defaults to finding all files in the current directory
|
||||||
and its subdirectories.
|
and its subdirectories.
|
||||||
|
|
||||||
|
* migrate [path ...]
|
||||||
|
|
||||||
|
Changes the specified annexed files to store their content in the
|
||||||
|
default backend (or the one specified with --backend).
|
||||||
|
|
||||||
|
Note that the content is not removed from the backend it was previously in.
|
||||||
|
Use `git annex unused` to find and remove such content.
|
||||||
|
|
||||||
* unannex [path ...]
|
* unannex [path ...]
|
||||||
|
|
||||||
Use this to undo an accidental add command. This is not the command you
|
Use this to undo an accidental add command. This is not the command you
|
||||||
|
|
|
@ -277,25 +277,32 @@ add something like this to `.gitattributes`:
|
||||||
|
|
||||||
* annex.backend=SHA1
|
* annex.backend=SHA1
|
||||||
|
|
||||||
## migrating between backends
|
## migrating data to a new backend
|
||||||
|
|
||||||
Perhaps you had been using the WORM backend, but now have configured
|
Maybe you started out using the WORM backend, and have now configured
|
||||||
git-annex to use SHA1 for new files. Your old files are still in WORM. How
|
git-annex to use SHA1. But files you added to the annex before still
|
||||||
to migrate that content? A quick and dirty way is to use the unannex
|
use the WORM backend. There is a simple command that can migrate that
|
||||||
subcommand, which removes a file from git-annex's control, followed by
|
data:
|
||||||
a re-add of the file, to put it in the new backend.
|
|
||||||
|
|
||||||
# git annex unannex my_cool_big_file
|
# git annex migrate my_cool_big_file
|
||||||
unannex my_cool_big_file ok
|
migrate my_cool_big_file (checksum...) ok
|
||||||
# git annex add my_cool_big_file
|
|
||||||
add my_cool_big_file (checksum ...) ok
|
You can only migrate files whose content is currently available. Other
|
||||||
|
files will be skipped.
|
||||||
|
|
||||||
|
After migrating a file to a new backend, the old content in the old backend
|
||||||
|
will still be present. That is necessary because multiple files
|
||||||
|
can point to the same content. The `git annex unused` subcommand can be
|
||||||
|
used to clear up that detritus later. Note that hard links are used,
|
||||||
|
to avoid wasting disk space.
|
||||||
|
|
||||||
## unused data
|
## unused data
|
||||||
|
|
||||||
It's possible for data to accumulate in the annex that no files point to
|
It's possible for data to accumulate in the annex that no files point to
|
||||||
nymore. One way it can happen is if you `git rm` a file without
|
anymore. One way it can happen is if you `git rm` a file without
|
||||||
first calling `git annex drop`. And, when you modify an annexed file, the old
|
first calling `git annex drop`. And, when you modify an annexed file, the old
|
||||||
content of the file remains in the annex.
|
content of the file remains in the annex. Another way is when migrating
|
||||||
|
between backends.
|
||||||
|
|
||||||
This might be historical data you want to preserve, so git-annex defaults to
|
This might be historical data you want to preserve, so git-annex defaults to
|
||||||
preserving it. So from time to time, you may want to check for such data and
|
preserving it. So from time to time, you may want to check for such data and
|
||||||
|
@ -318,6 +325,10 @@ data anymore, you can easily remove it:
|
||||||
# git annex dropunused 1
|
# git annex dropunused 1
|
||||||
dropunused 1 ok
|
dropunused 1 ok
|
||||||
|
|
||||||
|
Hint: To drop a lot of unused data, use a command like this:
|
||||||
|
|
||||||
|
# git annex dropunused `seq 1 1000`
|
||||||
|
|
||||||
## fsck: verifying your data
|
## fsck: verifying your data
|
||||||
|
|
||||||
You can use the fsck subcommand to check for problems in your data.
|
You can use the fsck subcommand to check for problems in your data.
|
||||||
|
|
Loading…
Reference in a new issue