fsck --from remote --fast

Avoids expensive file transfers, at the expense of checking file size
and/or contents.

Required some reworking of the remote code.
This commit is contained in:
Joey Hess 2012-01-20 13:23:11 -04:00
parent e96726caa3
commit 61dbad505d
16 changed files with 109 additions and 47 deletions

View file

@ -306,9 +306,18 @@ downloadUrl urls file = do
{- Copies a key's content, when present, to a temp file. {- Copies a key's content, when present, to a temp file.
- This is used to speed up some rsyncs. -} - This is used to speed up some rsyncs. -}
preseedTmp :: Key -> FilePath -> Annex () preseedTmp :: Key -> FilePath -> Annex Bool
preseedTmp key file = preseedTmp key file = go =<< inAnnex key
unlessM (liftIO $ doesFileExist file) $ whenM (inAnnex key) $ do where
go False = return False
go True = do
ok <- copy
when ok $ liftIO $ allowWrite file
return ok
copy = do
present <- liftIO $ doesFileExist file
if present
then return True
else do
s <- inRepo $ gitAnnexLocation key s <- inRepo $ gitAnnexLocation key
liftIO $ whenM (copyFileExternal s file) $ liftIO $ copyFileExternal s file
allowWrite file

View file

@ -9,6 +9,7 @@ module Command.Fsck where
import Common.Annex import Common.Annex
import Command import Command
import qualified Annex
import qualified Remote import qualified Remote
import qualified Types.Backend import qualified Types.Backend
import qualified Types.Key import qualified Types.Key
@ -65,8 +66,8 @@ performRemote key file backend numcopies remote = do
showNote err showNote err
stop stop
Right True -> withtmp $ \tmpfile -> do Right True -> withtmp $ \tmpfile -> do
copied <- Remote.retrieveKeyFile remote key True tmpfile copied <- getfile tmpfile
if copied then go True (Just tmpfile) else go False Nothing if copied then go True (Just tmpfile) else go True Nothing
Right False -> go False Nothing Right False -> go False Nothing
where where
go present localcopy = check go present localcopy = check
@ -83,6 +84,15 @@ performRemote key file backend numcopies remote = do
let cleanup = liftIO $ catch (removeFile tmp) (const $ return ()) let cleanup = liftIO $ catch (removeFile tmp) (const $ return ())
cleanup cleanup
cleanup `after` a tmp cleanup `after` a tmp
getfile tmp = do
ok <- Remote.retrieveKeyFileCheap remote key tmp
if ok
then return ok
else do
fast <- Annex.getState Annex.fast
if fast
then return False
else Remote.retrieveKeyFile remote key tmp
{- To fsck a bare repository, fsck each key in the location log. -} {- To fsck a bare repository, fsck each key in the location log. -}
withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek

View file

@ -72,7 +72,7 @@ getKeyFile key file = do
else return True else return True
docopy r continue = do docopy r continue = do
showAction $ "from " ++ Remote.name r showAction $ "from " ++ Remote.name r
copied <- Remote.retrieveKeyFile r key False file copied <- Remote.retrieveKeyFile r key file
if copied if copied
then return True then return True
else continue else continue

View file

@ -131,7 +131,7 @@ fromPerform src move key = moveLock move key $ do
then handle move True then handle move True
else do else do
showAction $ "from " ++ Remote.name src showAction $ "from " ++ Remote.name src
ok <- getViaTmp key $ Remote.retrieveKeyFile src key False ok <- getViaTmp key $ Remote.retrieveKeyFile src key
handle move ok handle move ok
where where
handle _ False = stop -- failed handle _ False = stop -- failed

View file

@ -11,6 +11,7 @@ module Remote (
name, name,
storeKey, storeKey,
retrieveKeyFile, retrieveKeyFile,
retrieveKeyFileCheap,
removeKey, removeKey,
hasKey, hasKey,
hasKeyCheap, hasKeyCheap,

View file

@ -50,6 +50,7 @@ gen r u c = do
name = Git.repoDescribe r, name = Git.repoDescribe r,
storeKey = store r buprepo, storeKey = store r buprepo,
retrieveKeyFile = retrieve buprepo, retrieveKeyFile = retrieve buprepo,
retrieveKeyFileCheap = retrieveCheap buprepo,
removeKey = remove, removeKey = remove,
hasKey = checkPresent r bupr', hasKey = checkPresent r bupr',
hasKeyCheap = bupLocal buprepo, hasKeyCheap = bupLocal buprepo,
@ -118,13 +119,16 @@ storeEncrypted r buprepo (cipher, enck) k = do
withEncryptedHandle cipher (L.readFile src) $ \h -> withEncryptedHandle cipher (L.readFile src) $ \h ->
pipeBup params (Just h) Nothing pipeBup params (Just h) Nothing
retrieve :: BupRepo -> Key -> Bool -> FilePath -> Annex Bool retrieve :: BupRepo -> Key -> FilePath -> Annex Bool
retrieve buprepo k _ f = do retrieve buprepo k f = do
let params = bupParams "join" buprepo [Param $ show k] let params = bupParams "join" buprepo [Param $ show k]
liftIO $ catchBoolIO $ do liftIO $ catchBoolIO $ do
tofile <- openFile f WriteMode tofile <- openFile f WriteMode
pipeBup params Nothing (Just tofile) pipeBup params Nothing (Just tofile)
retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
retrieveEncrypted :: BupRepo -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted :: BupRepo -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted buprepo (cipher, enck) f = do retrieveEncrypted buprepo (cipher, enck) f = do
let params = bupParams "join" buprepo [Param $ show enck] let params = bupParams "join" buprepo [Param $ show enck]

View file

@ -41,6 +41,7 @@ gen r u c = do
name = Git.repoDescribe r, name = Git.repoDescribe r,
storeKey = store dir, storeKey = store dir,
retrieveKeyFile = retrieve dir, retrieveKeyFile = retrieve dir,
retrieveKeyFileCheap = retrieveCheap dir,
removeKey = remove dir, removeKey = remove dir,
hasKey = checkPresent dir, hasKey = checkPresent dir,
hasKeyCheap = True, hasKeyCheap = True,
@ -109,11 +110,12 @@ storeHelper d key a = do
preventWrite dir preventWrite dir
return ok return ok
retrieve :: FilePath -> Key -> Bool -> FilePath -> Annex Bool retrieve :: FilePath -> Key -> FilePath -> Annex Bool
retrieve d k tmp f = liftIO $ withStoredFile d k $ \file -> retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f
if tmp
then catchBoolIO $ createSymbolicLink file f >> return True retrieveCheap :: FilePath -> Key -> FilePath -> Annex Bool
else copyFileExternal file f retrieveCheap d k f = liftIO $ withStoredFile d k $ \file ->
catchBoolIO $ createSymbolicLink file f >> return True
retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted d (cipher, enck) f = retrieveEncrypted d (cipher, enck) f =

View file

@ -75,6 +75,7 @@ gen r u _ = do
name = Git.repoDescribe r', name = Git.repoDescribe r',
storeKey = copyToRemote r', storeKey = copyToRemote r',
retrieveKeyFile = copyFromRemote r', retrieveKeyFile = copyFromRemote r',
retrieveKeyFileCheap = copyFromRemoteCheap r',
removeKey = dropKey r', removeKey = dropKey r',
hasKey = inAnnex r', hasKey = inAnnex r',
hasKeyCheap = cheap, hasKeyCheap = cheap,
@ -198,20 +199,28 @@ dropKey r key
] ]
{- Tries to copy a key's content from a remote's annex to a file. -} {- Tries to copy a key's content from a remote's annex to a file. -}
copyFromRemote :: Git.Repo -> Key -> Bool -> FilePath -> Annex Bool copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool
copyFromRemote r key tmp file copyFromRemote r key file
| not $ Git.repoIsUrl r = do | not $ Git.repoIsUrl r = do
params <- rsyncParams r params <- rsyncParams r
loc <- liftIO $ gitAnnexLocation key r loc <- liftIO $ gitAnnexLocation key r
if tmp rsyncOrCopyFile params loc file
then liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True | Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key file
else rsyncOrCopyFile params loc file
| Git.repoIsSsh r = do
when tmp $ Annex.Content.preseedTmp key file
rsyncHelper =<< rsyncParamsRemote r True key file
| Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) file | Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) file
| otherwise = error "copying from non-ssh, non-http repo not supported" | otherwise = error "copying from non-ssh, non-http repo not supported"
copyFromRemoteCheap :: Git.Repo -> Key -> FilePath -> Annex Bool
copyFromRemoteCheap r key file
| not $ Git.repoIsUrl r = do
loc <- liftIO $ gitAnnexLocation key r
liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True
| Git.repoIsSsh r = do
ok <- Annex.Content.preseedTmp key file
if ok
then copyFromRemote r key file
else return False
| otherwise = return False
{- Tries to copy a key's content to a remote's annex. -} {- Tries to copy a key's content to a remote's annex. -}
copyToRemote :: Git.Repo -> Key -> Annex Bool copyToRemote :: Git.Repo -> Key -> Annex Bool
copyToRemote r key copyToRemote r key

View file

@ -47,6 +47,7 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
r { r {
storeKey = store, storeKey = store,
retrieveKeyFile = retrieve, retrieveKeyFile = retrieve,
retrieveKeyFileCheap = retrieveCheap,
removeKey = withkey $ removeKey r, removeKey = withkey $ removeKey r,
hasKey = withkey $ hasKey r, hasKey = withkey $ hasKey r,
cost = cost r + encryptedRemoteCostAdj cost = cost r + encryptedRemoteCostAdj
@ -55,9 +56,12 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
store k = cip k >>= maybe store k = cip k >>= maybe
(storeKey r k) (storeKey r k)
(`storeKeyEncrypted` k) (`storeKeyEncrypted` k)
retrieve k t f = cip k >>= maybe retrieve k f = cip k >>= maybe
(retrieveKeyFile r k t f) (retrieveKeyFile r k f)
(`retrieveKeyFileEncrypted` f) (`retrieveKeyFileEncrypted` f)
retrieveCheap k f = cip k >>= maybe
(retrieveKeyFileCheap r k f)
(\_ -> return False)
withkey a k = cip k >>= maybe (a k) (a . snd) withkey a k = cip k >>= maybe (a k) (a . snd)
cip = cipherKey c cip = cipherKey c

View file

@ -41,6 +41,7 @@ gen r u c = do
name = Git.repoDescribe r, name = Git.repoDescribe r,
storeKey = store hooktype, storeKey = store hooktype,
retrieveKeyFile = retrieve hooktype, retrieveKeyFile = retrieve hooktype,
retrieveKeyFileCheap = retrieveCheap hooktype,
removeKey = remove hooktype, removeKey = remove hooktype,
hasKey = checkPresent r hooktype, hasKey = checkPresent r hooktype,
hasKeyCheap = False, hasKeyCheap = False,
@ -106,8 +107,11 @@ storeEncrypted h (cipher, enck) k = withTmp enck $ \tmp -> do
liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
runHook h "store" enck (Just tmp) $ return True runHook h "store" enck (Just tmp) $ return True
retrieve :: String -> Key -> Bool -> FilePath -> Annex Bool retrieve :: String -> Key -> FilePath -> Annex Bool
retrieve h k _ f = runHook h "retrieve" k (Just f) $ return True retrieve h k f = runHook h "retrieve" k (Just f) $ return True
retrieveCheap :: String -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
retrieveEncrypted :: String -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted :: String -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted h (cipher, enck) f = withTmp enck $ \tmp -> retrieveEncrypted h (cipher, enck) f = withTmp enck $ \tmp ->

View file

@ -48,6 +48,7 @@ gen r u c = do
name = Git.repoDescribe r, name = Git.repoDescribe r,
storeKey = store o, storeKey = store o,
retrieveKeyFile = retrieve o, retrieveKeyFile = retrieve o,
retrieveKeyFileCheap = retrieveCheap o,
removeKey = remove o, removeKey = remove o,
hasKey = checkPresent r o, hasKey = checkPresent r o,
hasKeyCheap = False, hasKeyCheap = False,
@ -102,19 +103,24 @@ storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do
liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
rsyncSend o enck tmp rsyncSend o enck tmp
retrieve :: RsyncOpts -> Key -> Bool -> FilePath -> Annex Bool retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool
retrieve o k tmp f = untilTrue (rsyncUrls o k) $ \u -> do retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o
when tmp $ preseedTmp k f
rsyncRemote o
-- use inplace when retrieving to support resuming -- use inplace when retrieving to support resuming
[ Param "--inplace" [ Param "--inplace"
, Param u , Param u
, Param f , Param f
] ]
retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool
retrieveCheap o k f = do
ok <- preseedTmp k f
if ok
then retrieve o k f
else return False
retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do
res <- retrieve o enck False tmp res <- retrieve o enck tmp
if res if res
then liftIO $ catchBoolIO $ do then liftIO $ catchBoolIO $ do
withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f

View file

@ -53,6 +53,7 @@ gen' r u c cst =
name = Git.repoDescribe r, name = Git.repoDescribe r,
storeKey = store this, storeKey = store this,
retrieveKeyFile = retrieve this, retrieveKeyFile = retrieve this,
retrieveKeyFileCheap = retrieveCheap this,
removeKey = remove this, removeKey = remove this,
hasKey = checkPresent this, hasKey = checkPresent this,
hasKeyCheap = False, hasKeyCheap = False,
@ -149,8 +150,8 @@ storeHelper (conn, bucket) r k file = do
xheaders = filter isxheader $ M.assocs $ fromJust $ config r xheaders = filter isxheader $ M.assocs $ fromJust $ config r
isxheader (h, _) = "x-amz-" `isPrefixOf` h isxheader (h, _) = "x-amz-" `isPrefixOf` h
retrieve :: Remote -> Key -> Bool -> FilePath -> Annex Bool retrieve :: Remote -> Key -> FilePath -> Annex Bool
retrieve r k _ f = s3Action r False $ \(conn, bucket) -> do retrieve r k f = s3Action r False $ \(conn, bucket) -> do
res <- liftIO $ getObject conn $ bucketKey r bucket k res <- liftIO $ getObject conn $ bucketKey r bucket k
case res of case res of
Right o -> do Right o -> do
@ -158,6 +159,9 @@ retrieve r k _ f = s3Action r False $ \(conn, bucket) -> do
return True return True
Left e -> s3Warning e Left e -> s3Warning e
retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
retrieveEncrypted :: Remote -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted :: Remote -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do
res <- liftIO $ getObject conn $ bucketKey r bucket enck res <- liftIO $ getObject conn $ bucketKey r bucket enck

View file

@ -40,6 +40,7 @@ gen r _ _ =
name = Git.repoDescribe r, name = Git.repoDescribe r,
storeKey = uploadKey, storeKey = uploadKey,
retrieveKeyFile = downloadKey, retrieveKeyFile = downloadKey,
retrieveKeyFileCheap = downloadKeyCheap,
removeKey = dropKey, removeKey = dropKey,
hasKey = checkKey, hasKey = checkKey,
hasKeyCheap = False, hasKeyCheap = False,
@ -48,8 +49,8 @@ gen r _ _ =
remotetype = remote remotetype = remote
} }
downloadKey :: Key -> Bool -> FilePath -> Annex Bool downloadKey :: Key -> FilePath -> Annex Bool
downloadKey key _ file = get =<< getUrls key downloadKey key file = get =<< getUrls key
where where
get [] = do get [] = do
warning "no known url" warning "no known url"
@ -58,6 +59,9 @@ downloadKey key _ file = get =<< getUrls key
showOutput -- make way for download progress bar showOutput -- make way for download progress bar
downloadUrl urls file downloadUrl urls file
downloadKeyCheap :: Key -> FilePath -> Annex Bool
downloadKeyCheap _ _ = return False
uploadKey :: Key -> Annex Bool uploadKey :: Key -> Annex Bool
uploadKey _ = do uploadKey _ = do
warning "upload to web not supported" warning "upload to web not supported"

View file

@ -43,8 +43,10 @@ data RemoteA a = Remote {
cost :: Int, cost :: Int,
-- Transfers a key to the remote. -- Transfers a key to the remote.
storeKey :: Key -> a Bool, storeKey :: Key -> a Bool,
-- retrieves a key's contents to a file (possibly a tmp file) -- retrieves a key's contents to a file
retrieveKeyFile :: Key -> Bool -> FilePath -> a Bool, retrieveKeyFile :: Key -> FilePath -> a Bool,
-- retrieves a key's contents to a tmp file, if it can be done cheaply
retrieveKeyFileCheap :: Key -> FilePath -> a Bool,
-- removes a key's contents -- removes a key's contents
removeKey :: Key -> a Bool, removeKey :: Key -> a Bool,
-- Checks if a key is present in the remote; if the remote -- Checks if a key is present in the remote; if the remote

2
debian/changelog vendored
View file

@ -8,6 +8,8 @@ git-annex (3.20120117) UNRELEASED; urgency=low
* If you have any directory special remotes, now would be a good time to * If you have any directory special remotes, now would be a good time to
fsck them, in case you were hit by the data loss bug fixed in the fsck them, in case you were hit by the data loss bug fixed in the
previous release! previous release!
* fsck --from remote --fast: Avoids expensive file transfers, at the
expense of checking file size and/or contents.
-- Joey Hess <joeyh@debian.org> Thu, 19 Jan 2012 15:12:03 -0400 -- Joey Hess <joeyh@debian.org> Thu, 19 Jan 2012 15:12:03 -0400

View file

@ -210,10 +210,11 @@ subdirectories).
With parameters, only the specified files are checked. With parameters, only the specified files are checked.
To avoid expensive checksum calculations, specify --fast
To check a remote to fsck, specify --from. To check a remote to fsck, specify --from.
To avoid expensive checksum calculations (and expensive transfers when
fscking a remote), specify --fast
* unused * unused
Checks the annex for data that does not correspond to any files present Checks the annex for data that does not correspond to any files present