fsck --from remote --fast

Avoids expensive file transfers, at the expense of checking file size
and/or contents.

Required some reworking of the remote code.
This commit is contained in:
Joey Hess 2012-01-20 13:23:11 -04:00
parent e96726caa3
commit 61dbad505d
16 changed files with 109 additions and 47 deletions

View file

@ -306,9 +306,18 @@ downloadUrl urls file = do
{- Copies a key's content, when present, to a temp file.
- This is used to speed up some rsyncs. -}
preseedTmp :: Key -> FilePath -> Annex ()
preseedTmp key file =
unlessM (liftIO $ doesFileExist file) $ whenM (inAnnex key) $ do
preseedTmp :: Key -> FilePath -> Annex Bool
preseedTmp key file = go =<< inAnnex key
where
go False = return False
go True = do
ok <- copy
when ok $ liftIO $ allowWrite file
return ok
copy = do
present <- liftIO $ doesFileExist file
if present
then return True
else do
s <- inRepo $ gitAnnexLocation key
liftIO $ whenM (copyFileExternal s file) $
allowWrite file
liftIO $ copyFileExternal s file

View file

@ -9,6 +9,7 @@ module Command.Fsck where
import Common.Annex
import Command
import qualified Annex
import qualified Remote
import qualified Types.Backend
import qualified Types.Key
@ -65,8 +66,8 @@ performRemote key file backend numcopies remote = do
showNote err
stop
Right True -> withtmp $ \tmpfile -> do
copied <- Remote.retrieveKeyFile remote key True tmpfile
if copied then go True (Just tmpfile) else go False Nothing
copied <- getfile tmpfile
if copied then go True (Just tmpfile) else go True Nothing
Right False -> go False Nothing
where
go present localcopy = check
@ -83,6 +84,15 @@ performRemote key file backend numcopies remote = do
let cleanup = liftIO $ catch (removeFile tmp) (const $ return ())
cleanup
cleanup `after` a tmp
getfile tmp = do
ok <- Remote.retrieveKeyFileCheap remote key tmp
if ok
then return ok
else do
fast <- Annex.getState Annex.fast
if fast
then return False
else Remote.retrieveKeyFile remote key tmp
{- To fsck a bare repository, fsck each key in the location log. -}
withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek

View file

@ -72,7 +72,7 @@ getKeyFile key file = do
else return True
docopy r continue = do
showAction $ "from " ++ Remote.name r
copied <- Remote.retrieveKeyFile r key False file
copied <- Remote.retrieveKeyFile r key file
if copied
then return True
else continue

View file

@ -131,7 +131,7 @@ fromPerform src move key = moveLock move key $ do
then handle move True
else do
showAction $ "from " ++ Remote.name src
ok <- getViaTmp key $ Remote.retrieveKeyFile src key False
ok <- getViaTmp key $ Remote.retrieveKeyFile src key
handle move ok
where
handle _ False = stop -- failed

View file

@ -11,6 +11,7 @@ module Remote (
name,
storeKey,
retrieveKeyFile,
retrieveKeyFileCheap,
removeKey,
hasKey,
hasKeyCheap,

View file

@ -50,6 +50,7 @@ gen r u c = do
name = Git.repoDescribe r,
storeKey = store r buprepo,
retrieveKeyFile = retrieve buprepo,
retrieveKeyFileCheap = retrieveCheap buprepo,
removeKey = remove,
hasKey = checkPresent r bupr',
hasKeyCheap = bupLocal buprepo,
@ -118,13 +119,16 @@ storeEncrypted r buprepo (cipher, enck) k = do
withEncryptedHandle cipher (L.readFile src) $ \h ->
pipeBup params (Just h) Nothing
retrieve :: BupRepo -> Key -> Bool -> FilePath -> Annex Bool
retrieve buprepo k _ f = do
retrieve :: BupRepo -> Key -> FilePath -> Annex Bool
retrieve buprepo k f = do
let params = bupParams "join" buprepo [Param $ show k]
liftIO $ catchBoolIO $ do
tofile <- openFile f WriteMode
pipeBup params Nothing (Just tofile)
retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
retrieveEncrypted :: BupRepo -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted buprepo (cipher, enck) f = do
let params = bupParams "join" buprepo [Param $ show enck]

View file

@ -41,6 +41,7 @@ gen r u c = do
name = Git.repoDescribe r,
storeKey = store dir,
retrieveKeyFile = retrieve dir,
retrieveKeyFileCheap = retrieveCheap dir,
removeKey = remove dir,
hasKey = checkPresent dir,
hasKeyCheap = True,
@ -109,11 +110,12 @@ storeHelper d key a = do
preventWrite dir
return ok
retrieve :: FilePath -> Key -> Bool -> FilePath -> Annex Bool
retrieve d k tmp f = liftIO $ withStoredFile d k $ \file ->
if tmp
then catchBoolIO $ createSymbolicLink file f >> return True
else copyFileExternal file f
retrieve :: FilePath -> Key -> FilePath -> Annex Bool
retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f
retrieveCheap :: FilePath -> Key -> FilePath -> Annex Bool
retrieveCheap d k f = liftIO $ withStoredFile d k $ \file ->
catchBoolIO $ createSymbolicLink file f >> return True
retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted d (cipher, enck) f =

View file

@ -75,6 +75,7 @@ gen r u _ = do
name = Git.repoDescribe r',
storeKey = copyToRemote r',
retrieveKeyFile = copyFromRemote r',
retrieveKeyFileCheap = copyFromRemoteCheap r',
removeKey = dropKey r',
hasKey = inAnnex r',
hasKeyCheap = cheap,
@ -198,20 +199,28 @@ dropKey r key
]
{- Tries to copy a key's content from a remote's annex to a file. -}
copyFromRemote :: Git.Repo -> Key -> Bool -> FilePath -> Annex Bool
copyFromRemote r key tmp file
copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool
copyFromRemote r key file
| not $ Git.repoIsUrl r = do
params <- rsyncParams r
loc <- liftIO $ gitAnnexLocation key r
if tmp
then liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True
else rsyncOrCopyFile params loc file
| Git.repoIsSsh r = do
when tmp $ Annex.Content.preseedTmp key file
rsyncHelper =<< rsyncParamsRemote r True key file
rsyncOrCopyFile params loc file
| Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key file
| Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) file
| otherwise = error "copying from non-ssh, non-http repo not supported"
copyFromRemoteCheap :: Git.Repo -> Key -> FilePath -> Annex Bool
copyFromRemoteCheap r key file
| not $ Git.repoIsUrl r = do
loc <- liftIO $ gitAnnexLocation key r
liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True
| Git.repoIsSsh r = do
ok <- Annex.Content.preseedTmp key file
if ok
then copyFromRemote r key file
else return False
| otherwise = return False
{- Tries to copy a key's content to a remote's annex. -}
copyToRemote :: Git.Repo -> Key -> Annex Bool
copyToRemote r key

View file

@ -47,6 +47,7 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
r {
storeKey = store,
retrieveKeyFile = retrieve,
retrieveKeyFileCheap = retrieveCheap,
removeKey = withkey $ removeKey r,
hasKey = withkey $ hasKey r,
cost = cost r + encryptedRemoteCostAdj
@ -55,9 +56,12 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
store k = cip k >>= maybe
(storeKey r k)
(`storeKeyEncrypted` k)
retrieve k t f = cip k >>= maybe
(retrieveKeyFile r k t f)
retrieve k f = cip k >>= maybe
(retrieveKeyFile r k f)
(`retrieveKeyFileEncrypted` f)
retrieveCheap k f = cip k >>= maybe
(retrieveKeyFileCheap r k f)
(\_ -> return False)
withkey a k = cip k >>= maybe (a k) (a . snd)
cip = cipherKey c

View file

@ -41,6 +41,7 @@ gen r u c = do
name = Git.repoDescribe r,
storeKey = store hooktype,
retrieveKeyFile = retrieve hooktype,
retrieveKeyFileCheap = retrieveCheap hooktype,
removeKey = remove hooktype,
hasKey = checkPresent r hooktype,
hasKeyCheap = False,
@ -106,8 +107,11 @@ storeEncrypted h (cipher, enck) k = withTmp enck $ \tmp -> do
liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
runHook h "store" enck (Just tmp) $ return True
retrieve :: String -> Key -> Bool -> FilePath -> Annex Bool
retrieve h k _ f = runHook h "retrieve" k (Just f) $ return True
retrieve :: String -> Key -> FilePath -> Annex Bool
retrieve h k f = runHook h "retrieve" k (Just f) $ return True
retrieveCheap :: String -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
retrieveEncrypted :: String -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted h (cipher, enck) f = withTmp enck $ \tmp ->

View file

@ -48,6 +48,7 @@ gen r u c = do
name = Git.repoDescribe r,
storeKey = store o,
retrieveKeyFile = retrieve o,
retrieveKeyFileCheap = retrieveCheap o,
removeKey = remove o,
hasKey = checkPresent r o,
hasKeyCheap = False,
@ -102,19 +103,24 @@ storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do
liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
rsyncSend o enck tmp
retrieve :: RsyncOpts -> Key -> Bool -> FilePath -> Annex Bool
retrieve o k tmp f = untilTrue (rsyncUrls o k) $ \u -> do
when tmp $ preseedTmp k f
rsyncRemote o
retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool
retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o
-- use inplace when retrieving to support resuming
[ Param "--inplace"
, Param u
, Param f
]
retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool
retrieveCheap o k f = do
ok <- preseedTmp k f
if ok
then retrieve o k f
else return False
retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do
res <- retrieve o enck False tmp
res <- retrieve o enck tmp
if res
then liftIO $ catchBoolIO $ do
withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f

View file

@ -53,6 +53,7 @@ gen' r u c cst =
name = Git.repoDescribe r,
storeKey = store this,
retrieveKeyFile = retrieve this,
retrieveKeyFileCheap = retrieveCheap this,
removeKey = remove this,
hasKey = checkPresent this,
hasKeyCheap = False,
@ -149,8 +150,8 @@ storeHelper (conn, bucket) r k file = do
xheaders = filter isxheader $ M.assocs $ fromJust $ config r
isxheader (h, _) = "x-amz-" `isPrefixOf` h
retrieve :: Remote -> Key -> Bool -> FilePath -> Annex Bool
retrieve r k _ f = s3Action r False $ \(conn, bucket) -> do
retrieve :: Remote -> Key -> FilePath -> Annex Bool
retrieve r k f = s3Action r False $ \(conn, bucket) -> do
res <- liftIO $ getObject conn $ bucketKey r bucket k
case res of
Right o -> do
@ -158,6 +159,9 @@ retrieve r k _ f = s3Action r False $ \(conn, bucket) -> do
return True
Left e -> s3Warning e
retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
retrieveEncrypted :: Remote -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do
res <- liftIO $ getObject conn $ bucketKey r bucket enck

View file

@ -40,6 +40,7 @@ gen r _ _ =
name = Git.repoDescribe r,
storeKey = uploadKey,
retrieveKeyFile = downloadKey,
retrieveKeyFileCheap = downloadKeyCheap,
removeKey = dropKey,
hasKey = checkKey,
hasKeyCheap = False,
@ -48,8 +49,8 @@ gen r _ _ =
remotetype = remote
}
downloadKey :: Key -> Bool -> FilePath -> Annex Bool
downloadKey key _ file = get =<< getUrls key
downloadKey :: Key -> FilePath -> Annex Bool
downloadKey key file = get =<< getUrls key
where
get [] = do
warning "no known url"
@ -58,6 +59,9 @@ downloadKey key _ file = get =<< getUrls key
showOutput -- make way for download progress bar
downloadUrl urls file
downloadKeyCheap :: Key -> FilePath -> Annex Bool
downloadKeyCheap _ _ = return False
uploadKey :: Key -> Annex Bool
uploadKey _ = do
warning "upload to web not supported"

View file

@ -43,8 +43,10 @@ data RemoteA a = Remote {
cost :: Int,
-- Transfers a key to the remote.
storeKey :: Key -> a Bool,
-- retrieves a key's contents to a file (possibly a tmp file)
retrieveKeyFile :: Key -> Bool -> FilePath -> a Bool,
-- retrieves a key's contents to a file
retrieveKeyFile :: Key -> FilePath -> a Bool,
-- retrieves a key's contents to a tmp file, if it can be done cheaply
retrieveKeyFileCheap :: Key -> FilePath -> a Bool,
-- removes a key's contents
removeKey :: Key -> a Bool,
-- Checks if a key is present in the remote; if the remote

2
debian/changelog vendored
View file

@ -8,6 +8,8 @@ git-annex (3.20120117) UNRELEASED; urgency=low
* If you have any directory special remotes, now would be a good time to
fsck them, in case you were hit by the data loss bug fixed in the
previous release!
* fsck --from remote --fast: Avoids expensive file transfers, at the
expense of checking file size and/or contents.
-- Joey Hess <joeyh@debian.org> Thu, 19 Jan 2012 15:12:03 -0400

View file

@ -210,10 +210,11 @@ subdirectories).
With parameters, only the specified files are checked.
To avoid expensive checksum calculations, specify --fast
To check a remote to fsck, specify --from.
To avoid expensive checksum calculations (and expensive transfers when
fscking a remote), specify --fast
* unused
Checks the annex for data that does not correspond to any files present