From 61dbad505d648f13394018c31ce2d718c175007e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 20 Jan 2012 13:23:11 -0400 Subject: [PATCH] fsck --from remote --fast Avoids expensive file transfers, at the expense of checking file size and/or contents. Required some reworking of the remote code. --- Annex/Content.hs | 21 +++++++++++++++------ Command/Fsck.hs | 14 ++++++++++++-- Command/Get.hs | 2 +- Command/Move.hs | 2 +- Remote.hs | 1 + Remote/Bup.hs | 8 ++++++-- Remote/Directory.hs | 12 +++++++----- Remote/Git.hs | 25 +++++++++++++++++-------- Remote/Helper/Encryptable.hs | 8 ++++++-- Remote/Hook.hs | 8 ++++++-- Remote/Rsync.hs | 26 ++++++++++++++++---------- Remote/S3.hs | 8 ++++++-- Remote/Web.hs | 8 ++++++-- Types/Remote.hs | 6 ++++-- debian/changelog | 2 ++ doc/git-annex.mdwn | 5 +++-- 16 files changed, 109 insertions(+), 47 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index efd360a09c..c21ac405ea 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -306,9 +306,18 @@ downloadUrl urls file = do {- Copies a key's content, when present, to a temp file. - This is used to speed up some rsyncs. -} -preseedTmp :: Key -> FilePath -> Annex () -preseedTmp key file = - unlessM (liftIO $ doesFileExist file) $ whenM (inAnnex key) $ do - s <- inRepo $ gitAnnexLocation key - liftIO $ whenM (copyFileExternal s file) $ - allowWrite file +preseedTmp :: Key -> FilePath -> Annex Bool +preseedTmp key file = go =<< inAnnex key + where + go False = return False + go True = do + ok <- copy + when ok $ liftIO $ allowWrite file + return ok + copy = do + present <- liftIO $ doesFileExist file + if present + then return True + else do + s <- inRepo $ gitAnnexLocation key + liftIO $ copyFileExternal s file diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 9d856ce889..59af29edb1 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -9,6 +9,7 @@ module Command.Fsck where import Common.Annex import Command +import qualified Annex import qualified Remote import qualified Types.Backend import qualified Types.Key @@ -65,8 +66,8 @@ performRemote key file backend numcopies remote = do showNote err stop Right True -> withtmp $ \tmpfile -> do - copied <- Remote.retrieveKeyFile remote key True tmpfile - if copied then go True (Just tmpfile) else go False Nothing + copied <- getfile tmpfile + if copied then go True (Just tmpfile) else go True Nothing Right False -> go False Nothing where go present localcopy = check @@ -83,6 +84,15 @@ performRemote key file backend numcopies remote = do let cleanup = liftIO $ catch (removeFile tmp) (const $ return ()) cleanup cleanup `after` a tmp + getfile tmp = do + ok <- Remote.retrieveKeyFileCheap remote key tmp + if ok + then return ok + else do + fast <- Annex.getState Annex.fast + if fast + then return False + else Remote.retrieveKeyFile remote key tmp {- To fsck a bare repository, fsck each key in the location log. -} withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek diff --git a/Command/Get.hs b/Command/Get.hs index 7f5c08a7e6..5d032e13c4 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -72,7 +72,7 @@ getKeyFile key file = do else return True docopy r continue = do showAction $ "from " ++ Remote.name r - copied <- Remote.retrieveKeyFile r key False file + copied <- Remote.retrieveKeyFile r key file if copied then return True else continue diff --git a/Command/Move.hs b/Command/Move.hs index 003ca27b86..2f2cd1b5d6 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -131,7 +131,7 @@ fromPerform src move key = moveLock move key $ do then handle move True else do showAction $ "from " ++ Remote.name src - ok <- getViaTmp key $ Remote.retrieveKeyFile src key False + ok <- getViaTmp key $ Remote.retrieveKeyFile src key handle move ok where handle _ False = stop -- failed diff --git a/Remote.hs b/Remote.hs index 133d3e2742..ffb53446b4 100644 --- a/Remote.hs +++ b/Remote.hs @@ -11,6 +11,7 @@ module Remote ( name, storeKey, retrieveKeyFile, + retrieveKeyFileCheap, removeKey, hasKey, hasKeyCheap, diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 9a20d9e60b..7329167dae 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -50,6 +50,7 @@ gen r u c = do name = Git.repoDescribe r, storeKey = store r buprepo, retrieveKeyFile = retrieve buprepo, + retrieveKeyFileCheap = retrieveCheap buprepo, removeKey = remove, hasKey = checkPresent r bupr', hasKeyCheap = bupLocal buprepo, @@ -118,13 +119,16 @@ storeEncrypted r buprepo (cipher, enck) k = do withEncryptedHandle cipher (L.readFile src) $ \h -> pipeBup params (Just h) Nothing -retrieve :: BupRepo -> Key -> Bool -> FilePath -> Annex Bool -retrieve buprepo k _ f = do +retrieve :: BupRepo -> Key -> FilePath -> Annex Bool +retrieve buprepo k f = do let params = bupParams "join" buprepo [Param $ show k] liftIO $ catchBoolIO $ do tofile <- openFile f WriteMode pipeBup params Nothing (Just tofile) +retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False + retrieveEncrypted :: BupRepo -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted buprepo (cipher, enck) f = do let params = bupParams "join" buprepo [Param $ show enck] diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 5cdb89f33d..52f4263409 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -41,6 +41,7 @@ gen r u c = do name = Git.repoDescribe r, storeKey = store dir, retrieveKeyFile = retrieve dir, + retrieveKeyFileCheap = retrieveCheap dir, removeKey = remove dir, hasKey = checkPresent dir, hasKeyCheap = True, @@ -109,11 +110,12 @@ storeHelper d key a = do preventWrite dir return ok -retrieve :: FilePath -> Key -> Bool -> FilePath -> Annex Bool -retrieve d k tmp f = liftIO $ withStoredFile d k $ \file -> - if tmp - then catchBoolIO $ createSymbolicLink file f >> return True - else copyFileExternal file f +retrieve :: FilePath -> Key -> FilePath -> Annex Bool +retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f + +retrieveCheap :: FilePath -> Key -> FilePath -> Annex Bool +retrieveCheap d k f = liftIO $ withStoredFile d k $ \file -> + catchBoolIO $ createSymbolicLink file f >> return True retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted d (cipher, enck) f = diff --git a/Remote/Git.hs b/Remote/Git.hs index 2196292cdf..efe1829610 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -75,6 +75,7 @@ gen r u _ = do name = Git.repoDescribe r', storeKey = copyToRemote r', retrieveKeyFile = copyFromRemote r', + retrieveKeyFileCheap = copyFromRemoteCheap r', removeKey = dropKey r', hasKey = inAnnex r', hasKeyCheap = cheap, @@ -198,20 +199,28 @@ dropKey r key ] {- Tries to copy a key's content from a remote's annex to a file. -} -copyFromRemote :: Git.Repo -> Key -> Bool -> FilePath -> Annex Bool -copyFromRemote r key tmp file +copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool +copyFromRemote r key file | not $ Git.repoIsUrl r = do params <- rsyncParams r loc <- liftIO $ gitAnnexLocation key r - if tmp - then liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True - else rsyncOrCopyFile params loc file - | Git.repoIsSsh r = do - when tmp $ Annex.Content.preseedTmp key file - rsyncHelper =<< rsyncParamsRemote r True key file + rsyncOrCopyFile params loc file + | Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key file | Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) file | otherwise = error "copying from non-ssh, non-http repo not supported" +copyFromRemoteCheap :: Git.Repo -> Key -> FilePath -> Annex Bool +copyFromRemoteCheap r key file + | not $ Git.repoIsUrl r = do + loc <- liftIO $ gitAnnexLocation key r + liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True + | Git.repoIsSsh r = do + ok <- Annex.Content.preseedTmp key file + if ok + then copyFromRemote r key file + else return False + | otherwise = return False + {- Tries to copy a key's content to a remote's annex. -} copyToRemote :: Git.Repo -> Key -> Annex Bool copyToRemote r key diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index ad99c3092e..0569cb5551 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -47,6 +47,7 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r { storeKey = store, retrieveKeyFile = retrieve, + retrieveKeyFileCheap = retrieveCheap, removeKey = withkey $ removeKey r, hasKey = withkey $ hasKey r, cost = cost r + encryptedRemoteCostAdj @@ -55,9 +56,12 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = store k = cip k >>= maybe (storeKey r k) (`storeKeyEncrypted` k) - retrieve k t f = cip k >>= maybe - (retrieveKeyFile r k t f) + retrieve k f = cip k >>= maybe + (retrieveKeyFile r k f) (`retrieveKeyFileEncrypted` f) + retrieveCheap k f = cip k >>= maybe + (retrieveKeyFileCheap r k f) + (\_ -> return False) withkey a k = cip k >>= maybe (a k) (a . snd) cip = cipherKey c diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 88124133aa..a08c4011ef 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -41,6 +41,7 @@ gen r u c = do name = Git.repoDescribe r, storeKey = store hooktype, retrieveKeyFile = retrieve hooktype, + retrieveKeyFileCheap = retrieveCheap hooktype, removeKey = remove hooktype, hasKey = checkPresent r hooktype, hasKeyCheap = False, @@ -106,8 +107,11 @@ storeEncrypted h (cipher, enck) k = withTmp enck $ \tmp -> do liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp runHook h "store" enck (Just tmp) $ return True -retrieve :: String -> Key -> Bool -> FilePath -> Annex Bool -retrieve h k _ f = runHook h "retrieve" k (Just f) $ return True +retrieve :: String -> Key -> FilePath -> Annex Bool +retrieve h k f = runHook h "retrieve" k (Just f) $ return True + +retrieveCheap :: String -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False retrieveEncrypted :: String -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted h (cipher, enck) f = withTmp enck $ \tmp -> diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index a1722fe17d..8de6ba6a74 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -48,6 +48,7 @@ gen r u c = do name = Git.repoDescribe r, storeKey = store o, retrieveKeyFile = retrieve o, + retrieveKeyFileCheap = retrieveCheap o, removeKey = remove o, hasKey = checkPresent r o, hasKeyCheap = False, @@ -102,19 +103,24 @@ storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp rsyncSend o enck tmp -retrieve :: RsyncOpts -> Key -> Bool -> FilePath -> Annex Bool -retrieve o k tmp f = untilTrue (rsyncUrls o k) $ \u -> do - when tmp $ preseedTmp k f - rsyncRemote o - -- use inplace when retrieving to support resuming - [ Param "--inplace" - , Param u - , Param f - ] +retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool +retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o + -- use inplace when retrieving to support resuming + [ Param "--inplace" + , Param u + , Param f + ] + +retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool +retrieveCheap o k f = do + ok <- preseedTmp k f + if ok + then retrieve o k f + else return False retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do - res <- retrieve o enck False tmp + res <- retrieve o enck tmp if res then liftIO $ catchBoolIO $ do withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f diff --git a/Remote/S3.hs b/Remote/S3.hs index b879448244..1d23b7d6f0 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -53,6 +53,7 @@ gen' r u c cst = name = Git.repoDescribe r, storeKey = store this, retrieveKeyFile = retrieve this, + retrieveKeyFileCheap = retrieveCheap this, removeKey = remove this, hasKey = checkPresent this, hasKeyCheap = False, @@ -149,8 +150,8 @@ storeHelper (conn, bucket) r k file = do xheaders = filter isxheader $ M.assocs $ fromJust $ config r isxheader (h, _) = "x-amz-" `isPrefixOf` h -retrieve :: Remote -> Key -> Bool -> FilePath -> Annex Bool -retrieve r k _ f = s3Action r False $ \(conn, bucket) -> do +retrieve :: Remote -> Key -> FilePath -> Annex Bool +retrieve r k f = s3Action r False $ \(conn, bucket) -> do res <- liftIO $ getObject conn $ bucketKey r bucket k case res of Right o -> do @@ -158,6 +159,9 @@ retrieve r k _ f = s3Action r False $ \(conn, bucket) -> do return True Left e -> s3Warning e +retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False + retrieveEncrypted :: Remote -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do res <- liftIO $ getObject conn $ bucketKey r bucket enck diff --git a/Remote/Web.hs b/Remote/Web.hs index 6db3429ebe..49c3f43f3a 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -40,6 +40,7 @@ gen r _ _ = name = Git.repoDescribe r, storeKey = uploadKey, retrieveKeyFile = downloadKey, + retrieveKeyFileCheap = downloadKeyCheap, removeKey = dropKey, hasKey = checkKey, hasKeyCheap = False, @@ -48,8 +49,8 @@ gen r _ _ = remotetype = remote } -downloadKey :: Key -> Bool -> FilePath -> Annex Bool -downloadKey key _ file = get =<< getUrls key +downloadKey :: Key -> FilePath -> Annex Bool +downloadKey key file = get =<< getUrls key where get [] = do warning "no known url" @@ -58,6 +59,9 @@ downloadKey key _ file = get =<< getUrls key showOutput -- make way for download progress bar downloadUrl urls file +downloadKeyCheap :: Key -> FilePath -> Annex Bool +downloadKeyCheap _ _ = return False + uploadKey :: Key -> Annex Bool uploadKey _ = do warning "upload to web not supported" diff --git a/Types/Remote.hs b/Types/Remote.hs index d524ea2ca5..003dd5342a 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -43,8 +43,10 @@ data RemoteA a = Remote { cost :: Int, -- Transfers a key to the remote. storeKey :: Key -> a Bool, - -- retrieves a key's contents to a file (possibly a tmp file) - retrieveKeyFile :: Key -> Bool -> FilePath -> a Bool, + -- retrieves a key's contents to a file + retrieveKeyFile :: Key -> FilePath -> a Bool, + -- retrieves a key's contents to a tmp file, if it can be done cheaply + retrieveKeyFileCheap :: Key -> FilePath -> a Bool, -- removes a key's contents removeKey :: Key -> a Bool, -- Checks if a key is present in the remote; if the remote diff --git a/debian/changelog b/debian/changelog index 6849931486..5adba128f7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -8,6 +8,8 @@ git-annex (3.20120117) UNRELEASED; urgency=low * If you have any directory special remotes, now would be a good time to fsck them, in case you were hit by the data loss bug fixed in the previous release! + * fsck --from remote --fast: Avoids expensive file transfers, at the + expense of checking file size and/or contents. -- Joey Hess Thu, 19 Jan 2012 15:12:03 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index edf300d8d7..a377665c68 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -210,9 +210,10 @@ subdirectories). With parameters, only the specified files are checked. - To avoid expensive checksum calculations, specify --fast - To check a remote to fsck, specify --from. + + To avoid expensive checksum calculations (and expensive transfers when + fscking a remote), specify --fast * unused