From de96ee72089c6673f36b02cfdc440288cd0be206 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 18 Jan 2012 16:19:57 -0400 Subject: [PATCH 01/22] thought --- ...vent_repeated_password_prompts_for_one_command.mdwn | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn index 6d1552fe4e..808b8496f7 100644 --- a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn +++ b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn @@ -6,3 +6,13 @@ Simple, when performing various git annex command over ssh, in particular a mult > > Combining multiple operations into a single ssh is on the todo list, but > very far down it. --[[Joey]] + +>> OTOH, automatically running ssh in ControlMaster mode (and stopping it +>> at exit) would be useful and not hard thing for git-annex to do. +>> +>> It'd just need to set the appropriate config options, setting +>> ControlPath to a per-remote socket location that includes git-annex's +>> pid. Then at shutdown, run `ssh -O exit` on each such socket. +>> +>> Complicated slightly by not doing this if the user has already set up +>> more broad ssh connection caching. --[[Joey]] From 50c063df069682bdac2af3b1746933da70a519b8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 18 Jan 2012 17:30:21 -0400 Subject: [PATCH 02/22] add --- doc/todo/fsck_special_remotes.mdwn | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 doc/todo/fsck_special_remotes.mdwn diff --git a/doc/todo/fsck_special_remotes.mdwn b/doc/todo/fsck_special_remotes.mdwn new file mode 100644 index 0000000000..c81c56c856 --- /dev/null +++ b/doc/todo/fsck_special_remotes.mdwn @@ -0,0 +1,11 @@ +`git annex fsck --from remote` + +Basically, this needs to receive each file in turn from the remote, to a +temp file, and then run the existing fsck code on it. Could be quite +expensive, but sometimes you really want to check. + +An unencrypted directory special remote could be optimised, by not actually +copying the file, just dropping a symlink, etc. + +The WORM backend doesn't care about file content, so it would be nice to +avoid transferring the content at all, and only send the size. From d36525e9745b90cc04abfeac6500ff646cb9c89b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Jan 2012 13:51:30 -0400 Subject: [PATCH 03/22] convert fsckKey to a Maybe This way it's clear when a backend does not implement its own fsck checks. --- Backend/SHA.hs | 2 +- Backend/URL.hs | 2 +- Backend/WORM.hs | 2 +- Command/Fsck.hs | 4 +++- Types/Backend.hs | 4 ++-- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Backend/SHA.hs b/Backend/SHA.hs index a1124dfe2e..29f4e2e942 100644 --- a/Backend/SHA.hs +++ b/Backend/SHA.hs @@ -32,7 +32,7 @@ genBackend size b = Backend { name = shaName size , getKey = keyValue size - , fsckKey = checkKeyChecksum size + , fsckKey = Just $ checkKeyChecksum size } genBackendE :: SHASize -> Maybe Backend diff --git a/Backend/URL.hs b/Backend/URL.hs index 7f621b00f2..6406095ca1 100644 --- a/Backend/URL.hs +++ b/Backend/URL.hs @@ -21,7 +21,7 @@ backend :: Backend backend = Backend { name = "URL", getKey = const (return Nothing), - fsckKey = const (return True) + fsckKey = Nothing } fromUrl :: String -> Key diff --git a/Backend/WORM.hs b/Backend/WORM.hs index ae9833e30c..c022fd413b 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -18,7 +18,7 @@ backend :: Backend backend = Backend { name = "WORM", getKey = keyValue, - fsckKey = const (return True) + fsckKey = Nothing } {- The key includes the file size, modification time, and the diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 680828748d..051a58fb47 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -137,7 +137,9 @@ checkKeySize key = do checkBackend :: Backend -> Key -> Annex Bool -checkBackend = Types.Backend.fsckKey +checkBackend backend key = case Types.Backend.fsckKey backend of + Nothing -> return True + Just a -> a key checkKeyNumCopies :: Key -> FilePath -> Maybe Int -> Annex Bool checkKeyNumCopies key file numcopies = do diff --git a/Types/Backend.hs b/Types/Backend.hs index 025293a906..1966d667f7 100644 --- a/Types/Backend.hs +++ b/Types/Backend.hs @@ -16,8 +16,8 @@ data BackendA a = Backend { name :: String, -- converts a filename to a key getKey :: FilePath -> a (Maybe Key), - -- called during fsck to check a key - fsckKey :: Key -> a Bool + -- called during fsck to check a key, if the backend has its own checks + fsckKey :: Maybe (Key -> a Bool) } instance Show (BackendA a) where From 90319afa41ca6d8a9ffe00d787dc3dcdff320f00 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Jan 2012 15:24:05 -0400 Subject: [PATCH 04/22] fsck --from Fscking a remote is now supported. It's done by retrieving the contents of the specified files from the remote, and checking them, so can be an expensive operation. (Several optimisations are possible, to speed it up, of course.. This is the slow and stupid remote fsck to start with.) Still, if the remote is a special remote, or a git repository that you cannot run fsck in locally, it's nice to have the ability to fsck it. If you have any directory special remotes, now would be a good time to fsck them, in case you were hit by the data loss bug fixed in the previous release! --- Backend/SHA.hs | 15 ++---- Command/Drop.hs | 2 +- Command/Fsck.hs | 132 +++++++++++++++++++++++++++++++++++---------- Command/Move.hs | 3 +- Remote.hs | 6 +-- Types/Backend.hs | 2 +- debian/changelog | 13 +++++ doc/git-annex.mdwn | 2 + 8 files changed, 131 insertions(+), 44 deletions(-) diff --git a/Backend/SHA.hs b/Backend/SHA.hs index 29f4e2e942..3adac65d8c 100644 --- a/Backend/SHA.hs +++ b/Backend/SHA.hs @@ -9,7 +9,6 @@ module Backend.SHA (backends) where import Common.Annex import qualified Annex -import Annex.Content import Types.Backend import Types.Key import qualified Build.SysConfig as SysConfig @@ -97,18 +96,14 @@ keyValueE size file = keyValue size file >>= maybe (return Nothing) addE | otherwise = naiveextension {- A key's checksum is checked during fsck. -} -checkKeyChecksum :: SHASize -> Key -> Annex Bool -checkKeyChecksum size key = do +checkKeyChecksum :: SHASize -> Key -> FilePath -> Annex Bool +checkKeyChecksum size key file = do fast <- Annex.getState Annex.fast - file <- inRepo $ gitAnnexLocation key present <- liftIO $ doesFileExist file if not present || fast then return True - else check =<< shaN size file + else check <$> shaN size file where check s - | s == dropExtension (keyName key) = return True - | otherwise = do - dest <- moveBad key - warning $ "Bad file content; moved to " ++ dest - return False + | s == dropExtension (keyName key) = True + | otherwise = False diff --git a/Command/Drop.hs b/Command/Drop.hs index 578ab62b97..b40de00cb2 100644 --- a/Command/Drop.hs +++ b/Command/Drop.hs @@ -87,7 +87,7 @@ cleanupRemote key remote ok = do -- better safe than sorry: assume the remote dropped the key -- even if it seemed to fail; the failure could have occurred -- after it really dropped it - Remote.logStatus remote key False + Remote.logStatus remote key InfoMissing return ok {- Checks specified remotes to verify that enough copies of a key exist to diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 051a58fb47..aec29a39b8 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -20,20 +20,31 @@ import Annex.UUID import Utility.DataUnits import Utility.FileMode import Config +import qualified Option def :: [Command] -def = [command "fsck" paramPaths seek "check for problems"] +def = [withOptions options $ command "fsck" paramPaths seek + "check for problems"] + +fromOption :: Option +fromOption = Option.field ['f'] "from" paramRemote "check remote" + +options :: [Option] +options = [fromOption] seek :: [CommandSeek] seek = - [ withNumCopies $ \n -> whenAnnexed $ start n + [ withField fromOption Remote.byName $ \from -> + withNumCopies $ \n -> whenAnnexed $ start from n , withBarePresentKeys startBare ] -start :: Maybe Int -> FilePath -> (Key, Backend) -> CommandStart -start numcopies file (key, backend) = do +start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart +start from numcopies file (key, backend) = do showStart "fsck" file - next $ perform key file backend numcopies + case from of + Nothing -> next $ perform key file backend numcopies + Just r -> next $ performRemote key file backend numcopies r perform :: Key -> FilePath -> Backend -> Maybe Int -> CommandPerform perform key file backend numcopies = check @@ -44,6 +55,27 @@ perform key file backend numcopies = check , checkKeyNumCopies key file numcopies ] +{- To fsck a remote, the content is retrieved to a tmp file, + - and checked locally. -} +performRemote :: Key -> FilePath -> Backend -> Maybe Int -> Remote -> CommandPerform +performRemote key file backend numcopies remote = withTmp key $ \tmpfile -> do + v <- Remote.hasKey remote key + case v of + Left err -> do + showNote err + stop + Right True -> do + copied <- Remote.retrieveKeyFile remote key tmpfile + if copied then go True (Just tmpfile) else go False Nothing + Right False -> go False Nothing + where + go present localcopy = check + [ verifyLocationLogRemote key file remote present + , checkKeySizeRemote key remote localcopy + , checkBackendRemote backend key remote localcopy + , checkKeyNumCopies key file numcopies + ] + {- To fsck a bare repository, fsck each key in the location log. -} withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek withBarePresentKeys a params = isBareRepo >>= go @@ -93,26 +125,33 @@ verifyLocationLog key desc = do preventWrite (parentDir f) u <- getUUID - uuids <- Remote.keyLocations key + verifyLocationLog' key desc present u (logChange key u) +verifyLocationLogRemote :: Key -> String -> Remote -> Bool -> Annex Bool +verifyLocationLogRemote key desc remote present = + verifyLocationLog' key desc present (Remote.uuid remote) + (Remote.logStatus remote key) + +verifyLocationLog' :: Key -> String -> Bool -> UUID -> (LogStatus -> Annex ()) -> Annex Bool +verifyLocationLog' key desc present u bad = do + uuids <- Remote.keyLocations key case (present, u `elem` uuids) of (True, False) -> do - fix u InfoPresent + fix InfoPresent -- There is no data loss, so do not fail. return True (False, True) -> do - fix u InfoMissing + fix InfoMissing warning $ "** Based on the location log, " ++ desc ++ "\n** was expected to be present, " ++ "but its content is missing." return False _ -> return True - where - fix u s = do + fix s = do showNote "fixing location log" - logChange key u s + bad s {- The size of the data for a key is checked against the size encoded in - the key's metadata, if available. -} @@ -120,26 +159,49 @@ checkKeySize :: Key -> Annex Bool checkKeySize key = do file <- inRepo $ gitAnnexLocation key present <- liftIO $ doesFileExist file - case (present, Types.Key.keySize key) of - (_, Nothing) -> return True - (False, _) -> return True - (True, Just size) -> do - stat <- liftIO $ getFileStatus file - let size' = fromIntegral (fileSize stat) - if size == size' - then return True - else do - dest <- moveBad key - warning $ "Bad file size (" ++ - compareSizes storageUnits True size size' ++ - "); moved to " ++ dest - return False + if present + then checkKeySize' key file badContent + else return True +checkKeySizeRemote :: Key -> Remote -> Maybe FilePath -> Annex Bool +checkKeySizeRemote _ _ Nothing = return True +checkKeySizeRemote key remote (Just file) = checkKeySize' key file + (badContentRemote remote) + +checkKeySize' :: Key -> FilePath -> (Key -> Annex String) -> Annex Bool +checkKeySize' key file bad = case Types.Key.keySize key of + Nothing -> return True + Just size -> do + stat <- liftIO $ getFileStatus file + let size' = fromIntegral (fileSize stat) + if size == size' + then return True + else do + msg <- bad key + warning $ "Bad file size (" ++ + compareSizes storageUnits True size size' ++ + "); " ++ msg + return False checkBackend :: Backend -> Key -> Annex Bool -checkBackend backend key = case Types.Backend.fsckKey backend of +checkBackend backend key = do + file <- inRepo (gitAnnexLocation key) + checkBackend' backend key (Just file) badContent + +checkBackendRemote :: Backend -> Key -> Remote -> Maybe FilePath -> Annex Bool +checkBackendRemote backend key remote localcopy = + checkBackend' backend key localcopy (badContentRemote remote) + +checkBackend' :: Backend -> Key -> Maybe FilePath -> (Key -> Annex String) -> Annex Bool +checkBackend' _ _ Nothing _ = return True +checkBackend' backend key (Just file) bad = case Types.Backend.fsckKey backend of Nothing -> return True - Just a -> a key + Just a -> do + ok <- a key file + unless ok $ do + msg <- bad key + warning $ "Bad file content; " ++ msg + return ok checkKeyNumCopies :: Key -> FilePath -> Maybe Int -> Annex Bool checkKeyNumCopies key file numcopies = do @@ -168,3 +230,19 @@ missingNote file present needed untrusted = missingNote file present needed [] ++ "\nThe following untrusted locations may also have copies: " ++ "\n" ++ untrusted + +{- Bad content is moved aside. -} +badContent :: Key -> Annex String +badContent key = do + dest <- moveBad key + return $ "moved to " ++ dest + +badContentRemote :: Remote -> Key -> Annex String +badContentRemote remote key = do + ok <- Remote.removeKey remote key + -- better safe than sorry: assume the remote dropped the key + -- even if it seemed to fail; the failure could have occurred + -- after it really dropped it + Remote.logStatus remote key InfoMissing + return $ (if ok then "dropped from " else "failed to drop from ") + ++ Remote.name remote diff --git a/Command/Move.hs b/Command/Move.hs index 2efaebbcb1..2f2cd1b5d6 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -15,6 +15,7 @@ import Annex.Content import qualified Remote import Annex.UUID import qualified Option +import Logs.Presence def :: [Command] def = [withOptions options $ command "move" paramPaths seek @@ -97,7 +98,7 @@ toPerform dest move key = moveLock move key $ do Right True -> finish where finish = do - Remote.logStatus dest key True + Remote.logStatus dest key InfoPresent if move then do whenM (inAnnex key) $ removeAnnex key diff --git a/Remote.hs b/Remote.hs index 7feb84d615..133d3e2742 100644 --- a/Remote.hs +++ b/Remote.hs @@ -212,7 +212,5 @@ forceTrust level remotename = do - in the local repo, not on the remote. The process of transferring the - key to the remote, or removing the key from it *may* log the change - on the remote, but this cannot always be relied on. -} -logStatus :: Remote -> Key -> Bool -> Annex () -logStatus remote key present = logChange key (uuid remote) status - where - status = if present then InfoPresent else InfoMissing +logStatus :: Remote -> Key -> LogStatus -> Annex () +logStatus remote key present = logChange key (uuid remote) present diff --git a/Types/Backend.hs b/Types/Backend.hs index 1966d667f7..d52cec5471 100644 --- a/Types/Backend.hs +++ b/Types/Backend.hs @@ -17,7 +17,7 @@ data BackendA a = Backend { -- converts a filename to a key getKey :: FilePath -> a (Maybe Key), -- called during fsck to check a key, if the backend has its own checks - fsckKey :: Maybe (Key -> a Bool) + fsckKey :: Maybe (Key -> FilePath -> a Bool) } instance Show (BackendA a) where diff --git a/debian/changelog b/debian/changelog index d6c4419bb0..6849931486 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,16 @@ +git-annex (3.20120117) UNRELEASED; urgency=low + + * fsck --from: Fscking a remote is now supported. It's done by retrieving + the contents of the specified files from the remote, and checking them, + so can be an expensive operation. Still, if the remote is a special + remote, or a git repository that you cannot run fsck in locally, it's + nice to have the ability to fsck it. + * If you have any directory special remotes, now would be a good time to + fsck them, in case you were hit by the data loss bug fixed in the + previous release! + + -- Joey Hess Thu, 19 Jan 2012 15:12:03 -0400 + git-annex (3.20120116) unstable; urgency=medium * Fix data loss bug in directory special remote, when moving a file diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 59b756de83..edf300d8d7 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -212,6 +212,8 @@ subdirectories). To avoid expensive checksum calculations, specify --fast + To check a remote to fsck, specify --from. + * unused Checks the annex for data that does not correspond to any files present From 711c15456109f77a04832d8ca0871ce56ffaffe4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Jan 2012 15:26:43 -0400 Subject: [PATCH 05/22] update NEWS Add news item recommending fscking directory special remotes. Remote news item about URL backend being removed; it was later added back to be used by git annex addurl --fast. Link NEWS into top level. --- NEWS | 1 + debian/NEWS | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) create mode 120000 NEWS diff --git a/NEWS b/NEWS new file mode 120000 index 0000000000..798088bec2 --- /dev/null +++ b/NEWS @@ -0,0 +1 @@ +debian/NEWS \ No newline at end of file diff --git a/debian/NEWS b/debian/NEWS index f807d05255..298ff1f7e0 100644 --- a/debian/NEWS +++ b/debian/NEWS @@ -1,8 +1,13 @@ -git-annex (3.20110702) unstable; urgency=low +git-annex (3.20120119) unstable; urgency=low - The URL backend has been removed. Instead the new web remote can be used. + There was a bug in the handling of directory special remotes that + could cause partial file contents to be stored in them. If you use + a directory special remote, you should fsck it, to avoid potential + data loss. - -- Joey Hess Fri, 01 Jul 2011 15:40:51 -0400 + Example: git annex fsck --from mydirectory + + -- Joey Hess Thu, 19 Jan 2012 15:24:23 -0400 git-annex (3.20110624) experimental; urgency=low From 94aa6b42b5bc5c37c7017fb3493010a56a9d211e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Jan 2012 15:49:55 -0400 Subject: [PATCH 06/22] optimise fsck --from rsync special remote When a file is present locally, the remote's version can be rsynced to a copy of it, which will avoid wasting a lot of bandwidth. --- Remote/Rsync.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index c7b60467c4..eeb116675f 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -19,6 +19,8 @@ import Remote.Helper.Special import Remote.Helper.Encryptable import Crypto import Utility.RsyncFile +import Utility.CopyFile +import Utility.FileMode type RsyncUrl = String @@ -103,13 +105,20 @@ storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do rsyncSend o enck tmp retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool -retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> +retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> do + unlessM (liftIO $ doesFileExist f) $ whenM (inAnnex k) $ preseed rsyncRemote o -- use inplace when retrieving to support resuming [ Param "--inplace" , Param u , Param f ] + where + -- this speeds up fsck --from + preseed = do + s <- inRepo $ gitAnnexLocation k + liftIO $ whenM (copyFileExternal s f) $ + allowWrite f retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do From 06b0cb6224377fd2ea86e4e209e94a502f92716e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Jan 2012 16:07:36 -0400 Subject: [PATCH 07/22] add tmp flag parameter to retrieveKeyFile --- Command/Fsck.hs | 2 +- Command/Get.hs | 2 +- Command/Move.hs | 2 +- Remote/Bup.hs | 4 ++-- Remote/Directory.hs | 5 +++-- Remote/Git.hs | 4 ++-- Remote/Helper/Encryptable.hs | 4 ++-- Remote/Hook.hs | 4 ++-- Remote/Rsync.hs | 17 +++++++++-------- Remote/S3.hs | 4 ++-- Remote/Web.hs | 4 ++-- Types/Remote.hs | 4 ++-- 12 files changed, 29 insertions(+), 27 deletions(-) diff --git a/Command/Fsck.hs b/Command/Fsck.hs index aec29a39b8..77e189f436 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -65,7 +65,7 @@ performRemote key file backend numcopies remote = withTmp key $ \tmpfile -> do showNote err stop Right True -> do - copied <- Remote.retrieveKeyFile remote key tmpfile + copied <- Remote.retrieveKeyFile remote key True tmpfile if copied then go True (Just tmpfile) else go False Nothing Right False -> go False Nothing where diff --git a/Command/Get.hs b/Command/Get.hs index 5d032e13c4..7f5c08a7e6 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -72,7 +72,7 @@ getKeyFile key file = do else return True docopy r continue = do showAction $ "from " ++ Remote.name r - copied <- Remote.retrieveKeyFile r key file + copied <- Remote.retrieveKeyFile r key False file if copied then return True else continue diff --git a/Command/Move.hs b/Command/Move.hs index 2f2cd1b5d6..003ca27b86 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -131,7 +131,7 @@ fromPerform src move key = moveLock move key $ do then handle move True else do showAction $ "from " ++ Remote.name src - ok <- getViaTmp key $ Remote.retrieveKeyFile src key + ok <- getViaTmp key $ Remote.retrieveKeyFile src key False handle move ok where handle _ False = stop -- failed diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 37f3e02e09..9a20d9e60b 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -118,8 +118,8 @@ storeEncrypted r buprepo (cipher, enck) k = do withEncryptedHandle cipher (L.readFile src) $ \h -> pipeBup params (Just h) Nothing -retrieve :: BupRepo -> Key -> FilePath -> Annex Bool -retrieve buprepo k f = do +retrieve :: BupRepo -> Key -> Bool -> FilePath -> Annex Bool +retrieve buprepo k _ f = do let params = bupParams "join" buprepo [Param $ show k] liftIO $ catchBoolIO $ do tofile <- openFile f WriteMode diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 23265dabc9..9705d58435 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -109,8 +109,9 @@ storeHelper d key a = do preventWrite dir return ok -retrieve :: FilePath -> Key -> FilePath -> Annex Bool -retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f +retrieve :: FilePath -> Key -> Bool -> FilePath -> Annex Bool +retrieve d k _ f = do + liftIO $ withStoredFile d k $ \file -> copyFileExternal file f retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted d (cipher, enck) f = diff --git a/Remote/Git.hs b/Remote/Git.hs index 7964074496..5dae3334ec 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -198,8 +198,8 @@ dropKey r key ] {- Tries to copy a key's content from a remote's annex to a file. -} -copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool -copyFromRemote r key file +copyFromRemote :: Git.Repo -> Key -> Bool -> FilePath -> Annex Bool +copyFromRemote r key _ file | not $ Git.repoIsUrl r = do params <- rsyncParams r loc <- liftIO $ gitAnnexLocation key r diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 3abea7bc6a..ad99c3092e 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -55,8 +55,8 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = store k = cip k >>= maybe (storeKey r k) (`storeKeyEncrypted` k) - retrieve k f = cip k >>= maybe - (retrieveKeyFile r k f) + retrieve k t f = cip k >>= maybe + (retrieveKeyFile r k t f) (`retrieveKeyFileEncrypted` f) withkey a k = cip k >>= maybe (a k) (a . snd) cip = cipherKey c diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 6c4a044ac9..88124133aa 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -106,8 +106,8 @@ storeEncrypted h (cipher, enck) k = withTmp enck $ \tmp -> do liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp runHook h "store" enck (Just tmp) $ return True -retrieve :: String -> Key -> FilePath -> Annex Bool -retrieve h k f = runHook h "retrieve" k (Just f) $ return True +retrieve :: String -> Key -> Bool -> FilePath -> Annex Bool +retrieve h k _ f = runHook h "retrieve" k (Just f) $ return True retrieveEncrypted :: String -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted h (cipher, enck) f = withTmp enck $ \tmp -> diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index eeb116675f..b4ff3d6f17 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -104,9 +104,9 @@ storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp rsyncSend o enck tmp -retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool -retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> do - unlessM (liftIO $ doesFileExist f) $ whenM (inAnnex k) $ preseed +retrieve :: RsyncOpts -> Key -> Bool -> FilePath -> Annex Bool +retrieve o k tmp f = untilTrue (rsyncUrls o k) $ \u -> do + when tmp $ preseed rsyncRemote o -- use inplace when retrieving to support resuming [ Param "--inplace" @@ -115,14 +115,15 @@ retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> do ] where -- this speeds up fsck --from - preseed = do - s <- inRepo $ gitAnnexLocation k - liftIO $ whenM (copyFileExternal s f) $ - allowWrite f + preseed = unlessM (liftIO $ doesFileExist f) $ + whenM (inAnnex k) $ do + s <- inRepo $ gitAnnexLocation k + liftIO $ whenM (copyFileExternal s f) $ + allowWrite f retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do - res <- retrieve o enck tmp + res <- retrieve o enck False tmp if res then liftIO $ catchBoolIO $ do withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f diff --git a/Remote/S3.hs b/Remote/S3.hs index bef89b5539..b879448244 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -149,8 +149,8 @@ storeHelper (conn, bucket) r k file = do xheaders = filter isxheader $ M.assocs $ fromJust $ config r isxheader (h, _) = "x-amz-" `isPrefixOf` h -retrieve :: Remote -> Key -> FilePath -> Annex Bool -retrieve r k f = s3Action r False $ \(conn, bucket) -> do +retrieve :: Remote -> Key -> Bool -> FilePath -> Annex Bool +retrieve r k _ f = s3Action r False $ \(conn, bucket) -> do res <- liftIO $ getObject conn $ bucketKey r bucket k case res of Right o -> do diff --git a/Remote/Web.hs b/Remote/Web.hs index 4d6348e596..6db3429ebe 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -48,8 +48,8 @@ gen r _ _ = remotetype = remote } -downloadKey :: Key -> FilePath -> Annex Bool -downloadKey key file = get =<< getUrls key +downloadKey :: Key -> Bool -> FilePath -> Annex Bool +downloadKey key _ file = get =<< getUrls key where get [] = do warning "no known url" diff --git a/Types/Remote.hs b/Types/Remote.hs index 216b34857d..d524ea2ca5 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -43,8 +43,8 @@ data RemoteA a = Remote { cost :: Int, -- Transfers a key to the remote. storeKey :: Key -> a Bool, - -- retrieves a key's contents to a file - retrieveKeyFile :: Key -> FilePath -> a Bool, + -- retrieves a key's contents to a file (possibly a tmp file) + retrieveKeyFile :: Key -> Bool -> FilePath -> a Bool, -- removes a key's contents removeKey :: Key -> a Bool, -- Checks if a key is present in the remote; if the remote From 71cb04bb6d7d787181f158cad15a67628b0b4402 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Jan 2012 16:14:40 -0400 Subject: [PATCH 08/22] optimize fsck --from directory special remote No need to copy anything, just symlink to the file. --- Remote/Directory.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 9705d58435..5cdb89f33d 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -110,8 +110,10 @@ storeHelper d key a = do return ok retrieve :: FilePath -> Key -> Bool -> FilePath -> Annex Bool -retrieve d k _ f = do - liftIO $ withStoredFile d k $ \file -> copyFileExternal file f +retrieve d k tmp f = liftIO $ withStoredFile d k $ \file -> + if tmp + then catchBoolIO $ createSymbolicLink file f >> return True + else copyFileExternal file f retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted d (cipher, enck) f = From f35a84fac750d8e246f3fcd1f25054951eff8b7e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Jan 2012 16:46:15 -0400 Subject: [PATCH 09/22] use a different tmp file when fscking remote data Since the content might be symlinked into place, it's not appropriate to use withTmp here. --- Command/Fsck.hs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 77e189f436..9d856ce889 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -58,13 +58,13 @@ perform key file backend numcopies = check {- To fsck a remote, the content is retrieved to a tmp file, - and checked locally. -} performRemote :: Key -> FilePath -> Backend -> Maybe Int -> Remote -> CommandPerform -performRemote key file backend numcopies remote = withTmp key $ \tmpfile -> do +performRemote key file backend numcopies remote = do v <- Remote.hasKey remote key case v of Left err -> do showNote err stop - Right True -> do + Right True -> withtmp $ \tmpfile -> do copied <- Remote.retrieveKeyFile remote key True tmpfile if copied then go True (Just tmpfile) else go False Nothing Right False -> go False Nothing @@ -75,6 +75,14 @@ performRemote key file backend numcopies remote = withTmp key $ \tmpfile -> do , checkBackendRemote backend key remote localcopy , checkKeyNumCopies key file numcopies ] + withtmp a = do + pid <- liftIO getProcessID + t <- fromRepo gitAnnexTmpDir + let tmp = t "fsck" ++ show pid ++ "." ++ keyFile key + liftIO $ createDirectoryIfMissing True t + let cleanup = liftIO $ catch (removeFile tmp) (const $ return ()) + cleanup + cleanup `after` a tmp {- To fsck a bare repository, fsck each key in the location log. -} withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek From effaa298fabed963ec8a616d206662682e70e61a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Jan 2012 17:05:39 -0400 Subject: [PATCH 10/22] optimise fsck --from normal git remotes For a local git remote, can symlink the file. For a git remote using rsync, can preseed any local content. There are a few reasons to use fsck --from on a normal git remote. One is if it's using gitosis or similar, and you don't have shell access to run git annex locally. Another reason could be if you just want to fsck certian files of a bare remote. --- Annex/Content.hs | 11 +++++++++++ Remote/Git.hs | 10 +++++++--- Remote/Rsync.hs | 11 +---------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index ba67a2f151..efd360a09c 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -22,6 +22,7 @@ module Annex.Content ( getKeysPresent, saveState, downloadUrl, + preseedTmp, ) where import System.IO.Error (try) @@ -40,6 +41,7 @@ import Utility.FileMode import qualified Utility.Url as Url import Types.Key import Utility.DataUnits +import Utility.CopyFile import Config import Annex.Exception @@ -301,3 +303,12 @@ downloadUrl urls file = do g <- gitRepo o <- map Param . words <$> getConfig g "web-options" "" liftIO $ anyM (\u -> Url.download u o file) urls + +{- Copies a key's content, when present, to a temp file. + - This is used to speed up some rsyncs. -} +preseedTmp :: Key -> FilePath -> Annex () +preseedTmp key file = + unlessM (liftIO $ doesFileExist file) $ whenM (inAnnex key) $ do + s <- inRepo $ gitAnnexLocation key + liftIO $ whenM (copyFileExternal s file) $ + allowWrite file diff --git a/Remote/Git.hs b/Remote/Git.hs index 5dae3334ec..2196292cdf 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -199,12 +199,16 @@ dropKey r key {- Tries to copy a key's content from a remote's annex to a file. -} copyFromRemote :: Git.Repo -> Key -> Bool -> FilePath -> Annex Bool -copyFromRemote r key _ file +copyFromRemote r key tmp file | not $ Git.repoIsUrl r = do params <- rsyncParams r loc <- liftIO $ gitAnnexLocation key r - rsyncOrCopyFile params loc file - | Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key file + if tmp + then liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True + else rsyncOrCopyFile params loc file + | Git.repoIsSsh r = do + when tmp $ Annex.Content.preseedTmp key file + rsyncHelper =<< rsyncParamsRemote r True key file | Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) file | otherwise = error "copying from non-ssh, non-http repo not supported" diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index b4ff3d6f17..a1722fe17d 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -19,8 +19,6 @@ import Remote.Helper.Special import Remote.Helper.Encryptable import Crypto import Utility.RsyncFile -import Utility.CopyFile -import Utility.FileMode type RsyncUrl = String @@ -106,20 +104,13 @@ storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do retrieve :: RsyncOpts -> Key -> Bool -> FilePath -> Annex Bool retrieve o k tmp f = untilTrue (rsyncUrls o k) $ \u -> do - when tmp $ preseed + when tmp $ preseedTmp k f rsyncRemote o -- use inplace when retrieving to support resuming [ Param "--inplace" , Param u , Param f ] - where - -- this speeds up fsck --from - preseed = unlessM (liftIO $ doesFileExist f) $ - whenM (inAnnex k) $ do - s <- inRepo $ gitAnnexLocation k - liftIO $ whenM (copyFileExternal s f) $ - allowWrite f retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do From 7fa95eff5e0bae92b27db793f8e2f4972218acd4 Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawnBJ6Dv1glxzzi4qIzGFNa6F-mfHIvv9Ck" Date: Thu, 19 Jan 2012 23:10:25 +0000 Subject: [PATCH 11/22] --- doc/tips/visualizing_repositories_with_gource.mdwn | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/tips/visualizing_repositories_with_gource.mdwn b/doc/tips/visualizing_repositories_with_gource.mdwn index 884f70c444..25a69c1b7a 100644 --- a/doc/tips/visualizing_repositories_with_gource.mdwn +++ b/doc/tips/visualizing_repositories_with_gource.mdwn @@ -15,8 +15,8 @@ or removed from them with git-annex. To use gource this way, first go into the directory you want to visualize, and use `git annex log` to make an input file for `gource`: - git annex log --gource | tee gorce.log + git annex log --gource | tee gource.log sort gource.log | gource --log-format custom - The `git annex log` can take a while, to speed it up you can use something -like `--after "4 monts ago"` to limit how far back it goes. +like `--after "4 months ago"` to limit how far back it goes. From 3783ccf2529b7e566183684579835bb9541a4596 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Jan 2012 20:41:20 -0400 Subject: [PATCH 12/22] design --- ...ated_password_prompts_for_one_command.mdwn | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn index 808b8496f7..e5b5e3c5cc 100644 --- a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn +++ b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn @@ -16,3 +16,34 @@ Simple, when performing various git annex command over ssh, in particular a mult >> >> Complicated slightly by not doing this if the user has already set up >> more broad ssh connection caching. --[[Joey]] + +--- + +Slightly more elaborate design for using ssh connection caching: + +* Per-uuid ssh socket in `.git/annex/ssh/user@host.socket` +* Can be shared amoung concurrent git-annex processes. +* Run ssh like: `ssh -S .git/annex/ssh/user@host.socket -o ControlMaster=auto -o ControlPersist=yes user@host` +* At shutdown, enumerate all existing sockets, and on each: + 1. Rename to .old (prevents various races) + 2. `ssh -q -S .git/annex/ssh/user@host.old -o ControlMaster=auto -o ControlPersist=yes -O stop user@host` + (Will exit nonzero if ssh is not running on that socket.) + 3. And then remove the socket. +* Do same *at startup*. Why? In case an old git-annex was interrupted + and left behind a ssh. May have moved to a different network + in the meantime, etc, and be stalled waiting for a response from the + network, or talking to the wrong interface or something. + (Ie, the reason why I don't use ssh connection caching by default.) +* This would stop ssh's used by a concurrently running git-annex, + but only after they finish servicing their current connection. + Could use locks to detect if another git-annex is using a ssh + socket, but concurrent git-annex is rare enough, and the impact small + enough (next ssh it runs needs to do a full connect), that + the locks are probably not justified. Could be added later if needed tho. +* Could also set ControlPersist to something like "1h", in order to + auto-terminate leftover ssh's when git-annex is ctrl-c'd or somehow + exits. When transferring big enough files that the next ssh doesn't + happen for an hour, the overhead of that ssh needing to reconnect is + not significant. +* User should be able to override this, to use their own preferred + connection caching setup. `annex.sshcaching=false` From e96726caa31fd76413b450790860611f71d13915 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 19 Jan 2012 21:15:13 -0400 Subject: [PATCH 13/22] better design Avoids possible repeated password prompts, at the cost of a small bit of locking complication. --- ...ated_password_prompts_for_one_command.mdwn | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn index e5b5e3c5cc..a047370ac7 100644 --- a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn +++ b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn @@ -22,28 +22,22 @@ Simple, when performing various git annex command over ssh, in particular a mult Slightly more elaborate design for using ssh connection caching: * Per-uuid ssh socket in `.git/annex/ssh/user@host.socket` -* Can be shared amoung concurrent git-annex processes. +* Can be shared amoung concurrent git-annex processes as well as ssh + invocations inside the current git-annex. +* Also a lock file, `.git/annex/ssh/user@host.lock`. + Open and take shared lock before running ssh; store lock in lock pool. + (Not locking socket directly, because ssh might want to.) * Run ssh like: `ssh -S .git/annex/ssh/user@host.socket -o ControlMaster=auto -o ControlPersist=yes user@host` * At shutdown, enumerate all existing sockets, and on each: - 1. Rename to .old (prevents various races) - 2. `ssh -q -S .git/annex/ssh/user@host.old -o ControlMaster=auto -o ControlPersist=yes -O stop user@host` + 1. Drop any shared lock. + 2. Attempt to take an exclusive lock (non-blocking). + 3. `ssh -q -S .git/annex/ssh/user@host.socket -o ControlMaster=auto -o ControlPersist=yes -O stop user@host` (Will exit nonzero if ssh is not running on that socket.) - 3. And then remove the socket. + 4. And then remove the socket and the lock file. * Do same *at startup*. Why? In case an old git-annex was interrupted and left behind a ssh. May have moved to a different network in the meantime, etc, and be stalled waiting for a response from the network, or talking to the wrong interface or something. (Ie, the reason why I don't use ssh connection caching by default.) -* This would stop ssh's used by a concurrently running git-annex, - but only after they finish servicing their current connection. - Could use locks to detect if another git-annex is using a ssh - socket, but concurrent git-annex is rare enough, and the impact small - enough (next ssh it runs needs to do a full connect), that - the locks are probably not justified. Could be added later if needed tho. -* Could also set ControlPersist to something like "1h", in order to - auto-terminate leftover ssh's when git-annex is ctrl-c'd or somehow - exits. When transferring big enough files that the next ssh doesn't - happen for an hour, the overhead of that ssh needing to reconnect is - not significant. * User should be able to override this, to use their own preferred connection caching setup. `annex.sshcaching=false` From 61dbad505d648f13394018c31ce2d718c175007e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 20 Jan 2012 13:23:11 -0400 Subject: [PATCH 14/22] fsck --from remote --fast Avoids expensive file transfers, at the expense of checking file size and/or contents. Required some reworking of the remote code. --- Annex/Content.hs | 21 +++++++++++++++------ Command/Fsck.hs | 14 ++++++++++++-- Command/Get.hs | 2 +- Command/Move.hs | 2 +- Remote.hs | 1 + Remote/Bup.hs | 8 ++++++-- Remote/Directory.hs | 12 +++++++----- Remote/Git.hs | 25 +++++++++++++++++-------- Remote/Helper/Encryptable.hs | 8 ++++++-- Remote/Hook.hs | 8 ++++++-- Remote/Rsync.hs | 26 ++++++++++++++++---------- Remote/S3.hs | 8 ++++++-- Remote/Web.hs | 8 ++++++-- Types/Remote.hs | 6 ++++-- debian/changelog | 2 ++ doc/git-annex.mdwn | 5 +++-- 16 files changed, 109 insertions(+), 47 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index efd360a09c..c21ac405ea 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -306,9 +306,18 @@ downloadUrl urls file = do {- Copies a key's content, when present, to a temp file. - This is used to speed up some rsyncs. -} -preseedTmp :: Key -> FilePath -> Annex () -preseedTmp key file = - unlessM (liftIO $ doesFileExist file) $ whenM (inAnnex key) $ do - s <- inRepo $ gitAnnexLocation key - liftIO $ whenM (copyFileExternal s file) $ - allowWrite file +preseedTmp :: Key -> FilePath -> Annex Bool +preseedTmp key file = go =<< inAnnex key + where + go False = return False + go True = do + ok <- copy + when ok $ liftIO $ allowWrite file + return ok + copy = do + present <- liftIO $ doesFileExist file + if present + then return True + else do + s <- inRepo $ gitAnnexLocation key + liftIO $ copyFileExternal s file diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 9d856ce889..59af29edb1 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -9,6 +9,7 @@ module Command.Fsck where import Common.Annex import Command +import qualified Annex import qualified Remote import qualified Types.Backend import qualified Types.Key @@ -65,8 +66,8 @@ performRemote key file backend numcopies remote = do showNote err stop Right True -> withtmp $ \tmpfile -> do - copied <- Remote.retrieveKeyFile remote key True tmpfile - if copied then go True (Just tmpfile) else go False Nothing + copied <- getfile tmpfile + if copied then go True (Just tmpfile) else go True Nothing Right False -> go False Nothing where go present localcopy = check @@ -83,6 +84,15 @@ performRemote key file backend numcopies remote = do let cleanup = liftIO $ catch (removeFile tmp) (const $ return ()) cleanup cleanup `after` a tmp + getfile tmp = do + ok <- Remote.retrieveKeyFileCheap remote key tmp + if ok + then return ok + else do + fast <- Annex.getState Annex.fast + if fast + then return False + else Remote.retrieveKeyFile remote key tmp {- To fsck a bare repository, fsck each key in the location log. -} withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek diff --git a/Command/Get.hs b/Command/Get.hs index 7f5c08a7e6..5d032e13c4 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -72,7 +72,7 @@ getKeyFile key file = do else return True docopy r continue = do showAction $ "from " ++ Remote.name r - copied <- Remote.retrieveKeyFile r key False file + copied <- Remote.retrieveKeyFile r key file if copied then return True else continue diff --git a/Command/Move.hs b/Command/Move.hs index 003ca27b86..2f2cd1b5d6 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -131,7 +131,7 @@ fromPerform src move key = moveLock move key $ do then handle move True else do showAction $ "from " ++ Remote.name src - ok <- getViaTmp key $ Remote.retrieveKeyFile src key False + ok <- getViaTmp key $ Remote.retrieveKeyFile src key handle move ok where handle _ False = stop -- failed diff --git a/Remote.hs b/Remote.hs index 133d3e2742..ffb53446b4 100644 --- a/Remote.hs +++ b/Remote.hs @@ -11,6 +11,7 @@ module Remote ( name, storeKey, retrieveKeyFile, + retrieveKeyFileCheap, removeKey, hasKey, hasKeyCheap, diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 9a20d9e60b..7329167dae 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -50,6 +50,7 @@ gen r u c = do name = Git.repoDescribe r, storeKey = store r buprepo, retrieveKeyFile = retrieve buprepo, + retrieveKeyFileCheap = retrieveCheap buprepo, removeKey = remove, hasKey = checkPresent r bupr', hasKeyCheap = bupLocal buprepo, @@ -118,13 +119,16 @@ storeEncrypted r buprepo (cipher, enck) k = do withEncryptedHandle cipher (L.readFile src) $ \h -> pipeBup params (Just h) Nothing -retrieve :: BupRepo -> Key -> Bool -> FilePath -> Annex Bool -retrieve buprepo k _ f = do +retrieve :: BupRepo -> Key -> FilePath -> Annex Bool +retrieve buprepo k f = do let params = bupParams "join" buprepo [Param $ show k] liftIO $ catchBoolIO $ do tofile <- openFile f WriteMode pipeBup params Nothing (Just tofile) +retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False + retrieveEncrypted :: BupRepo -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted buprepo (cipher, enck) f = do let params = bupParams "join" buprepo [Param $ show enck] diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 5cdb89f33d..52f4263409 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -41,6 +41,7 @@ gen r u c = do name = Git.repoDescribe r, storeKey = store dir, retrieveKeyFile = retrieve dir, + retrieveKeyFileCheap = retrieveCheap dir, removeKey = remove dir, hasKey = checkPresent dir, hasKeyCheap = True, @@ -109,11 +110,12 @@ storeHelper d key a = do preventWrite dir return ok -retrieve :: FilePath -> Key -> Bool -> FilePath -> Annex Bool -retrieve d k tmp f = liftIO $ withStoredFile d k $ \file -> - if tmp - then catchBoolIO $ createSymbolicLink file f >> return True - else copyFileExternal file f +retrieve :: FilePath -> Key -> FilePath -> Annex Bool +retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f + +retrieveCheap :: FilePath -> Key -> FilePath -> Annex Bool +retrieveCheap d k f = liftIO $ withStoredFile d k $ \file -> + catchBoolIO $ createSymbolicLink file f >> return True retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted d (cipher, enck) f = diff --git a/Remote/Git.hs b/Remote/Git.hs index 2196292cdf..efe1829610 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -75,6 +75,7 @@ gen r u _ = do name = Git.repoDescribe r', storeKey = copyToRemote r', retrieveKeyFile = copyFromRemote r', + retrieveKeyFileCheap = copyFromRemoteCheap r', removeKey = dropKey r', hasKey = inAnnex r', hasKeyCheap = cheap, @@ -198,20 +199,28 @@ dropKey r key ] {- Tries to copy a key's content from a remote's annex to a file. -} -copyFromRemote :: Git.Repo -> Key -> Bool -> FilePath -> Annex Bool -copyFromRemote r key tmp file +copyFromRemote :: Git.Repo -> Key -> FilePath -> Annex Bool +copyFromRemote r key file | not $ Git.repoIsUrl r = do params <- rsyncParams r loc <- liftIO $ gitAnnexLocation key r - if tmp - then liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True - else rsyncOrCopyFile params loc file - | Git.repoIsSsh r = do - when tmp $ Annex.Content.preseedTmp key file - rsyncHelper =<< rsyncParamsRemote r True key file + rsyncOrCopyFile params loc file + | Git.repoIsSsh r = rsyncHelper =<< rsyncParamsRemote r True key file | Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) file | otherwise = error "copying from non-ssh, non-http repo not supported" +copyFromRemoteCheap :: Git.Repo -> Key -> FilePath -> Annex Bool +copyFromRemoteCheap r key file + | not $ Git.repoIsUrl r = do + loc <- liftIO $ gitAnnexLocation key r + liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True + | Git.repoIsSsh r = do + ok <- Annex.Content.preseedTmp key file + if ok + then copyFromRemote r key file + else return False + | otherwise = return False + {- Tries to copy a key's content to a remote's annex. -} copyToRemote :: Git.Repo -> Key -> Annex Bool copyToRemote r key diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index ad99c3092e..0569cb5551 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -47,6 +47,7 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r { storeKey = store, retrieveKeyFile = retrieve, + retrieveKeyFileCheap = retrieveCheap, removeKey = withkey $ removeKey r, hasKey = withkey $ hasKey r, cost = cost r + encryptedRemoteCostAdj @@ -55,9 +56,12 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = store k = cip k >>= maybe (storeKey r k) (`storeKeyEncrypted` k) - retrieve k t f = cip k >>= maybe - (retrieveKeyFile r k t f) + retrieve k f = cip k >>= maybe + (retrieveKeyFile r k f) (`retrieveKeyFileEncrypted` f) + retrieveCheap k f = cip k >>= maybe + (retrieveKeyFileCheap r k f) + (\_ -> return False) withkey a k = cip k >>= maybe (a k) (a . snd) cip = cipherKey c diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 88124133aa..a08c4011ef 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -41,6 +41,7 @@ gen r u c = do name = Git.repoDescribe r, storeKey = store hooktype, retrieveKeyFile = retrieve hooktype, + retrieveKeyFileCheap = retrieveCheap hooktype, removeKey = remove hooktype, hasKey = checkPresent r hooktype, hasKeyCheap = False, @@ -106,8 +107,11 @@ storeEncrypted h (cipher, enck) k = withTmp enck $ \tmp -> do liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp runHook h "store" enck (Just tmp) $ return True -retrieve :: String -> Key -> Bool -> FilePath -> Annex Bool -retrieve h k _ f = runHook h "retrieve" k (Just f) $ return True +retrieve :: String -> Key -> FilePath -> Annex Bool +retrieve h k f = runHook h "retrieve" k (Just f) $ return True + +retrieveCheap :: String -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False retrieveEncrypted :: String -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted h (cipher, enck) f = withTmp enck $ \tmp -> diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index a1722fe17d..8de6ba6a74 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -48,6 +48,7 @@ gen r u c = do name = Git.repoDescribe r, storeKey = store o, retrieveKeyFile = retrieve o, + retrieveKeyFileCheap = retrieveCheap o, removeKey = remove o, hasKey = checkPresent r o, hasKeyCheap = False, @@ -102,19 +103,24 @@ storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp rsyncSend o enck tmp -retrieve :: RsyncOpts -> Key -> Bool -> FilePath -> Annex Bool -retrieve o k tmp f = untilTrue (rsyncUrls o k) $ \u -> do - when tmp $ preseedTmp k f - rsyncRemote o - -- use inplace when retrieving to support resuming - [ Param "--inplace" - , Param u - , Param f - ] +retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool +retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o + -- use inplace when retrieving to support resuming + [ Param "--inplace" + , Param u + , Param f + ] + +retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool +retrieveCheap o k f = do + ok <- preseedTmp k f + if ok + then retrieve o k f + else return False retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do - res <- retrieve o enck False tmp + res <- retrieve o enck tmp if res then liftIO $ catchBoolIO $ do withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f diff --git a/Remote/S3.hs b/Remote/S3.hs index b879448244..1d23b7d6f0 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -53,6 +53,7 @@ gen' r u c cst = name = Git.repoDescribe r, storeKey = store this, retrieveKeyFile = retrieve this, + retrieveKeyFileCheap = retrieveCheap this, removeKey = remove this, hasKey = checkPresent this, hasKeyCheap = False, @@ -149,8 +150,8 @@ storeHelper (conn, bucket) r k file = do xheaders = filter isxheader $ M.assocs $ fromJust $ config r isxheader (h, _) = "x-amz-" `isPrefixOf` h -retrieve :: Remote -> Key -> Bool -> FilePath -> Annex Bool -retrieve r k _ f = s3Action r False $ \(conn, bucket) -> do +retrieve :: Remote -> Key -> FilePath -> Annex Bool +retrieve r k f = s3Action r False $ \(conn, bucket) -> do res <- liftIO $ getObject conn $ bucketKey r bucket k case res of Right o -> do @@ -158,6 +159,9 @@ retrieve r k _ f = s3Action r False $ \(conn, bucket) -> do return True Left e -> s3Warning e +retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False + retrieveEncrypted :: Remote -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do res <- liftIO $ getObject conn $ bucketKey r bucket enck diff --git a/Remote/Web.hs b/Remote/Web.hs index 6db3429ebe..49c3f43f3a 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -40,6 +40,7 @@ gen r _ _ = name = Git.repoDescribe r, storeKey = uploadKey, retrieveKeyFile = downloadKey, + retrieveKeyFileCheap = downloadKeyCheap, removeKey = dropKey, hasKey = checkKey, hasKeyCheap = False, @@ -48,8 +49,8 @@ gen r _ _ = remotetype = remote } -downloadKey :: Key -> Bool -> FilePath -> Annex Bool -downloadKey key _ file = get =<< getUrls key +downloadKey :: Key -> FilePath -> Annex Bool +downloadKey key file = get =<< getUrls key where get [] = do warning "no known url" @@ -58,6 +59,9 @@ downloadKey key _ file = get =<< getUrls key showOutput -- make way for download progress bar downloadUrl urls file +downloadKeyCheap :: Key -> FilePath -> Annex Bool +downloadKeyCheap _ _ = return False + uploadKey :: Key -> Annex Bool uploadKey _ = do warning "upload to web not supported" diff --git a/Types/Remote.hs b/Types/Remote.hs index d524ea2ca5..003dd5342a 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -43,8 +43,10 @@ data RemoteA a = Remote { cost :: Int, -- Transfers a key to the remote. storeKey :: Key -> a Bool, - -- retrieves a key's contents to a file (possibly a tmp file) - retrieveKeyFile :: Key -> Bool -> FilePath -> a Bool, + -- retrieves a key's contents to a file + retrieveKeyFile :: Key -> FilePath -> a Bool, + -- retrieves a key's contents to a tmp file, if it can be done cheaply + retrieveKeyFileCheap :: Key -> FilePath -> a Bool, -- removes a key's contents removeKey :: Key -> a Bool, -- Checks if a key is present in the remote; if the remote diff --git a/debian/changelog b/debian/changelog index 6849931486..5adba128f7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -8,6 +8,8 @@ git-annex (3.20120117) UNRELEASED; urgency=low * If you have any directory special remotes, now would be a good time to fsck them, in case you were hit by the data loss bug fixed in the previous release! + * fsck --from remote --fast: Avoids expensive file transfers, at the + expense of checking file size and/or contents. -- Joey Hess Thu, 19 Jan 2012 15:12:03 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index edf300d8d7..a377665c68 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -210,9 +210,10 @@ subdirectories). With parameters, only the specified files are checked. - To avoid expensive checksum calculations, specify --fast - To check a remote to fsck, specify --from. + + To avoid expensive checksum calculations (and expensive transfers when + fscking a remote), specify --fast * unused From 25f998679cd68cd4bb9b320998253f1b2ae23315 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 20 Jan 2012 15:06:17 -0400 Subject: [PATCH 15/22] typo --- Utility/Misc.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utility/Misc.hs b/Utility/Misc.hs index 1d3c0e6763..c9bfcb953a 100644 --- a/Utility/Misc.hs +++ b/Utility/Misc.hs @@ -25,7 +25,7 @@ readFileStrict = readFile >=> \s -> length s `seq` return s - in the second result list. - - separate (== ':') "foo:bar" = ("foo", "bar") - - separate (== ':') "foobar" = ("foo, "") + - separate (== ':') "foobar" = ("foobar", "") -} separate :: (a -> Bool) -> [a] -> ([a], [a]) separate c l = unbreak $ break c l From 47250a153a6c5a2864fec15fb136290683aeb1c6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 20 Jan 2012 15:34:52 -0400 Subject: [PATCH 16/22] ssh connection caching Ssh connection caching is now enabled automatically by git-annex. Only one ssh connection is made to each host per git-annex run, which can speed some things up a lot, as well as avoiding repeated password prompts. Concurrent git-annex processes also share ssh connections. Cached ssh connections are shut down when git-annex exits. Note: The rsync special remote does not yet participate in the ssh connection caching. --- Annex.hs | 3 + Annex/LockPool.hs | 43 +++++++ Annex/Ssh.hs | 107 ++++++++++++++++++ CmdLine.hs | 2 + Locations.hs | 5 + Remote/Helper/Ssh.hs | 14 +-- debian/changelog | 5 + ..._no_fixed_hostname_and_optimising_ssh.mdwn | 13 --- ...ated_password_prompts_for_one_command.mdwn | 4 +- 9 files changed, 173 insertions(+), 23 deletions(-) create mode 100644 Annex/LockPool.hs create mode 100644 Annex/Ssh.hs diff --git a/Annex.hs b/Annex.hs index b365132e5d..3b79ea2700 100644 --- a/Annex.hs +++ b/Annex.hs @@ -29,6 +29,7 @@ module Annex ( import Control.Monad.State import Control.Monad.Trans.Control (StM, MonadBaseControl, liftBaseWith, restoreM) import Control.Monad.Base (liftBase, MonadBase) +import System.Posix.Types (Fd) import Common import qualified Git @@ -86,6 +87,7 @@ data AnnexState = AnnexState , forcetrust :: TrustMap , trustmap :: Maybe TrustMap , ciphers :: M.Map EncryptedCipher Cipher + , lockpool :: M.Map FilePath Fd , flags :: M.Map String Bool , fields :: M.Map String String } @@ -108,6 +110,7 @@ newState gitrepo = AnnexState , forcetrust = M.empty , trustmap = Nothing , ciphers = M.empty + , lockpool = M.empty , flags = M.empty , fields = M.empty } diff --git a/Annex/LockPool.hs b/Annex/LockPool.hs new file mode 100644 index 0000000000..3fede5739b --- /dev/null +++ b/Annex/LockPool.hs @@ -0,0 +1,43 @@ +{- git-annex lock pool + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.LockPool where + +import qualified Data.Map as M +import System.Posix.Types (Fd) + +import Common.Annex +import Annex + +{- Create a specified lock file, and takes a shared lock. -} +lockFile :: FilePath -> Annex () +lockFile file = go =<< fromPool file + where + go (Just _) = return () -- already locked + go Nothing = do + fd <- liftIO $ openFd file ReadOnly (Just stdFileMode) defaultFileFlags + liftIO $ waitToSetLock fd (ReadLock, AbsoluteSeek, 0, 0) + changePool $ M.insert file fd + +unlockFile :: FilePath -> Annex () +unlockFile file = go =<< fromPool file + where + go Nothing = return () + go (Just fd) = do + liftIO $ closeFd fd + changePool $ M.delete file + +getPool :: Annex (M.Map FilePath Fd) +getPool = getState lockpool + +fromPool :: FilePath -> Annex (Maybe Fd) +fromPool file = M.lookup file <$> getPool + +changePool :: (M.Map FilePath Fd -> M.Map FilePath Fd) -> Annex () +changePool a = do + m <- getPool + changeState $ \s -> s { lockpool = a m } diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs new file mode 100644 index 0000000000..cd832a373f --- /dev/null +++ b/Annex/Ssh.hs @@ -0,0 +1,107 @@ +{- git-annex ssh interface, with connection caching + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Ssh ( + sshParams, + sshCleanup, +) where + +import qualified Data.Map as M +import System.IO.Error (try) + +import Common.Annex +import Annex.LockPool + +{- Generates parameters to ssh to a given host (or user@host) on a given + - port, with connection caching. -} +sshParams :: (String, Maybe Integer) -> Annex [CommandParam] +sshParams (host, port) = do + cleanstale + (socketfile, params) <- sshInfo (host, port) + liftIO $ createDirectoryIfMissing True $ parentDir socketfile + lockFile $ socket2lock socketfile + return params + where + -- If the lock pool is empty, this is the first ssh of this + -- run. There could be stale ssh connections hanging around + -- from a previous git-annex run that was interrupted. + cleanstale = whenM (null . filter isLock . M.keys <$> getPool) $ + sshCleanup + +sshInfo :: (String, Maybe Integer) -> Annex (FilePath, [CommandParam]) +sshInfo (host, port) = do + dir <- fromRepo $ gitAnnexSshDir + let socketfile = dir hostport2socket host port + return $ (socketfile, cacheParams socketfile ++ portParams port ++ [Param host]) + +cacheParams :: FilePath -> [CommandParam] +cacheParams socketfile = + [ Param "-S", Param socketfile + , Params "-o ControlMaster=auto -o ControlPersist=yes" + ] + +portParams :: Maybe Integer -> [CommandParam] +portParams Nothing = [] +portParams (Just port) = [Param "-p", Param $ show port] + +{- Stop any unused ssh processes. -} +sshCleanup :: Annex () +sshCleanup = do + dir <- fromRepo $ gitAnnexSshDir + liftIO $ createDirectoryIfMissing True dir + sockets <- filter (not . isLock) <$> liftIO (dirContents dir) + forM_ sockets cleanup + where + cleanup socketfile = do + -- Drop any shared lock we have, and take an + -- exclusive lock, without blocking. If the lock + -- succeeds, nothing is using this ssh, and it can + -- be stopped. + let lockfile = socket2lock socketfile + unlockFile lockfile + fd <- liftIO $ openFd lockfile ReadWrite (Just stdFileMode) defaultFileFlags + v <- liftIO $ try $ setLock fd (WriteLock, AbsoluteSeek, 0, 0) + case v of + Left _ -> return () + Right _ -> stopssh socketfile + liftIO $ closeFd fd + stopssh socketfile = do + (_, params) <- sshInfo $ socket2hostport socketfile + _ <- liftIO $ do + -- "ssh -O stop" is noisy on stderr even with -q + let cmd = unwords $ toCommand $ + [ Params "-O stop" + ] ++ params + _ <- boolSystem "sh" + [ Param "-c" + , Param $ "ssh " ++ cmd ++ " >/dev/null 2>/dev/null" + ] + --try $ removeFile socketfile + return () + -- Cannot remove the lock file; other processes may + -- be waiting on our exclusive lock to use it. + return () + +hostport2socket :: String -> Maybe Integer -> FilePath +hostport2socket host Nothing = host +hostport2socket host (Just port) = host ++ "!" ++ show port + +socket2hostport :: FilePath -> (String, Maybe Integer) +socket2hostport socket + | null p = (h, Nothing) + | otherwise = (h, readMaybe p) + where + (h, p) = separate (== '!') $ takeFileName socket + +socket2lock :: FilePath -> FilePath +socket2lock socket = socket ++ lockExt + +isLock :: FilePath -> Bool +isLock f = lockExt `isSuffixOf` f + +lockExt :: String +lockExt = ".lock" diff --git a/CmdLine.hs b/CmdLine.hs index 68157a01a9..29b95d01bd 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -22,6 +22,7 @@ import qualified Annex.Queue import qualified Git import qualified Git.Command import Annex.Content +import Annex.Ssh import Command type Params = [String] @@ -92,4 +93,5 @@ shutdown :: Annex Bool shutdown = do saveState liftIO Git.Command.reap -- zombies from long-running git processes + sshCleanup -- ssh connection caching return True diff --git a/Locations.hs b/Locations.hs index 73a2473b56..03d6deb1d7 100644 --- a/Locations.hs +++ b/Locations.hs @@ -22,6 +22,7 @@ module Locations ( gitAnnexJournalLock, gitAnnexIndex, gitAnnexIndexLock, + gitAnnexSshDir, isLinkToAnnex, annexHashes, hashDirMixed, @@ -142,6 +143,10 @@ gitAnnexIndex r = gitAnnexDir r "index" gitAnnexIndexLock :: Git.Repo -> FilePath gitAnnexIndexLock r = gitAnnexDir r "index.lck" +{- .git/annex/ssh/ is used for ssh connection caching -} +gitAnnexSshDir :: Git.Repo -> FilePath +gitAnnexSshDir r = addTrailingPathSeparator $ gitAnnexDir r "ssh" + {- Checks a symlink target to see if it appears to point to annexed content. -} isLinkToAnnex :: FilePath -> Bool isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index 7c5eeddb8e..88b29fdb69 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -7,25 +7,21 @@ module Remote.Helper.Ssh where -import Common +import Common.Annex import qualified Git import qualified Git.Url -import Types import Config import Annex.UUID +import Annex.Ssh {- Generates parameters to ssh to a repository's host and run a command. - Caller is responsible for doing any neccessary shellEscaping of the - passed command. -} sshToRepo :: Git.Repo -> [CommandParam] -> Annex [CommandParam] sshToRepo repo sshcmd = do - s <- getConfig repo "ssh-options" "" - let sshoptions = map Param (words s) - let sshport = case Git.Url.port repo of - Nothing -> [] - Just p -> [Param "-p", Param (show p)] - let sshhost = Param $ Git.Url.hostuser repo - return $ sshoptions ++ sshport ++ [sshhost] ++ sshcmd + opts <- map Param . words <$> getConfig repo "ssh-options" "" + params <- sshParams (Git.Url.hostuser repo, Git.Url.port repo) + return $ opts ++ params ++ sshcmd {- Generates parameters to run a git-annex-shell command on a remote - repository. -} diff --git a/debian/changelog b/debian/changelog index 5adba128f7..f5fc107c7b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -10,6 +10,11 @@ git-annex (3.20120117) UNRELEASED; urgency=low previous release! * fsck --from remote --fast: Avoids expensive file transfers, at the expense of checking file size and/or contents. + * Ssh connection caching is now enabled automatically by git-annex. + Only one ssh connection is made to each host per git-annex run, which + can speed some things up a lot, as well as avoiding repeated password + prompts. Concurrent git-annex processes also share ssh connections. + Cached ssh connections are shut down when git-annex exits. -- Joey Hess Thu, 19 Jan 2012 15:12:03 -0400 diff --git a/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn b/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn index 8fb2bf9db1..594d8c480f 100644 --- a/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn +++ b/doc/tips/using_git_annex_with_no_fixed_hostname_and_optimising_ssh.mdwn @@ -57,16 +57,3 @@ b) From the desktop add the remote So now you can work on the train, pop on the wifi at work upon arrival, and sync up with a `git pull && git annex get`. An alternative solution may be to use direct tunnels over Openvpn. - -## Optimising SSH - -Running a `git annex get .`, at least in the version I have, creates a new SSH connection for every file transfer (maybe this should be a feature request?) - -Lot's of new small files in an _annex_ cause lot's of connections to be made quickly: this is an relatively expensive overhead and is enough for connection limiting to start in my case. The process can be made much faster by using SSH's connection sharing capabilities. An SSH config like this should do it: - - # Global Settings - ControlMaster auto - ControlPersist 30 - ControlPath ~/.ssh/master-%r@%h:%p - -This will create a master connection for sharing if one isn't present, maintain it for 30 seconds after closing down the connection (just-in-cases') and automatically use the master connection for subsequent connections. Wins all round! diff --git a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn index a047370ac7..341a9afa45 100644 --- a/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn +++ b/doc/todo/wishlist:_Prevent_repeated_password_prompts_for_one_command.mdwn @@ -15,7 +15,9 @@ Simple, when performing various git annex command over ssh, in particular a mult >> pid. Then at shutdown, run `ssh -O exit` on each such socket. >> >> Complicated slightly by not doing this if the user has already set up ->> more broad ssh connection caching. --[[Joey]] +>> more broad ssh connection caching. +>> +>> [[done]]! --[[Joey]] --- From 6ef82665de86195a7da6cffcba40874eba06424f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 20 Jan 2012 17:13:36 -0400 Subject: [PATCH 17/22] add annex.sshcaching config setting --- Annex/Ssh.hs | 29 +++++++++++++++++++---------- doc/git-annex.mdwn | 4 ++++ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs index cd832a373f..7f5ba48d84 100644 --- a/Annex/Ssh.hs +++ b/Annex/Ssh.hs @@ -15,28 +15,37 @@ import System.IO.Error (try) import Common.Annex import Annex.LockPool +import qualified Git +import qualified Git.Config {- Generates parameters to ssh to a given host (or user@host) on a given - port, with connection caching. -} sshParams :: (String, Maybe Integer) -> Annex [CommandParam] -sshParams (host, port) = do - cleanstale - (socketfile, params) <- sshInfo (host, port) - liftIO $ createDirectoryIfMissing True $ parentDir socketfile - lockFile $ socket2lock socketfile - return params +sshParams (host, port) = go =<< sshInfo (host, port) where + go (Nothing, params) = return params + go (Just socketfile, params) = do + cleanstale + liftIO $ createDirectoryIfMissing True $ parentDir socketfile + lockFile $ socket2lock socketfile + return params -- If the lock pool is empty, this is the first ssh of this -- run. There could be stale ssh connections hanging around -- from a previous git-annex run that was interrupted. cleanstale = whenM (null . filter isLock . M.keys <$> getPool) $ sshCleanup -sshInfo :: (String, Maybe Integer) -> Annex (FilePath, [CommandParam]) +sshInfo :: (String, Maybe Integer) -> Annex (Maybe FilePath, [CommandParam]) sshInfo (host, port) = do - dir <- fromRepo $ gitAnnexSshDir - let socketfile = dir hostport2socket host port - return $ (socketfile, cacheParams socketfile ++ portParams port ++ [Param host]) + caching <- Git.configTrue <$> fromRepo (Git.Config.get "annex.sshcaching" "true") + if caching + then do + dir <- fromRepo $ gitAnnexSshDir + let socketfile = dir hostport2socket host port + return $ (Just socketfile, cacheParams socketfile ++ params) + else return (Nothing, params) + where + params = portParams port ++ [Param host] cacheParams :: FilePath -> [CommandParam] cacheParams socketfile = diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index a377665c68..148b6336de 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -575,6 +575,10 @@ Here are all the supported configuration settings. Automatically maintained, and used to automate upgrades between versions. +* `annex.sshcaching` + + By default, git-annex caches ssh connections. To disable this, set to `false`. + * `remote..annex-cost` When determining which repository to From eb9001044f3db682236d1007aded58f47109d6a6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 20 Jan 2012 17:32:32 -0400 Subject: [PATCH 18/22] order user provided params after connection caching params So the user can override them. --- Annex/Ssh.hs | 15 +++++++-------- Remote/Helper/Ssh.hs | 4 ++-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs index 7f5ba48d84..c05e236040 100644 --- a/Annex/Ssh.hs +++ b/Annex/Ssh.hs @@ -20,15 +20,16 @@ import qualified Git.Config {- Generates parameters to ssh to a given host (or user@host) on a given - port, with connection caching. -} -sshParams :: (String, Maybe Integer) -> Annex [CommandParam] -sshParams (host, port) = go =<< sshInfo (host, port) +sshParams :: (String, Maybe Integer) -> [CommandParam] -> Annex [CommandParam] +sshParams (host, port) opts = go =<< sshInfo (host, port) where - go (Nothing, params) = return params + go (Nothing, params) = ret params go (Just socketfile, params) = do cleanstale liftIO $ createDirectoryIfMissing True $ parentDir socketfile lockFile $ socket2lock socketfile - return params + ret params + ret ps = return $ ps ++ opts ++ portParams port ++ [Param host] -- If the lock pool is empty, this is the first ssh of this -- run. There could be stale ssh connections hanging around -- from a previous git-annex run that was interrupted. @@ -42,10 +43,8 @@ sshInfo (host, port) = do then do dir <- fromRepo $ gitAnnexSshDir let socketfile = dir hostport2socket host port - return $ (Just socketfile, cacheParams socketfile ++ params) - else return (Nothing, params) - where - params = portParams port ++ [Param host] + return $ (Just socketfile, cacheParams socketfile) + else return (Nothing, []) cacheParams :: FilePath -> [CommandParam] cacheParams socketfile = diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index 88b29fdb69..c61d1b96f2 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -20,8 +20,8 @@ import Annex.Ssh sshToRepo :: Git.Repo -> [CommandParam] -> Annex [CommandParam] sshToRepo repo sshcmd = do opts <- map Param . words <$> getConfig repo "ssh-options" "" - params <- sshParams (Git.Url.hostuser repo, Git.Url.port repo) - return $ opts ++ params ++ sshcmd + params <- sshParams (Git.Url.hostuser repo, Git.Url.port repo) opts + return $ params ++ sshcmd {- Generates parameters to run a git-annex-shell command on a remote - repository. -} From 183bdacca219065e6a888385e3bf309708e827ec Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 21 Jan 2012 02:24:12 -0400 Subject: [PATCH 19/22] treak --- Utility/Format.hs | 4 +++- Utility/Monad.hs | 10 +++++----- Utility/PartialPrelude.hs | 2 +- Utility/TempFile.hs | 4 +++- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Utility/Format.hs b/Utility/Format.hs index 2c2042cc22..d8b7e45493 100644 --- a/Utility/Format.hs +++ b/Utility/Format.hs @@ -37,9 +37,11 @@ data Frag = Const String | Var String Justify data Justify = LeftJustified Int | RightJustified Int | UnJustified deriving (Show) +type Variables = M.Map String String + {- Expands a Format using some variables, generating a formatted string. - This can be repeatedly called, efficiently. -} -format :: Format -> M.Map String String -> String +format :: Format -> Variables -> String format f vars = concatMap expand f where expand (Const s) = s diff --git a/Utility/Monad.hs b/Utility/Monad.hs index 95964361e6..28aa33ee82 100644 --- a/Utility/Monad.hs +++ b/Utility/Monad.hs @@ -12,7 +12,7 @@ import Control.Monad (liftM) {- Return the first value from a list, if any, satisfying the given - predicate -} -firstM :: (Monad m) => (a -> m Bool) -> [a] -> m (Maybe a) +firstM :: Monad m => (a -> m Bool) -> [a] -> m (Maybe a) firstM _ [] = return Nothing firstM p (x:xs) = do q <- p x @@ -22,20 +22,20 @@ firstM p (x:xs) = do {- Returns true if any value in the list satisfies the predicate, - stopping once one is found. -} -anyM :: (Monad m) => (a -> m Bool) -> [a] -> m Bool +anyM :: Monad m => (a -> m Bool) -> [a] -> m Bool anyM p = liftM isJust . firstM p {- Runs an action on values from a list until it succeeds. -} -untilTrue :: (Monad m) => [a] -> (a -> m Bool) -> m Bool +untilTrue :: Monad m => [a] -> (a -> m Bool) -> m Bool untilTrue = flip anyM {- Runs an action, passing its value to an observer before returning it. -} -observe :: (Monad m) => (a -> m b) -> m a -> m a +observe :: Monad m => (a -> m b) -> m a -> m a observe observer a = do r <- a _ <- observer r return r {- b `after` a runs first a, then b, and returns the value of a -} -after :: (Monad m) => m b -> m a -> m a +after :: Monad m => m b -> m a -> m a after = observe . const diff --git a/Utility/PartialPrelude.hs b/Utility/PartialPrelude.hs index ad857196d6..507fc6252b 100644 --- a/Utility/PartialPrelude.hs +++ b/Utility/PartialPrelude.hs @@ -37,7 +37,7 @@ last = Prelude.last - Ignores leading/trailing whitespace, and throws away any trailing - text after the part that can be read. -} -readMaybe :: (Read a) => String -> Maybe a +readMaybe :: Read a => String -> Maybe a readMaybe s = case reads s of ((x,_):_) -> Just x _ -> Nothing diff --git a/Utility/TempFile.hs b/Utility/TempFile.hs index 3887b422b6..469d52e8ce 100644 --- a/Utility/TempFile.hs +++ b/Utility/TempFile.hs @@ -26,8 +26,10 @@ viaTmp a file content = do a tmpfile content renameFile tmpfile file +type Template = String + {- Runs an action with a temp file, then removes the file. -} -withTempFile :: String -> (FilePath -> Handle -> IO a) -> IO a +withTempFile :: Template -> (FilePath -> Handle -> IO a) -> IO a withTempFile template a = bracket create remove use where create = do From ece239f08661929ae37afed547a8ad97bad2521d Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawlB7-aXsqwzOi2BIR_Q4sUF8sjj24H6F3c" Date: Mon, 23 Jan 2012 18:38:14 +0000 Subject: [PATCH 20/22] --- ...in_directory_tree_below_objects__47__.mdwn | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 doc/forum/Preserving_file_access_rights_in_directory_tree_below_objects__47__.mdwn diff --git a/doc/forum/Preserving_file_access_rights_in_directory_tree_below_objects__47__.mdwn b/doc/forum/Preserving_file_access_rights_in_directory_tree_below_objects__47__.mdwn new file mode 100644 index 0000000000..d89bcee290 --- /dev/null +++ b/doc/forum/Preserving_file_access_rights_in_directory_tree_below_objects__47__.mdwn @@ -0,0 +1,77 @@ +Hello, + +I have the problem that, while git-annex preserves the file access rights (user, group, others) for the actual file, it does not make sure that others can access this file through the directory tree above said file: + + /tmp $ mkdir test + /tmp $ chown claudius:media test + /tmp $ chmod 750 test + /tmp $ ls -dl test + drwxr-x--- 2 claudius media 40 2012-01-23 19:27 test/ + /tmp $ cd test + /tmp/test $ git init --shared=all + Initialized empty shared Git repository in /tmp/test/.git/ + /tmp/test $ git annex init "test" + init test ok + /tmp/test $ echo 123 > abc + /tmp/test $ chmod 640 abc + /tmp/test $ chown claudius:media abc + /tmp/test $ ls -l + total 4 + -rw-r----- 1 claudius media 4 2012-01-23 19:27 abc + /tmp/test $ git annex add . + add abc (checksum...) ok + (Recording state in git...) + /tmp/test $ ls -l + total 4 + lrwxrwxrwx 1 claudius claudius 176 2012-01-23 19:27 abc -> .git/annex/objects/8F/pj/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b + /tmp/test $ ls -l .git/annex/objects/8F/pj/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b + -r--r----- 1 claudius media 4 2012-01-23 19:27 .git/annex/objects/8F/pj/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b + /tmp/test $ ls -lR .git/annex/objects/ + .git/annex/objects/: + total 0 + drwx--S--- 3 claudius claudius 60 2012-01-23 19:28 8F/ + + .git/annex/objects/8F: + total 0 + drwx--S--- 3 claudius claudius 60 2012-01-23 19:28 pj/ + + .git/annex/objects/8F/pj: + total 0 + dr-x--S--- 2 claudius claudius 60 2012-01-23 19:28 SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b/ + + .git/annex/objects/8F/pj/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b: + total 4 + -r--r----- 1 claudius media 4 2012-01-23 19:27 SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b + /tmp/test $ stat .git/annex/objects/ + File: `.git/annex/objects/' + Size: 60 Blocks: 0 IO Block: 4096 directory + Device: 11h/17d Inode: 2365970 Links: 3 + Access: (2700/drwx--S---) Uid: ( 1000/claudius) Gid: ( 1000/claudius) + Access: 2012-01-23 19:28:10.614948386 +0100 + Modify: 2012-01-23 19:28:10.614948386 +0100 + Change: 2012-01-23 19:28:10.614948386 +0100 + Birth: - + +The use case is that I have a rather large collection of music I would like to manage with git-annex in various locations (all of it on my external hard drive, some on my notebook etc. This music is played by MPD, which can access the collection because it is in the "media" group. After changing to git-annex, however, this fails. + +I tried to avoid this specific problem by declaring the git repository to be shared, which does appear to have some effect on the other files in .git: + + /tmp/test $ ls -l .git + total 16 + drwx--S--- 5 claudius claudius 160 2012-01-23 19:28 annex/ + drwxrwsr-x 2 claudius claudius 40 2012-01-23 19:27 branches/ + -rw-rw-r-- 1 claudius claudius 218 2012-01-23 19:27 config + -rw-rw-r-- 1 claudius claudius 73 2012-01-23 19:27 description + -rw-rw-r-- 1 claudius claudius 23 2012-01-23 19:27 HEAD + drwxrwsr-x 2 claudius claudius 220 2012-01-23 19:27 hooks/ + -rw-rw-r-- 1 claudius claudius 104 2012-01-23 19:28 index + drwxrwsr-x 2 claudius claudius 60 2012-01-23 19:27 info/ + drwxrwsr-x 3 claudius claudius 60 2012-01-23 19:27 logs/ + drwxrwsr-x 15 claudius claudius 300 2012-01-23 19:28 objects/ + drwxrwsr-x 4 claudius claudius 80 2012-01-23 19:27 refs/ + +I could obviously try to change the rights of annex/, annex/objects etc., but I would like to avoid having to adapt them each time a new folder is added somewhere below annex/objects/. + +My knowledge of git and especially git-annex is not too good, so it might well be that I missed something obvious. Any hints? :) + +(And thank you, of course, for taking the time to read all this) From 549bf0cddb08663de4703d7b70d50a42936066d5 Mon Sep 17 00:00:00 2001 From: "http://joey.kitenet.net/" Date: Mon, 23 Jan 2012 19:00:40 +0000 Subject: [PATCH 21/22] Added a comment --- ...ent_1_5dd978f9b5a0771f44ab9e086bf5a07f._comment | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 doc/forum/Preserving_file_access_rights_in_directory_tree_below_objects__47__/comment_1_5dd978f9b5a0771f44ab9e086bf5a07f._comment diff --git a/doc/forum/Preserving_file_access_rights_in_directory_tree_below_objects__47__/comment_1_5dd978f9b5a0771f44ab9e086bf5a07f._comment b/doc/forum/Preserving_file_access_rights_in_directory_tree_below_objects__47__/comment_1_5dd978f9b5a0771f44ab9e086bf5a07f._comment new file mode 100644 index 0000000000..d2da5e94df --- /dev/null +++ b/doc/forum/Preserving_file_access_rights_in_directory_tree_below_objects__47__/comment_1_5dd978f9b5a0771f44ab9e086bf5a07f._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="http://joey.kitenet.net/" + nickname="joey" + subject="comment 1" + date="2012-01-23T19:00:40Z" + content=""" +You say you started the repo with \"git init --shared\" .. but what that's really meant for is bare repositories, which can have several users pushing into it, not a non-bare repository. + +The strange mode on the directories \"dr-x--S---\" and files \"-r--r-----\" must be due to your umask setting though. My umask is 022 and the directories and files under `.git/annex/objects` are \"drwxr-xr-x\" and \"-r--r--r--\", which allows anyone to read them unless an upper directory blocks it -- and with this umask, none do unless I explicitly remove permissions from one to lock down a repository. + +About mpd, the obvious fix is to run mpd not as a system user but as yourself. I put \"@reboot mpd\" in my crontab to do this. + + +"""]] From 20d02888023e9707287742eb4e3568cb960c1fe8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 23 Jan 2012 15:09:50 -0400 Subject: [PATCH 22/22] releasing version 3.20120123 --- debian/NEWS | 2 +- debian/changelog | 8 +++++--- git-annex.cabal | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/debian/NEWS b/debian/NEWS index 298ff1f7e0..1c95146912 100644 --- a/debian/NEWS +++ b/debian/NEWS @@ -1,4 +1,4 @@ -git-annex (3.20120119) unstable; urgency=low +git-annex (3.20120123) unstable; urgency=low There was a bug in the handling of directory special remotes that could cause partial file contents to be stored in them. If you use diff --git a/debian/changelog b/debian/changelog index f5fc107c7b..2f573e6d3f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -git-annex (3.20120117) UNRELEASED; urgency=low +git-annex (3.20120123) unstable; urgency=low * fsck --from: Fscking a remote is now supported. It's done by retrieving the contents of the specified files from the remote, and checking them, @@ -9,14 +9,16 @@ git-annex (3.20120117) UNRELEASED; urgency=low fsck them, in case you were hit by the data loss bug fixed in the previous release! * fsck --from remote --fast: Avoids expensive file transfers, at the - expense of checking file size and/or contents. + expense of not checking file size and/or contents. * Ssh connection caching is now enabled automatically by git-annex. Only one ssh connection is made to each host per git-annex run, which can speed some things up a lot, as well as avoiding repeated password prompts. Concurrent git-annex processes also share ssh connections. Cached ssh connections are shut down when git-annex exits. + * To disable the ssh caching (if for example you have your own broader + ssh caching configuration), set annex.sshcaching=false. - -- Joey Hess Thu, 19 Jan 2012 15:12:03 -0400 + -- Joey Hess Mon, 23 Jan 2012 13:48:48 -0400 git-annex (3.20120116) unstable; urgency=medium diff --git a/git-annex.cabal b/git-annex.cabal index 1a4bac9cfd..43901b6939 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -1,5 +1,5 @@ Name: git-annex -Version: 3.20120116 +Version: 3.20120123 Cabal-Version: >= 1.6 License: GPL Maintainer: Joey Hess