diff --git a/CHANGELOG b/CHANGELOG index f4b19ad2a8..61b1546dc8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,15 @@ git-annex (6.20180228) UNRELEASED; urgency=medium + * New protocol for communicating with git-annex-shell increases speed + of operations involving ssh remotes. When not transferring large files, + git-annex is between 200% and 400% faster using the new protocol. + (When the remote has an old git-annex-shell, git-annex falls back + to the old slower code.) + * Note that, due to not using rsync to transfer files to ssh remotes + any longer, permissions and other file metadata of annexed files + will no longer be preserved when copying them to and from ssh remotes. + Other remotes never supported preserving that information, so + this is not considered a regression. * Support exporttree=yes for rsync special remotes. * Dial back optimisation when building on arm, which prevents ghc and llc from running out of memory when optimising some files. @@ -16,7 +26,6 @@ git-annex (6.20180228) UNRELEASED; urgency=medium in direct mode, it neglected to check that the content was actually present when locking it. This could cause git annex drop to remove the only copy of a file when it thought the tor remote had a copy. - * git-annex-shell: Added p2pstdio mode. * Better ssh connection warmup when using -J for concurrency. Avoids ugly messages when forced ssh command is not git-annex-shell. * Note that Remote/Git.hs now contains AGPL licensed code, diff --git a/Messages/Progress.hs b/Messages/Progress.hs index cb924eeac2..eeaa8c25cb 100644 --- a/Messages/Progress.hs +++ b/Messages/Progress.hs @@ -77,15 +77,6 @@ metered othermeter key getsrcfile a = withMessageState $ \st -> Nothing -> return Nothing Just f -> catchMaybeIO $ liftIO $ getFileSize f -{- Use when the command's own progress output is preferred. - - The command's output will be suppressed and git-annex's progress meter - - used for concurrent output, and json progress. -} -commandMetered :: Maybe MeterUpdate -> Key -> Annex (Maybe FilePath) -> (MeterUpdate -> Annex a) -> Annex a -commandMetered combinemeterupdate key getsrcfile a = - withMessageState $ \s -> if needOutputMeter s - then metered combinemeterupdate key getsrcfile a - else a (fromMaybe nullMeterUpdate combinemeterupdate) - {- Poll file size to display meter, but only when concurrent output or - json progress needs the information. -} meteredFile :: FilePath -> Maybe MeterUpdate -> Key -> Annex a -> Annex a diff --git a/NEWS b/NEWS index 484f7dbf72..2271d8aa53 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,14 @@ -git-annex (6.20170228) unstable; urgency=medium +git-annex (6.20180309) upstream; urgency=medium + + Note that, due to not using rsync to transfer files to ssh remotes + any longer, permissions and other file metadata of annexed files + will no longer be preserved when copying them to and from ssh remotes. + Other remotes never supported preserving that information, so + this is not considered a regression. + + -- Joey Hess Fri, 09 Mar 2018 13:22:47 -0400 + +git-annex (6.20170228) upstream; urgency=medium This version of git-annex has mitigations for SHA1 hash collision problems. @@ -10,7 +20,7 @@ git-annex (6.20170228) unstable; urgency=medium -- Joey Hess Tue, 28 Feb 2017 13:28:50 -0400 -git-annex (6.20170101) unstable; urgency=medium +git-annex (6.20170101) upstream; urgency=medium XMPP support has been removed from the assistant in this release. @@ -20,7 +30,7 @@ git-annex (6.20170101) unstable; urgency=medium -- Joey Hess Tue, 27 Dec 2016 16:37:46 -0400 -git-annex (4.20131002) unstable; urgency=low +git-annex (4.20131002) upstream; urgency=low The layout of gcrypt repositories has changed, and if you created one you must manually upgrade it. @@ -28,7 +38,7 @@ git-annex (4.20131002) unstable; urgency=low -- Joey Hess Tue, 24 Sep 2013 13:55:23 -0400 -git-annex (3.20120123) unstable; urgency=low +git-annex (3.20120123) upstream; urgency=low There was a bug in the handling of directory special remotes that could cause partial file contents to be stored in them. If you use @@ -39,7 +49,7 @@ git-annex (3.20120123) unstable; urgency=low -- Joey Hess Thu, 19 Jan 2012 15:24:23 -0400 -git-annex (3.20110624) experimental; urgency=low +git-annex (3.20110624) upstream; urgency=low There has been another change to the git-annex data store. Use `git annex upgrade` to migrate your repositories to the new @@ -56,7 +66,7 @@ git-annex (3.20110624) experimental; urgency=low -- Joey Hess Tue, 21 Jun 2011 20:18:00 -0400 -git-annex (0.20110316) experimental; urgency=low +git-annex (0.20110316) upstream; urgency=low This version reorganises the layout of git-annex's files in your repository. There is an upgrade process to convert a repository from the old git-annex diff --git a/Remote/Git.hs b/Remote/Git.hs index d908a7944a..56f2706cca 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -61,7 +61,6 @@ import qualified P2P.IO as P2P import P2P.Address import Annex.Path import Creds -import Messages.Progress import Types.NumCopies import Annex.Action @@ -150,24 +149,23 @@ gen r u c gc | Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc | otherwise = case repoP2PAddress r of Nothing -> do - duc <- mkDeferredUUIDCheck r u gc - connpool <- Ssh.mkP2PSshConnectionPool - go duc connpool <$> remoteCost gc defcst + st <- mkState r u gc + go st <$> remoteCost gc defcst Just addr -> Remote.P2P.chainGen addr r u c gc where defcst = if repoCheap r then cheapRemoteCost else expensiveRemoteCost - go duc connpool cst = Just new + go st cst = Just new where new = Remote { uuid = u , cost = cst , name = Git.repoDescribe r - , storeKey = copyToRemote new duc - , retrieveKeyFile = copyFromRemote new - , retrieveKeyFileCheap = copyFromRemoteCheap new - , removeKey = dropKey new duc connpool - , lockContent = Just (lockKey new duc) - , checkPresent = inAnnex new duc + , storeKey = copyToRemote new st + , retrieveKeyFile = copyFromRemote new st + , retrieveKeyFileCheap = copyFromRemoteCheap new st + , removeKey = dropKey new st + , lockContent = Just (lockKey new st) + , checkPresent = inAnnex new st , checkPresentCheap = repoCheap r , exportActions = exportUnsupported , whereisKey = Nothing @@ -332,8 +330,8 @@ tryGitConfigRead autoinit r else [] {- Checks if a given remote has the content for a key in its annex. -} -inAnnex :: Remote -> DeferredUUIDCheck -> Key -> Annex Bool -inAnnex rmt duc key +inAnnex :: Remote -> State -> Key -> Annex Bool +inAnnex rmt (State connpool duc) key | Git.repoIsHttp r = checkhttp | Git.repoIsUrl r = checkremote | otherwise = checklocal @@ -345,7 +343,9 @@ inAnnex rmt duc key ( return True , giveup "not found" ) - checkremote = Ssh.inAnnex r key + checkremote = + let fallback = Ssh.inAnnex r key + in P2PHelper.checkpresent (runProto rmt connpool fallback) key checklocal = ifM duc ( guardUsable r (cantCheck r) $ maybe (cantCheck r) return @@ -370,8 +370,8 @@ keyUrls r key = map tourl locs' remoteconfig = gitconfig r cfg = remoteGitConfig remoteconfig -dropKey :: Remote -> DeferredUUIDCheck -> Ssh.P2PSshConnectionPool -> Key -> Annex Bool -dropKey r duc connpool key +dropKey :: Remote -> State -> Key -> Annex Bool +dropKey r (State connpool duc) key | not $ Git.repoIsUrl (repo r) = ifM duc ( guardUsable (repo r) (return False) $ commitOnCleanup r $ onLocalFast r $ do @@ -389,8 +389,8 @@ dropKey r duc connpool key let fallback = Ssh.dropKey (repo r) key P2PHelper.remove (runProto r connpool fallback) key -lockKey :: Remote -> DeferredUUIDCheck -> Key -> (VerifiedCopy -> Annex r) -> Annex r -lockKey r duc key callback +lockKey :: Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r +lockKey r (State _ duc) key callback | not $ Git.repoIsUrl (repo r) = ifM duc ( guardUsable (repo r) failedlock $ do inorigrepo <- Annex.makeRunner @@ -446,15 +446,13 @@ lockKey r duc key callback failedlock = giveup "can't lock content" {- Tries to copy a key's content from a remote's annex to a file. -} -copyFromRemote :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification) -copyFromRemote r key file dest p - | Git.repoIsHttp (repo r) = unVerified $ - Annex.Content.downloadUrl key p (keyUrls r key) dest - | otherwise = commandMetered (Just p) key (return Nothing) $ - copyFromRemote' r key file dest +copyFromRemote :: Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification) +copyFromRemote = copyFromRemote' False -copyFromRemote' :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification) -copyFromRemote' r key file dest meterupdate +copyFromRemote' :: Bool -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification) +copyFromRemote' forcersync r (State connpool _) key file dest meterupdate + | Git.repoIsHttp (repo r) = unVerified $ + Annex.Content.downloadUrl key meterupdate (keyUrls r key) dest | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (unVerified (return False)) $ do params <- Ssh.rsyncParams r Download u <- getUUID @@ -470,11 +468,16 @@ copyFromRemote' r key file dest meterupdate runTransfer (Transfer Download u key) file forwardRetry (\p -> copier object dest (combineMeterUpdate p meterupdate) checksuccess) - | Git.repoIsSsh (repo r) = unVerified $ feedprogressback $ \p -> do - Ssh.rsyncHelper (Just (combineMeterUpdate meterupdate p)) - =<< Ssh.rsyncParamsRemote False r Download key dest file + | Git.repoIsSsh (repo r) = if forcersync + then unVerified fallback + else P2PHelper.retrieve + (runProto r connpool fallback) + key file dest meterupdate | otherwise = giveup "copying from non-ssh, non-http remote not supported" where + fallback = feedprogressback $ \p -> do + Ssh.rsyncHelper (Just (combineMeterUpdate meterupdate p)) + =<< Ssh.rsyncParamsRemote False r Download key dest file {- Feed local rsync's progress info back to the remote, - by forking a feeder thread that runs - git-annex-shell transferinfo at the same time @@ -535,9 +538,9 @@ copyFromRemote' r key file dest meterupdate =<< tryTakeMVar pidv bracketIO noop (const cleanup) (const $ a feeder) -copyFromRemoteCheap :: Remote -> Key -> AssociatedFile -> FilePath -> Annex Bool +copyFromRemoteCheap :: Remote -> State -> Key -> AssociatedFile -> FilePath -> Annex Bool #ifndef mingw32_HOST_OS -copyFromRemoteCheap r key af file +copyFromRemoteCheap r st key af file | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (return False) $ liftIO $ do loc <- gitAnnexLocation key (repo r) $ remoteGitConfig $ gitconfig r @@ -551,36 +554,31 @@ copyFromRemoteCheap r key af file ) | Git.repoIsSsh (repo r) = ifM (Annex.Content.preseedTmp key file) - ( fst <$> copyFromRemote r key af file nullMeterUpdate + ( fst <$> copyFromRemote' True r st key af file nullMeterUpdate , return False ) | otherwise = return False #else -copyFromRemoteCheap _ _ _ _ = return False +copyFromRemoteCheap _ _ _ _ _ = return False #endif {- Tries to copy a key's content to a remote's annex. -} -copyToRemote :: Remote -> DeferredUUIDCheck -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -copyToRemote r duc key file meterupdate +copyToRemote :: Remote -> State -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool +copyToRemote r (State connpool duc) key file meterupdate | not $ Git.repoIsUrl (repo r) = ifM duc ( guardUsable (repo r) (return False) $ commitOnCleanup r $ copylocal =<< Annex.Content.prepSendAnnex key , return False ) | Git.repoIsSsh (repo r) = commitOnCleanup r $ - Annex.Content.sendAnnex key noop $ \object -> - withmeter object $ \p -> do - -- This is too broad really, but recvkey normally - -- verifies content anyway, so avoid complicating - -- it with a local sendAnnex check and rollback. - unlocked <- isDirect <||> versionSupportsUnlockedPointers - Ssh.rsyncHelper (Just p) - =<< Ssh.rsyncParamsRemote unlocked r Upload key object file + P2PHelper.store + (runProto r connpool copyremotefallback) + key file meterupdate + | otherwise = giveup "copying to non-ssh repo not supported" where - withmeter object = commandMetered (Just meterupdate) key (return $ Just object) copylocal Nothing = return False - copylocal (Just (object, checksuccess)) = withmeter object $ \p -> do + copylocal (Just (object, checksuccess)) = do -- The checksuccess action is going to be run in -- the remote's Annex, but it needs access to the local -- Annex monad's state. @@ -595,12 +593,19 @@ copyToRemote r duc key file meterupdate ensureInitialized copier <- mkCopier hardlink params let verify = Annex.Content.RemoteVerify r - runTransfer (Transfer Download u key) file forwardRetry $ \p' -> - let p'' = combineMeterUpdate p p' + runTransfer (Transfer Download u key) file forwardRetry $ \p -> + let p' = combineMeterUpdate meterupdate p in Annex.Content.saveState True `after` Annex.Content.getViaTmp verify key - (\dest -> copier object dest p'' (liftIO checksuccessio)) + (\dest -> copier object dest p' (liftIO checksuccessio)) ) + copyremotefallback = Annex.Content.sendAnnex key noop $ \object -> do + -- This is too broad really, but recvkey normally + -- verifies content anyway, so avoid complicating + -- it with a local sendAnnex check and rollback. + unlocked <- isDirect <||> versionSupportsUnlockedPointers + Ssh.rsyncHelper (Just meterupdate) + =<< Ssh.rsyncParamsRemote unlocked r Upload key object file fsckOnRemote :: Git.Repo -> [CommandParam] -> Annex (IO Bool) fsckOnRemote r params @@ -736,6 +741,13 @@ mkCopier remotewanthardlink rsyncparams = do , return copier ) +data State = State Ssh.P2PSshConnectionPool DeferredUUIDCheck + +mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State +mkState r u gc = State + <$> Ssh.mkP2PSshConnectionPool + <*> mkDeferredUUIDCheck r u gc + {- Normally the UUID of a local repository is checked at startup, - but annex-checkuuid config can prevent that. To avoid getting - confused, a deferred check is done just before the repository diff --git a/Remote/Helper/P2P.hs b/Remote/Helper/P2P.hs index 272489755b..23d99da511 100644 --- a/Remote/Helper/P2P.hs +++ b/Remote/Helper/P2P.hs @@ -14,7 +14,6 @@ import qualified P2P.Protocol as P2P import P2P.IO import Types.Remote import Annex.Content -import Config.Cost import Messages.Progress import Utility.Metered import Types.NumCopies diff --git a/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn b/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn index b7c44b6941..39efa3b77a 100644 --- a/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn +++ b/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn @@ -61,3 +61,5 @@ took 12s, so remote access over localhost seems faster now! Possibly there's a little bit more concurrency when git-annex and git-annex-shell are both running?) +Transferring a 30 mb file over ssh to localhost, speed increased from +3.288s to 3.031s. Due to rsync's several levels of checksumming?