Fix zombie leak and general inefficiency when copying files to a local git repo.

In a benchmark copying 1000 small files, the time dropped from
15.98s to 14.64s -- an 8% improvement in the non-data-transfer overhead of
git-annex copy.
This commit is contained in:
Joey Hess 2014-03-06 17:12:50 -04:00
parent f61c6231a8
commit 4d06037fdd
3 changed files with 51 additions and 18 deletions

View file

@ -88,6 +88,7 @@ data AnnexState = AnnexState
, gitconfig :: GitConfig , gitconfig :: GitConfig
, backends :: [BackendA Annex] , backends :: [BackendA Annex]
, remotes :: [Types.Remote.RemoteA Annex] , remotes :: [Types.Remote.RemoteA Annex]
, remoteannexstate :: M.Map UUID AnnexState
, output :: MessageState , output :: MessageState
, force :: Bool , force :: Bool
, fast :: Bool , fast :: Bool
@ -128,6 +129,7 @@ newState c r = AnnexState
, gitconfig = c , gitconfig = c
, backends = [] , backends = []
, remotes = [] , remotes = []
, remoteannexstate = M.empty
, output = defaultMessageState , output = defaultMessageState
, force = False , force = False
, fast = False , fast = False

View file

@ -144,7 +144,7 @@ repoAvail r
else return True else return True
| Git.repoIsUrl r = return True | Git.repoIsUrl r = return True
| Git.repoIsLocalUnknown r = return False | Git.repoIsLocalUnknown r = return False
| otherwise = liftIO $ catchBoolIO $ onLocal r $ return True | otherwise = liftIO $ isJust <$> catchMaybeIO (Git.Config.read r)
{- Tries to read the config for a specified remote, updates state, and {- Tries to read the config for a specified remote, updates state, and
- returns the updated repo. -} - returns the updated repo. -}
@ -161,7 +161,10 @@ tryGitConfigRead r
| Git.repoIsHttp r = store geturlconfig | Git.repoIsHttp r = store geturlconfig
| Git.GCrypt.isEncrypted r = handlegcrypt =<< getConfigMaybe (remoteConfig r "uuid") | Git.GCrypt.isEncrypted r = handlegcrypt =<< getConfigMaybe (remoteConfig r "uuid")
| Git.repoIsUrl r = return r | Git.repoIsUrl r = return r
| otherwise = store $ safely $ onLocal r $ do | otherwise = store $ safely $ do
s <- Annex.new r
Annex.eval s $ do
Annex.BranchState.disableUpdate
ensureInitialized ensureInitialized
Annex.getState Annex.repo Annex.getState Annex.repo
where where
@ -267,8 +270,8 @@ inAnnex rmt key
checkremote = Ssh.inAnnex r key checkremote = Ssh.inAnnex r key
checklocal = guardUsable r (cantCheck r) $ dispatch <$> check checklocal = guardUsable r (cantCheck r) $ dispatch <$> check
where where
check = liftIO $ catchMsgIO $ onLocal r $ check = either (Left . show) Right
Annex.Content.inAnnexSafe key <$> tryAnnex (onLocal rmt $ Annex.Content.inAnnexSafe key)
dispatch (Left e) = Left e dispatch (Left e) = Left e
dispatch (Right (Just b)) = Right b dispatch (Right (Just b)) = Right b
dispatch (Right Nothing) = cantCheck r dispatch (Right Nothing) = cantCheck r
@ -291,7 +294,7 @@ keyUrls r key = map tourl locs'
dropKey :: Remote -> Key -> Annex Bool dropKey :: Remote -> Key -> Annex Bool
dropKey r key dropKey r key
| not $ Git.repoIsUrl (repo r) = | not $ Git.repoIsUrl (repo r) =
guardUsable (repo r) False $ commitOnCleanup r $ liftIO $ onLocal (repo r) $ do guardUsable (repo r) False $ commitOnCleanup r $ onLocal r $ do
ensureInitialized ensureInitialized
whenM (Annex.Content.inAnnex key) $ do whenM (Annex.Content.inAnnex key) $ do
Annex.Content.lockContent key $ Annex.Content.lockContent key $
@ -311,7 +314,7 @@ copyFromRemote' r key file dest
let params = Ssh.rsyncParams r Download let params = Ssh.rsyncParams r Download
u <- getUUID u <- getUUID
-- run copy from perspective of remote -- run copy from perspective of remote
liftIO $ onLocal (repo r) $ do onLocal r $ do
ensureInitialized ensureInitialized
v <- Annex.Content.prepSendAnnex key v <- Annex.Content.prepSendAnnex key
case v of case v of
@ -410,7 +413,7 @@ copyToRemote r key file p
let params = Ssh.rsyncParams r Upload let params = Ssh.rsyncParams r Upload
u <- getUUID u <- getUUID
-- run copy from perspective of remote -- run copy from perspective of remote
liftIO $ onLocal (repo r) $ ifM (Annex.Content.inAnnex key) onLocal r $ ifM (Annex.Content.inAnnex key)
( return True ( return True
, do , do
ensureInitialized ensureInitialized
@ -439,18 +442,39 @@ fsckOnRemote r params
{- The passed repair action is run in the Annex monad of the remote. -} {- The passed repair action is run in the Annex monad of the remote. -}
repairRemote :: Git.Repo -> Annex Bool -> Annex (IO Bool) repairRemote :: Git.Repo -> Annex Bool -> Annex (IO Bool)
repairRemote r a = return $ Remote.Git.onLocal r a repairRemote r a = return $ do
{- Runs an action on a local repository inexpensively, by making an annex
- monad using that repository. -}
onLocal :: Git.Repo -> Annex a -> IO a
onLocal r a = do
s <- Annex.new r s <- Annex.new r
Annex.eval s $ do Annex.eval s $ do
-- No need to update the branch; its data is not used Annex.BranchState.disableUpdate
-- for anything onLocal is used to do. ensureInitialized
a
{- Runs an action from the perspective of a local remote.
-
- The AnnexState is cached for speed and to avoid resource leaks.
-
- The repository's git-annex branch is not updated, as an optimisation.
- No caller of onLocal can query data from the branch and be assured
- that it gets a current value. Callers of onLocal can still make
- changes to the branch, however.
-}
onLocal :: Remote -> Annex a -> Annex a
onLocal r a = do
m <- Annex.getState Annex.remoteannexstate
case M.lookup (uuid r) m of
Nothing -> do
st <- liftIO $ Annex.new (repo r)
go st $ do
Annex.BranchState.disableUpdate Annex.BranchState.disableUpdate
a a
Just st -> go st a
where
cache st = Annex.changeState $ \s -> s
{ Annex.remoteannexstate = M.insert (uuid r) st (Annex.remoteannexstate s) }
go st a' = do
(ret, st') <- liftIO $ Annex.run st a'
cache st'
return ret
{- Copies a file with rsync unless both locations are on the same {- Copies a file with rsync unless both locations are on the same
- filesystem. Then cp could be faster. -} - filesystem. Then cp could be faster. -}
@ -488,7 +512,7 @@ commitOnCleanup r a = go `after` a
where where
go = Annex.addCleanup (Git.repoLocation $ repo r) cleanup go = Annex.addCleanup (Git.repoLocation $ repo r) cleanup
cleanup cleanup
| not $ Git.repoIsUrl (repo r) = liftIO $ onLocal (repo r) $ | not $ Git.repoIsUrl (repo r) = onLocal r $
doQuietSideAction $ doQuietSideAction $
Annex.Branch.commit "update" Annex.Branch.commit "update"
| otherwise = void $ do | otherwise = void $ do

7
debian/changelog vendored
View file

@ -1,3 +1,10 @@
git-annex (5.20140307) UNRELEASED; urgency=medium
* Fix zombie leak and general inefficiency when copying files to a
local git repo.
-- Joey Hess <joeyh@debian.org> Thu, 06 Mar 2014 16:17:01 -0400
git-annex (5.20140306) unstable; urgency=high git-annex (5.20140306) unstable; urgency=high
* sync: Fix bug in direct mode that caused a file that was not * sync: Fix bug in direct mode that caused a file that was not