Fix zombie leak and general inneficiency when copying files to a local git repo.

Benchmarking this with 1000 small files being copied, the time reduced from
15.98s to 14.64s -- an 8% improvement in the non-data-transfer overhead of
git-annex copy.
This commit is contained in:
Joey Hess 2014-03-06 17:12:50 -04:00
parent f61c6231a8
commit 4d06037fdd
3 changed files with 51 additions and 18 deletions

View file

@ -88,6 +88,7 @@ data AnnexState = AnnexState
, gitconfig :: GitConfig
, backends :: [BackendA Annex]
, remotes :: [Types.Remote.RemoteA Annex]
, remoteannexstate :: M.Map UUID AnnexState
, output :: MessageState
, force :: Bool
, fast :: Bool
@ -128,6 +129,7 @@ newState c r = AnnexState
, gitconfig = c
, backends = []
, remotes = []
, remoteannexstate = M.empty
, output = defaultMessageState
, force = False
, fast = False

View file

@ -144,7 +144,7 @@ repoAvail r
else return True
| Git.repoIsUrl r = return True
| Git.repoIsLocalUnknown r = return False
| otherwise = liftIO $ catchBoolIO $ onLocal r $ return True
| otherwise = liftIO $ isJust <$> catchMaybeIO (Git.Config.read r)
{- Tries to read the config for a specified remote, updates state, and
- returns the updated repo. -}
@ -161,9 +161,12 @@ tryGitConfigRead r
| Git.repoIsHttp r = store geturlconfig
| Git.GCrypt.isEncrypted r = handlegcrypt =<< getConfigMaybe (remoteConfig r "uuid")
| Git.repoIsUrl r = return r
| otherwise = store $ safely $ onLocal r $ do
ensureInitialized
Annex.getState Annex.repo
| otherwise = store $ safely $ do
s <- Annex.new r
Annex.eval s $ do
Annex.BranchState.disableUpdate
ensureInitialized
Annex.getState Annex.repo
where
haveconfig = not . M.null . Git.config
@ -267,8 +270,8 @@ inAnnex rmt key
checkremote = Ssh.inAnnex r key
checklocal = guardUsable r (cantCheck r) $ dispatch <$> check
where
check = liftIO $ catchMsgIO $ onLocal r $
Annex.Content.inAnnexSafe key
check = either (Left . show) Right
<$> tryAnnex (onLocal rmt $ Annex.Content.inAnnexSafe key)
dispatch (Left e) = Left e
dispatch (Right (Just b)) = Right b
dispatch (Right Nothing) = cantCheck r
@ -291,7 +294,7 @@ keyUrls r key = map tourl locs'
dropKey :: Remote -> Key -> Annex Bool
dropKey r key
| not $ Git.repoIsUrl (repo r) =
guardUsable (repo r) False $ commitOnCleanup r $ liftIO $ onLocal (repo r) $ do
guardUsable (repo r) False $ commitOnCleanup r $ onLocal r $ do
ensureInitialized
whenM (Annex.Content.inAnnex key) $ do
Annex.Content.lockContent key $
@ -311,7 +314,7 @@ copyFromRemote' r key file dest
let params = Ssh.rsyncParams r Download
u <- getUUID
-- run copy from perspective of remote
liftIO $ onLocal (repo r) $ do
onLocal r $ do
ensureInitialized
v <- Annex.Content.prepSendAnnex key
case v of
@ -410,7 +413,7 @@ copyToRemote r key file p
let params = Ssh.rsyncParams r Upload
u <- getUUID
-- run copy from perspective of remote
liftIO $ onLocal (repo r) $ ifM (Annex.Content.inAnnex key)
onLocal r $ ifM (Annex.Content.inAnnex key)
( return True
, do
ensureInitialized
@ -439,19 +442,40 @@ fsckOnRemote r params
{- The passed repair action is run in the Annex monad of the remote. -}
repairRemote :: Git.Repo -> Annex Bool -> Annex (IO Bool)
repairRemote r a = return $ Remote.Git.onLocal r a
{- Runs an action on a local repository inexpensively, by making an annex
- monad using that repository. -}
onLocal :: Git.Repo -> Annex a -> IO a
onLocal r a = do
repairRemote r a = return $ do
s <- Annex.new r
Annex.eval s $ do
-- No need to update the branch; its data is not used
-- for anything onLocal is used to do.
Annex.BranchState.disableUpdate
ensureInitialized
a
{- Runs an action from the perspective of a local remote.
-
- The AnnexState is cached for speed and to avoid resource leaks.
-
- The repository's git-annex branch is not updated, as an optimisation.
- No caller of onLocal can query data from the branch and be ensured
- it gets a current value. Caller of onLocal can make changes to
- the branch, however.
-}
onLocal :: Remote -> Annex a -> Annex a
onLocal r a = do
m <- Annex.getState Annex.remoteannexstate
case M.lookup (uuid r) m of
Nothing -> do
st <- liftIO $ Annex.new (repo r)
go st $ do
Annex.BranchState.disableUpdate
a
Just st -> go st a
where
cache st = Annex.changeState $ \s -> s
{ Annex.remoteannexstate = M.insert (uuid r) st (Annex.remoteannexstate s) }
go st a' = do
(ret, st') <- liftIO $ Annex.run st a'
cache st'
return ret
{- Copys a file with rsync unless both locations are on the same
- filesystem. Then cp could be faster. -}
rsyncOrCopyFile :: [CommandParam] -> FilePath -> FilePath -> MeterUpdate -> Annex Bool
@ -488,7 +512,7 @@ commitOnCleanup r a = go `after` a
where
go = Annex.addCleanup (Git.repoLocation $ repo r) cleanup
cleanup
| not $ Git.repoIsUrl (repo r) = liftIO $ onLocal (repo r) $
| not $ Git.repoIsUrl (repo r) = onLocal r $
doQuietSideAction $
Annex.Branch.commit "update"
| otherwise = void $ do

7
debian/changelog vendored
View file

@ -1,3 +1,10 @@
git-annex (5.20140307) UNRELEASED; urgency=medium
* Fix zombie leak and general inneficiency when copying files to a
local git repo.
-- Joey Hess <joeyh@debian.org> Thu, 06 Mar 2014 16:17:01 -0400
git-annex (5.20140306) unstable; urgency=high
* sync: Fix bug in direct mode that caused a file that was not