Fix zombie leak and general inefficiency when copying files to a local git repo.
Benchmarking this with 1000 small files being copied, the time reduced from 15.98s to 14.64s -- an 8% improvement in the non-data-transfer overhead of git-annex copy.
This commit is contained in:
parent
f61c6231a8
commit
4d06037fdd
3 changed files with 51 additions and 18 deletions
2
Annex.hs
2
Annex.hs
|
@ -88,6 +88,7 @@ data AnnexState = AnnexState
|
||||||
, gitconfig :: GitConfig
|
, gitconfig :: GitConfig
|
||||||
, backends :: [BackendA Annex]
|
, backends :: [BackendA Annex]
|
||||||
, remotes :: [Types.Remote.RemoteA Annex]
|
, remotes :: [Types.Remote.RemoteA Annex]
|
||||||
|
, remoteannexstate :: M.Map UUID AnnexState
|
||||||
, output :: MessageState
|
, output :: MessageState
|
||||||
, force :: Bool
|
, force :: Bool
|
||||||
, fast :: Bool
|
, fast :: Bool
|
||||||
|
@ -128,6 +129,7 @@ newState c r = AnnexState
|
||||||
, gitconfig = c
|
, gitconfig = c
|
||||||
, backends = []
|
, backends = []
|
||||||
, remotes = []
|
, remotes = []
|
||||||
|
, remoteannexstate = M.empty
|
||||||
, output = defaultMessageState
|
, output = defaultMessageState
|
||||||
, force = False
|
, force = False
|
||||||
, fast = False
|
, fast = False
|
||||||
|
|
|
@ -144,7 +144,7 @@ repoAvail r
|
||||||
else return True
|
else return True
|
||||||
| Git.repoIsUrl r = return True
|
| Git.repoIsUrl r = return True
|
||||||
| Git.repoIsLocalUnknown r = return False
|
| Git.repoIsLocalUnknown r = return False
|
||||||
| otherwise = liftIO $ catchBoolIO $ onLocal r $ return True
|
| otherwise = liftIO $ isJust <$> catchMaybeIO (Git.Config.read r)
|
||||||
|
|
||||||
{- Tries to read the config for a specified remote, updates state, and
|
{- Tries to read the config for a specified remote, updates state, and
|
||||||
- returns the updated repo. -}
|
- returns the updated repo. -}
|
||||||
|
@ -161,7 +161,10 @@ tryGitConfigRead r
|
||||||
| Git.repoIsHttp r = store geturlconfig
|
| Git.repoIsHttp r = store geturlconfig
|
||||||
| Git.GCrypt.isEncrypted r = handlegcrypt =<< getConfigMaybe (remoteConfig r "uuid")
|
| Git.GCrypt.isEncrypted r = handlegcrypt =<< getConfigMaybe (remoteConfig r "uuid")
|
||||||
| Git.repoIsUrl r = return r
|
| Git.repoIsUrl r = return r
|
||||||
| otherwise = store $ safely $ onLocal r $ do
|
| otherwise = store $ safely $ do
|
||||||
|
s <- Annex.new r
|
||||||
|
Annex.eval s $ do
|
||||||
|
Annex.BranchState.disableUpdate
|
||||||
ensureInitialized
|
ensureInitialized
|
||||||
Annex.getState Annex.repo
|
Annex.getState Annex.repo
|
||||||
where
|
where
|
||||||
|
@ -267,8 +270,8 @@ inAnnex rmt key
|
||||||
checkremote = Ssh.inAnnex r key
|
checkremote = Ssh.inAnnex r key
|
||||||
checklocal = guardUsable r (cantCheck r) $ dispatch <$> check
|
checklocal = guardUsable r (cantCheck r) $ dispatch <$> check
|
||||||
where
|
where
|
||||||
check = liftIO $ catchMsgIO $ onLocal r $
|
check = either (Left . show) Right
|
||||||
Annex.Content.inAnnexSafe key
|
<$> tryAnnex (onLocal rmt $ Annex.Content.inAnnexSafe key)
|
||||||
dispatch (Left e) = Left e
|
dispatch (Left e) = Left e
|
||||||
dispatch (Right (Just b)) = Right b
|
dispatch (Right (Just b)) = Right b
|
||||||
dispatch (Right Nothing) = cantCheck r
|
dispatch (Right Nothing) = cantCheck r
|
||||||
|
@ -291,7 +294,7 @@ keyUrls r key = map tourl locs'
|
||||||
dropKey :: Remote -> Key -> Annex Bool
|
dropKey :: Remote -> Key -> Annex Bool
|
||||||
dropKey r key
|
dropKey r key
|
||||||
| not $ Git.repoIsUrl (repo r) =
|
| not $ Git.repoIsUrl (repo r) =
|
||||||
guardUsable (repo r) False $ commitOnCleanup r $ liftIO $ onLocal (repo r) $ do
|
guardUsable (repo r) False $ commitOnCleanup r $ onLocal r $ do
|
||||||
ensureInitialized
|
ensureInitialized
|
||||||
whenM (Annex.Content.inAnnex key) $ do
|
whenM (Annex.Content.inAnnex key) $ do
|
||||||
Annex.Content.lockContent key $
|
Annex.Content.lockContent key $
|
||||||
|
@ -311,7 +314,7 @@ copyFromRemote' r key file dest
|
||||||
let params = Ssh.rsyncParams r Download
|
let params = Ssh.rsyncParams r Download
|
||||||
u <- getUUID
|
u <- getUUID
|
||||||
-- run copy from perspective of remote
|
-- run copy from perspective of remote
|
||||||
liftIO $ onLocal (repo r) $ do
|
onLocal r $ do
|
||||||
ensureInitialized
|
ensureInitialized
|
||||||
v <- Annex.Content.prepSendAnnex key
|
v <- Annex.Content.prepSendAnnex key
|
||||||
case v of
|
case v of
|
||||||
|
@ -410,7 +413,7 @@ copyToRemote r key file p
|
||||||
let params = Ssh.rsyncParams r Upload
|
let params = Ssh.rsyncParams r Upload
|
||||||
u <- getUUID
|
u <- getUUID
|
||||||
-- run copy from perspective of remote
|
-- run copy from perspective of remote
|
||||||
liftIO $ onLocal (repo r) $ ifM (Annex.Content.inAnnex key)
|
onLocal r $ ifM (Annex.Content.inAnnex key)
|
||||||
( return True
|
( return True
|
||||||
, do
|
, do
|
||||||
ensureInitialized
|
ensureInitialized
|
||||||
|
@ -439,18 +442,39 @@ fsckOnRemote r params
|
||||||
|
|
||||||
{- The passed repair action is run in the Annex monad of the remote. -}
|
{- The passed repair action is run in the Annex monad of the remote. -}
|
||||||
repairRemote :: Git.Repo -> Annex Bool -> Annex (IO Bool)
|
repairRemote :: Git.Repo -> Annex Bool -> Annex (IO Bool)
|
||||||
repairRemote r a = return $ Remote.Git.onLocal r a
|
repairRemote r a = return $ do
|
||||||
|
|
||||||
{- Runs an action on a local repository inexpensively, by making an annex
|
|
||||||
- monad using that repository. -}
|
|
||||||
onLocal :: Git.Repo -> Annex a -> IO a
|
|
||||||
onLocal r a = do
|
|
||||||
s <- Annex.new r
|
s <- Annex.new r
|
||||||
Annex.eval s $ do
|
Annex.eval s $ do
|
||||||
-- No need to update the branch; its data is not used
|
Annex.BranchState.disableUpdate
|
||||||
-- for anything onLocal is used to do.
|
ensureInitialized
|
||||||
|
a
|
||||||
|
|
||||||
|
{- Runs an action from the perspective of a local remote.
|
||||||
|
-
|
||||||
|
- The AnnexState is cached for speed and to avoid resource leaks.
|
||||||
|
-
|
||||||
|
- The repository's git-annex branch is not updated, as an optimisation.
|
||||||
|
- No caller of onLocal can query data from the branch and be ensured
|
||||||
|
- it gets a current value. Caller of onLocal can make changes to
|
||||||
|
- the branch, however.
|
||||||
|
-}
|
||||||
|
onLocal :: Remote -> Annex a -> Annex a
|
||||||
|
onLocal r a = do
|
||||||
|
m <- Annex.getState Annex.remoteannexstate
|
||||||
|
case M.lookup (uuid r) m of
|
||||||
|
Nothing -> do
|
||||||
|
st <- liftIO $ Annex.new (repo r)
|
||||||
|
go st $ do
|
||||||
Annex.BranchState.disableUpdate
|
Annex.BranchState.disableUpdate
|
||||||
a
|
a
|
||||||
|
Just st -> go st a
|
||||||
|
where
|
||||||
|
cache st = Annex.changeState $ \s -> s
|
||||||
|
{ Annex.remoteannexstate = M.insert (uuid r) st (Annex.remoteannexstate s) }
|
||||||
|
go st a' = do
|
||||||
|
(ret, st') <- liftIO $ Annex.run st a'
|
||||||
|
cache st'
|
||||||
|
return ret
|
||||||
|
|
||||||
{- Copies a file with rsync unless both locations are on the same
|
{- Copies a file with rsync unless both locations are on the same
|
||||||
- filesystem. Then cp could be faster. -}
|
- filesystem. Then cp could be faster. -}
|
||||||
|
@ -488,7 +512,7 @@ commitOnCleanup r a = go `after` a
|
||||||
where
|
where
|
||||||
go = Annex.addCleanup (Git.repoLocation $ repo r) cleanup
|
go = Annex.addCleanup (Git.repoLocation $ repo r) cleanup
|
||||||
cleanup
|
cleanup
|
||||||
| not $ Git.repoIsUrl (repo r) = liftIO $ onLocal (repo r) $
|
| not $ Git.repoIsUrl (repo r) = onLocal r $
|
||||||
doQuietSideAction $
|
doQuietSideAction $
|
||||||
Annex.Branch.commit "update"
|
Annex.Branch.commit "update"
|
||||||
| otherwise = void $ do
|
| otherwise = void $ do
|
||||||
|
|
7
debian/changelog
vendored
7
debian/changelog
vendored
|
@ -1,3 +1,10 @@
|
||||||
|
git-annex (5.20140307) UNRELEASED; urgency=medium
|
||||||
|
|
||||||
|
* Fix zombie leak and general inefficiency when copying files to a
|
||||||
|
local git repo.
|
||||||
|
|
||||||
|
-- Joey Hess <joeyh@debian.org> Thu, 06 Mar 2014 16:17:01 -0400
|
||||||
|
|
||||||
git-annex (5.20140306) unstable; urgency=high
|
git-annex (5.20140306) unstable; urgency=high
|
||||||
|
|
||||||
* sync: Fix bug in direct mode that caused a file that was not
|
* sync: Fix bug in direct mode that caused a file that was not
|
||||||
|
|
Loading…
Reference in a new issue