Speed up startup time by caching the refs that have been merged into the git-annex branch.

This can speed up git-annex commands by as much as a second, depending on
the number of remotes.
This commit is contained in:
Joey Hess 2016-07-17 12:11:05 -04:00
parent c04d9d6a01
commit 154c939830
Failed to extract signature
3 changed files with 82 additions and 38 deletions

View file

@ -1,6 +1,6 @@
{- management of the git-annex branch {- management of the git-annex branch
- -
- Copyright 2011-2013 Joey Hess <id@joeyh.name> - Copyright 2011-2016 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -30,6 +30,7 @@ import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.Map as M import qualified Data.Map as M
import Data.Bits.Utils import Data.Bits.Utils
import Data.Function
import Control.Concurrent (threadDelay) import Control.Concurrent (threadDelay)
import Annex.Common import Annex.Common
@ -81,9 +82,9 @@ hasOrigin = inRepo $ Git.Ref.exists originname
hasSibling :: Annex Bool hasSibling :: Annex Bool
hasSibling = not . null <$> siblingBranches hasSibling = not . null <$> siblingBranches
{- List of git-annex (refs, branches), including the main one and any {- List of git-annex (shas, branches), including the main one and any
- from remotes. Duplicate refs are filtered out. -} - from remotes. Duplicates are filtered out. -}
siblingBranches :: Annex [(Git.Ref, Git.Branch)] siblingBranches :: Annex [(Git.Sha, Git.Branch)]
siblingBranches = inRepo $ Git.Ref.matchingUniq [name] siblingBranches = inRepo $ Git.Ref.matchingUniq [name]
{- Creates the branch, if it does not already exist. -} {- Creates the branch, if it does not already exist. -}
@ -133,40 +134,50 @@ forceUpdate = updateTo =<< siblingBranches
- -
- Returns True if any refs were merged in, False otherwise. - Returns True if any refs were merged in, False otherwise.
-} -}
updateTo :: [(Git.Ref, Git.Branch)] -> Annex Bool updateTo :: [(Git.Sha, Git.Branch)] -> Annex Bool
updateTo pairs = do updateTo pairs = do
-- ensure branch exists, and get its current ref -- ensure branch exists, and get its current ref
branchref <- getBranch branchref <- getBranch
dirty <- journalDirty dirty <- journalDirty
ignoredrefs <- getIgnoredRefs ignoredrefs <- getIgnoredRefs
(refs, branches) <- unzip <$> filterM (isnewer ignoredrefs) pairs let unignoredrefs = excludeset ignoredrefs pairs
if null refs tomerge <- if null unignoredrefs
then return []
else do
mergedrefs <- getMergedRefs
filterM isnewer (excludeset mergedrefs unignoredrefs)
if null tomerge
{- Even when no refs need to be merged, the index {- Even when no refs need to be merged, the index
- may still be updated if the branch has gotten ahead - may still be updated if the branch has gotten ahead
- of the index. -} - of the index. -}
then whenM (needUpdateIndex branchref) $ lockJournal $ \jl -> do then do
forceUpdateIndex jl branchref whenM (needUpdateIndex branchref) $ lockJournal $ \jl -> do
{- When there are journalled changes forceUpdateIndex jl branchref
- as well as the branch being updated, {- When there are journalled changes
- a commit needs to be done. -} - as well as the branch being updated,
when dirty $ - a commit needs to be done. -}
go branchref True [] [] jl when dirty $
else lockJournal $ go branchref dirty refs branches go branchref True [] jl
return $ not $ null refs {- Only needed for a while, to populate the
- newly added merged refs cache with already
- merged refs. Can be safely removed at any time. -}
addMergedRefs unignoredrefs
else lockJournal $ go branchref dirty tomerge
return $ not $ null tomerge
where where
isnewer ignoredrefs (r, _) excludeset s = filter (\(r, _) -> S.notMember r s)
| S.member r ignoredrefs = return False isnewer (r, _) = inRepo $ Git.Branch.changed fullname r
| otherwise = inRepo $ Git.Branch.changed fullname r go branchref dirty tomerge jl = withIndex $ do
go branchref dirty refs branches jl = withIndex $ do let (refs, branches) = unzip tomerge
cleanjournal <- if dirty then stageJournal jl else return noop cleanjournal <- if dirty then stageJournal jl else return noop
let merge_desc = if null branches let merge_desc = if null tomerge
then "update" then "update"
else "merging " ++ else "merging " ++
unwords (map Git.Ref.describe branches) ++ unwords (map Git.Ref.describe branches) ++
" into " ++ fromRef name " into " ++ fromRef name
localtransitions <- parseTransitionsStrictly "local" localtransitions <- parseTransitionsStrictly "local"
<$> getLocal transitionsLog <$> getLocal transitionsLog
unless (null branches) $ do unless (null tomerge) $ do
showSideAction merge_desc showSideAction merge_desc
mapM_ checkBranchDifferences refs mapM_ checkBranchDifferences refs
mergeIndex jl refs mergeIndex jl refs
@ -181,6 +192,7 @@ updateTo pairs = do
then updateIndex jl branchref then updateIndex jl branchref
else commitIndex jl branchref merge_desc commitrefs else commitIndex jl branchref merge_desc commitrefs
) )
addMergedRefs tomerge
liftIO cleanjournal liftIO cleanjournal
{- Gets the content of a file, which may be in the journal, or in the index {- Gets the content of a file, which may be in the journal, or in the index
@ -493,21 +505,6 @@ handleTransitions jl localts refs = do
<$> catFile ref transitionsLog <$> catFile ref transitionsLog
return (ref, ts) return (ref, ts)
ignoreRefs :: [Git.Ref] -> Annex ()
ignoreRefs rs = do
old <- getIgnoredRefs
let s = S.unions [old, S.fromList rs]
f <- fromRepo gitAnnexIgnoredRefs
replaceFile f $ \tmp -> liftIO $ writeFile tmp $
unlines $ map fromRef $ S.elems s
getIgnoredRefs :: Annex (S.Set Git.Ref)
getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content
where
content = do
f <- fromRepo gitAnnexIgnoredRefs
liftIO $ catchDefaultIO "" $ readFile f
{- Performs the specified transitions on the contents of the index file, {- Performs the specified transitions on the contents of the index file,
- commits it to the branch, or creates a new branch. - commits it to the branch, or creates a new branch.
-} -}
@ -578,3 +575,41 @@ checkBranchDifferences ref = do
mydiffs <- annexDifferences <$> Annex.getGitConfig mydiffs <- annexDifferences <$> Annex.getGitConfig
when (theirdiffs /= mydiffs) $ when (theirdiffs /= mydiffs) $
error "Remote repository is tuned in incompatable way; cannot be merged with local repository." error "Remote repository is tuned in incompatable way; cannot be merged with local repository."
ignoreRefs :: [Git.Sha] -> Annex ()
ignoreRefs rs = do
old <- getIgnoredRefs
let s = S.unions [old, S.fromList rs]
f <- fromRepo gitAnnexIgnoredRefs
replaceFile f $ \tmp -> liftIO $ writeFile tmp $
unlines $ map fromRef $ S.elems s
getIgnoredRefs :: Annex (S.Set Git.Sha)
getIgnoredRefs = S.fromList . mapMaybe Git.Sha.extractSha . lines <$> content
where
content = do
f <- fromRepo gitAnnexIgnoredRefs
liftIO $ catchDefaultIO "" $ readFile f
addMergedRefs :: [(Git.Sha, Git.Branch)] -> Annex ()
addMergedRefs [] = return ()
addMergedRefs new = do
old <- getMergedRefs'
-- Keep only the newest sha for each branch.
let l = nubBy ((==) `on` snd) (new ++ old)
f <- fromRepo gitAnnexMergedRefs
replaceFile f $ \tmp -> liftIO $ writeFile tmp $
unlines $ map (\(s, b) -> fromRef s ++ '\t' : fromRef b) l
getMergedRefs :: Annex (S.Set Git.Sha)
getMergedRefs = S.fromList . map fst <$> getMergedRefs'
getMergedRefs' :: Annex [(Git.Sha, Git.Branch)]
getMergedRefs' = do
f <- fromRepo gitAnnexMergedRefs
s <- liftIO $ catchDefaultIO "" $ readFile f
return $ map parse $ lines s
where
parse l =
let (s, b) = separate (== '\t') l
in (Ref s, Ref b)

View file

@ -52,6 +52,7 @@ module Annex.Locations (
gitAnnexIndexStatus, gitAnnexIndexStatus,
gitAnnexViewIndex, gitAnnexViewIndex,
gitAnnexViewLog, gitAnnexViewLog,
gitAnnexMergedRefs,
gitAnnexIgnoredRefs, gitAnnexIgnoredRefs,
gitAnnexPidFile, gitAnnexPidFile,
gitAnnexPidLockFile, gitAnnexPidLockFile,
@ -357,6 +358,10 @@ gitAnnexViewIndex r = gitAnnexDir r </> "viewindex"
gitAnnexViewLog :: Git.Repo -> FilePath gitAnnexViewLog :: Git.Repo -> FilePath
gitAnnexViewLog r = gitAnnexDir r </> "viewlog" gitAnnexViewLog r = gitAnnexDir r </> "viewlog"
{- List of refs that have already been merged into the git-annex branch. -}
gitAnnexMergedRefs :: Git.Repo -> FilePath
gitAnnexMergedRefs r = gitAnnexDir r </> "mergedrefs"
{- List of refs that should not be merged into the git-annex branch. -} {- List of refs that should not be merged into the git-annex branch. -}
gitAnnexIgnoredRefs :: Git.Repo -> FilePath gitAnnexIgnoredRefs :: Git.Repo -> FilePath
gitAnnexIgnoredRefs r = gitAnnexDir r </> "ignoredrefs" gitAnnexIgnoredRefs r = gitAnnexDir r </> "ignoredrefs"

View file

@ -8,7 +8,11 @@ git-annex (6.20160614) UNRELEASED; urgency=medium
* testremote: Fix crash when testing a freshly made external special remote. * testremote: Fix crash when testing a freshly made external special remote.
* Remove unnecessary rpaths in the git-annex binary, but only when * Remove unnecessary rpaths in the git-annex binary, but only when
it's built using make, not cabal. it's built using make, not cabal.
This speeds up git-annex statup time by around 50%. This speeds up git-annex startup time by around 50%.
* Speed up startup time by caching the refs that have been merged into
the git-annex branch.
This can speed up git-annex commands by as much as a second,
depending on the number of remotes.
* Remove the EKG build flag, since Gentoo for some reason decided to * Remove the EKG build flag, since Gentoo for some reason decided to
enable this flag, depsite it not being intended for production use and enable this flag, depsite it not being intended for production use and
so disabled by default. so disabled by default.