sync: use log to track when adjusted branch needs updating
Speeds up sync in an adjusted branch by avoiding re-adjusting the branch unnecessarily, particularly when it is adjusted with --hide-missing or --unlock-present. When there are a lot of files, that was the majority of the time of a --no-content sync. Uses a log file, which is updated when content presence changes. This adds a little bit of overhead to every file get/drop when on such an adjusted branch. The overhead is minimal for get of any size of file, but might be noticeable for drop in some cases. It seems like a reasonable trade-off. It would be possible to update the log file only at the end, but then it would not happen if the command is interrupted. When not in an adjusted branch, there should be no additional overhead. (getCurrentBranch is an MVar read, and it avoids the MVar read of getGitConfig.) Note that this does not deal with situations such as: git checkout master, git-annex get, git checkout adjusted branch, git-annex sync. The sync won't know that the adjusted branch needs to be updated. Dealing with that would add overhead to operation in non-adjusted branches, which I don't like. Also, there are other situations like having two adjusted branches that both need to be updated like this, and switching between them and sync not updating. This does mean a behavior change to sync, since it did previously deal with those situations. But, the documentation did not say that it did. The man pages only talk about sync updating the adjusted branch after it transfers content. I did consider making sync keep track of content it transferred (and dropped) and only update the adjusted branch then, not to catch up to other changes made previously. That would perform better. But it seemed rather hard to implement, and also it would have problems with races with a concurrent get/drop, which this implementation avoids. And it seemed pretty likely someone had gotten used to get/drop followed by sync updating the branch. 
It seems much less likely someone is switching branches, doing get/drop, and then switching back and expecting sync to update the branch. Re-running git-annex adjust still does a full re-adjusting of the branch, for anyone who needs that. Sponsored-by: Leon Schuermann on Patreon
This commit is contained in:
parent
637f19bebb
commit
6821ba8dab
5 changed files with 128 additions and 19 deletions
|
@ -67,7 +67,9 @@ import Types.CleanupActions
|
|||
import qualified Database.Keys
|
||||
import Config
|
||||
import Logs.View (is_branchView)
|
||||
import Logs.AdjustedBranchUpdate
|
||||
|
||||
import Data.Time.Clock.POSIX
|
||||
import qualified Data.Map as M
|
||||
|
||||
class AdjustTreeItem t where
|
||||
|
@ -223,9 +225,13 @@ enterAdjustedBranch adj = inRepo Git.Branch.current >>= \case
|
|||
]
|
||||
return False
|
||||
, do
|
||||
starttime <- liftIO getPOSIXTime
|
||||
b <- preventCommits $ const $
|
||||
adjustBranch adj origbranch
|
||||
checkoutAdjustedBranch b False
|
||||
ok <- checkoutAdjustedBranch b False
|
||||
when ok $
|
||||
recordAdjustedBranchUpdateFinished starttime
|
||||
return ok
|
||||
)
|
||||
|
||||
checkoutAdjustedBranch :: AdjBranch -> Bool -> Annex Bool
|
||||
|
@ -304,18 +310,22 @@ updateAdjustedBranch adj (AdjBranch currbranch) origbranch
|
|||
adjustedBranchRefresh :: AssociatedFile -> Annex a -> Annex a
|
||||
adjustedBranchRefresh _af a = do
|
||||
r <- a
|
||||
annexAdjustedBranchRefresh <$> Annex.getGitConfig >>= \case
|
||||
0 -> return ()
|
||||
n -> go n
|
||||
go
|
||||
return r
|
||||
where
|
||||
go n = getCurrentBranch >>= \case
|
||||
go = getCurrentBranch >>= \case
|
||||
(Just origbranch, Just adj) ->
|
||||
unless (adjustmentIsStable adj) $
|
||||
ifM (checkcounter n)
|
||||
( update adj origbranch
|
||||
unless (adjustmentIsStable adj) $ do
|
||||
recordAdjustedBranchUpdateNeeded
|
||||
n <- annexAdjustedBranchRefresh <$> Annex.getGitConfig
|
||||
unless (n == 0) $ ifM (checkcounter n)
|
||||
-- This is slow, it would be better to incrementally
|
||||
-- adjust the AssociatedFile, and only call this once
|
||||
-- at shutdown to handle cases where not all
|
||||
-- AssociatedFiles are known.
|
||||
( adjustedBranchRefreshFull' adj origbranch
|
||||
, Annex.addCleanupAction AdjustedBranchUpdate $
|
||||
adjustedBranchRefreshFull adj origbranch
|
||||
adjustedBranchRefreshFull' adj origbranch
|
||||
)
|
||||
_ -> return ()
|
||||
|
||||
|
@ -329,23 +339,24 @@ adjustedBranchRefresh _af a = do
|
|||
!s' = s { Annex.adjustedbranchrefreshcounter = c' }
|
||||
in pure (s', enough)
|
||||
|
||||
-- This is slow, it would be better to incrementally
|
||||
-- adjust the AssociatedFile, and only call this once
|
||||
-- at shutdown to handle cases where not all
|
||||
-- AssociatedFiles are known.
|
||||
update adj origbranch =
|
||||
adjustedBranchRefreshFull adj origbranch
|
||||
|
||||
{- Slow, but more dependable version of adjustedBranchRefresh that
|
||||
- does not rely on all AssociatedFiles being known. -}
|
||||
adjustedBranchRefreshFull :: Adjustment -> OrigBranch -> Annex ()
|
||||
adjustedBranchRefreshFull adj origbranch = do
|
||||
adjustedBranchRefreshFull adj origbranch =
|
||||
whenM isAdjustedBranchUpdateNeeded $ do
|
||||
adjustedBranchRefreshFull' adj origbranch
|
||||
|
||||
adjustedBranchRefreshFull' :: Adjustment -> OrigBranch -> Annex ()
|
||||
adjustedBranchRefreshFull' adj origbranch = do
|
||||
-- Restage pointer files so modifications to them due to get/drop
|
||||
-- do not prevent checking out the updated adjusted branch.
|
||||
restagePointerFiles =<< Annex.gitRepo
|
||||
starttime <- liftIO getPOSIXTime
|
||||
let adjbranch = originalToAdjusted origbranch adj
|
||||
unlessM (updateAdjustedBranch adj adjbranch origbranch) $
|
||||
warning "Updating adjusted branch failed."
|
||||
ifM (updateAdjustedBranch adj adjbranch origbranch)
|
||||
( recordAdjustedBranchUpdateFinished starttime
|
||||
, warning "Updating adjusted branch failed."
|
||||
)
|
||||
|
||||
adjustToCrippledFileSystem :: Annex ()
|
||||
adjustToCrippledFileSystem = do
|
||||
|
|
|
@ -52,6 +52,8 @@ module Annex.Locations (
|
|||
gitAnnexRestageLog,
|
||||
gitAnnexRestageLogOld,
|
||||
gitAnnexRestageLock,
|
||||
gitAnnexAdjustedBranchUpdateLog,
|
||||
gitAnnexAdjustedBranchUpdateLock,
|
||||
gitAnnexMoveLog,
|
||||
gitAnnexMoveLock,
|
||||
gitAnnexExportDir,
|
||||
|
@ -395,6 +397,14 @@ gitAnnexRestageLogOld r = gitAnnexDir r P.</> "restage.old"
|
|||
gitAnnexRestageLock :: Git.Repo -> RawFilePath
|
||||
gitAnnexRestageLock r = gitAnnexDir r P.</> "restage.lck"
|
||||
|
||||
{- Location of .git/annex/adjust.log, the log that records when the
 - adjusted branch needs to be updated. -}
gitAnnexAdjustedBranchUpdateLog :: Git.Repo -> RawFilePath
gitAnnexAdjustedBranchUpdateLog repo = gitAnnexDir repo P.</> "adjust.log"
|
||||
|
||||
{- Location of adjust.lck in the git-annex directory. This path is
 - handed to the log file functions together with the adjust.log path
 - whenever that log is read or modified. -}
gitAnnexAdjustedBranchUpdateLock :: Git.Repo -> RawFilePath
gitAnnexAdjustedBranchUpdateLock repo = gitAnnexDir repo P.</> "adjust.lck"
|
||||
|
||||
{- .git/annex/move.log is used to log moves that are in progress,
|
||||
- to better support resuming an interrupted move. -}
|
||||
gitAnnexMoveLog :: Git.Repo -> RawFilePath
|
||||
|
|
|
@ -81,6 +81,9 @@ git-annex (10.20230408) UNRELEASED; urgency=medium
|
|||
* Some other speedups to importing trees from special remotes.
|
||||
* Cache negative lookups of global numcopies and mincopies.
|
||||
Speeds up eg git-annex sync --content by up to 50%.
|
||||
* Speed up sync in an adjusted branch by avoiding re-adjusting the branch
|
||||
unnecessarily, particularly when it is adjusted with --hide-missing
|
||||
or --unlock-present.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Sat, 08 Apr 2023 13:57:18 -0400
|
||||
|
||||
|
|
84
Logs/AdjustedBranchUpdate.hs
Normal file
84
Logs/AdjustedBranchUpdate.hs
Normal file
|
@ -0,0 +1,84 @@
|
|||
{- git-annex log file that indicates when the adjusted branch needs to be
|
||||
- updated due to changes in content availability.
|
||||
-
|
||||
- Copyright 2023 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Logs.AdjustedBranchUpdate (
|
||||
recordAdjustedBranchUpdateNeeded,
|
||||
recordAdjustedBranchUpdateFinished,
|
||||
isAdjustedBranchUpdateNeeded,
|
||||
) where
|
||||
|
||||
import Annex.Common
|
||||
import Logs.File
|
||||
import Utility.TimeStamp
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Data.Time.Clock.POSIX
|
||||
|
||||
-- | Marks the adjusted branch as needing an update, stamping the log
-- entry with the current time.
--
-- Whatever the log held before is discarded and replaced by this single
-- entry. An older "update finished" entry is necessarily in the past,
-- and an "update needed" entry dated in the future (clock skew) would
-- be wrong information, so neither is worth keeping.
recordAdjustedBranchUpdateNeeded :: Annex ()
recordAdjustedBranchUpdateNeeded = do
    timestamp <- liftIO getPOSIXTime
    logfile <- fromRepo gitAnnexAdjustedBranchUpdateLog
    lockfile <- fromRepo gitAnnexAdjustedBranchUpdateLock
    modifyLogFile logfile lockfile $ \_ -> [needed timestamp]
  where
    needed = formatAdjustLog True
|
||||
|
||||
-- | Records that an update of the adjusted branch has finished.
--
-- The argument is the time at which that update started. When the log
-- holds an entry timestamped between the start of the update and the
-- current time (for example because recordAdjustedBranchUpdateNeeded
-- ran while the update was in progress), the log is left untouched.
-- Otherwise its contents are replaced with a single entry, stamped with
-- the start time, saying that no update is needed.
recordAdjustedBranchUpdateFinished :: POSIXTime -> Annex ()
recordAdjustedBranchUpdateFinished starttime = do
    endtime <- liftIO getPOSIXTime
    logfile <- fromRepo gitAnnexAdjustedBranchUpdateLog
    lockfile <- fromRepo gitAnnexAdjustedBranchUpdateLock
    modifyLogFile logfile lockfile (settle endtime)
  where
    settle endtime entries
        | any (duringupdate endtime) (mapMaybe parseAdjustLog entries) = entries
        | otherwise = [formatAdjustLog False starttime]

    -- An entry stamped later than the current time can only be the
    -- result of clock skew, so it is disregarded.
    duringupdate endtime (_, loggedtime) =
        loggedtime >= starttime && loggedtime <= endtime
|
||||
|
||||
-- | Reads the log and reports whether the adjusted branch needs to be
-- updated.
--
-- The answer is True when any parseable log line says an update is
-- needed. Lines that do not parse are ignored. When no line parses at
-- all (including when there is no log yet, as after an upgrade from a
-- version that did not write it), an update is assumed to be needed.
isAdjustedBranchUpdateNeeded :: Annex Bool
isAdjustedBranchUpdateNeeded = do
    logfile <- fromRepo gitAnnexAdjustedBranchUpdateLog
    lockfile <- fromRepo gitAnnexAdjustedBranchUpdateLock
    fromMaybe True <$> calcLogFile logfile lockfile Nothing accumulate
  where
    -- Folds one log line into the running answer; Nothing means that
    -- no parseable line has been seen so far.
    accumulate line sofar = case parseAdjustLog line of
        Nothing -> sofar
        Just (needed, _) -> Just (maybe needed (|| needed) sofar)
|
||||
|
||||
-- | Serializes one log entry: the shown timestamp, a space, and then
-- "1" when an update is needed or "0" when it is not.
formatAdjustLog :: Bool -> POSIXTime -> L.ByteString
formatAdjustLog b t = mconcat [encodeBL (show t), " ", flag]
  where
    flag
        | b = "1"
        | otherwise = "0"
|
||||
|
||||
-- | Parses one log line of the form written by formatAdjustLog: a
-- timestamp, a space, and a "1" or "0" flag.
--
-- Yields Nothing when the flag is anything other than "1" or "0", or
-- when the timestamp is rejected by parsePOSIXTime.
parseAdjustLog :: L.ByteString -> Maybe (Bool, POSIXTime)
parseAdjustLog l = (,) <$> needed <*> parsePOSIXTime timefield
  where
    -- Split at the first space: timestamp before it, flag after it.
    (timefield, flagfield) = separate (== ' ') (decodeBL l)
    needed = case flagfield of
        "1" -> Just True
        "0" -> Just False
        _ -> Nothing
|
|
@ -898,6 +898,7 @@ Executable git-annex
|
|||
Limit.Wanted
|
||||
Logs
|
||||
Logs.Activity
|
||||
Logs.AdjustedBranchUpdate
|
||||
Logs.Chunk
|
||||
Logs.Chunk.Pure
|
||||
Logs.Config
|
||||
|
|
Loading…
Reference in a new issue