From e006acef229d97eb2174cccc5f056e0947d990d4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 28 Aug 2024 10:52:34 -0400 Subject: [PATCH] avoid reposize database locking overhead when not needed Only when the preferred content expression being matched uses balanced preferred content is this overhead needed. It might be possible to eliminate the locking entirely. E.g., check the live changes before and after the action and re-run if they are not stable. For now, this is good enough: it avoids existing preferred content getting slow. If balanced preferred content turns out to be too slow to check, that could be tried later. --- Annex/FileMatcher.hs | 3 ++- Annex/RepoSize/LiveUpdate.hs | 15 ++++++++++++--- Limit.hs | 25 +++++++++++++++++++++++++ Limit/Wanted.hs | 2 ++ Types/FileMatcher.hs | 2 ++ doc/todo/git-annex_proxies.mdwn | 8 -------- 6 files changed, 43 insertions(+), 12 deletions(-) diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index 3c2840c73d..a2bfd23dce 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -90,7 +90,7 @@ checkMatcher matcher mkey afile lu notpresent notconfigured d checkMatcher' :: FileMatcher Annex -> MatchInfo -> LiveUpdate -> AssumeNotPresent -> Annex Bool checkMatcher' (matcher, (MatcherDesc matcherdesc)) mi lu notpresent = - checkLiveUpdate lu go + checkLiveUpdate lu matcher go where go = do (matches, desc) <- runWriterT $ matchMrun' matcher $ \op -> @@ -281,6 +281,7 @@ call desc (Right sub) = Right $ Operation $ MatchFiles , matchNeedsFileContent = any matchNeedsFileContent sub , matchNeedsKey = any matchNeedsKey sub , matchNeedsLocationLog = any matchNeedsLocationLog sub + , matchNeedsLiveRepoSize = any matchNeedsLiveRepoSize sub , matchDesc = matchDescSimple desc } call _ (Left err) = Left err diff --git a/Annex/RepoSize/LiveUpdate.hs b/Annex/RepoSize/LiveUpdate.hs index aaacb31450..602e8f374a 100644 --- a/Annex/RepoSize/LiveUpdate.hs +++ b/Annex/RepoSize/LiveUpdate.hs @@ -13,6 +13,8 @@ import Annex.Common 
import Logs.Presence.Pure import qualified Database.RepoSize as Db import Annex.UUID +import Types.FileMatcher +import qualified Utility.Matcher as Matcher import Control.Concurrent import System.Process @@ -95,9 +97,16 @@ needLiveUpdate lu = liftIO $ void $ tryPutMVar (liveUpdateNeeded lu) () -- This serializes calls to the action, so that if the action -- queries getLiveRepoSizes it will not race with another such action -- that may also be starting a live update. -checkLiveUpdate :: LiveUpdate -> Annex Bool -> Annex Bool -checkLiveUpdate NoLiveUpdate a = a -checkLiveUpdate lu a = Db.lockDbWhile (const go) go +checkLiveUpdate + :: LiveUpdate + -> Matcher.Matcher (MatchFiles Annex) + -> Annex Bool + -> Annex Bool +checkLiveUpdate NoLiveUpdate _ a = a +checkLiveUpdate lu matcher a + | Matcher.introspect matchNeedsLiveRepoSize matcher = + Db.lockDbWhile (const go) go + | otherwise = a where go = do r <- a diff --git a/Limit.hs b/Limit.hs index b824bcc640..778a7e65d8 100644 --- a/Limit.hs +++ b/Limit.hs @@ -114,6 +114,7 @@ limitInclude glob = Right $ MatchFiles , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = "include" =? glob } @@ -128,6 +129,7 @@ limitExclude glob = Right $ MatchFiles , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = "exclude" =? glob } @@ -153,6 +155,7 @@ limitIncludeSameContent glob = Right $ MatchFiles , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = "includesamecontent" =? glob } @@ -168,6 +171,7 @@ limitExcludeSameContent glob = Right $ MatchFiles , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = "excludesamecontent" =? 
glob } @@ -244,6 +248,7 @@ matchMagic limitname querymagic selectprovidedinfo selectuserprovidedinfo (Just , matchNeedsFileContent = True , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = limitname =? glob } where @@ -271,6 +276,7 @@ addUnlocked = addLimit $ Right $ MatchFiles , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = matchDescSimple "unlocked" } @@ -281,6 +287,7 @@ addLocked = addLimit $ Right $ MatchFiles , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = matchDescSimple "locked" } @@ -316,6 +323,7 @@ addIn s = do , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = not inhere + , matchNeedsLiveRepoSize = False , matchDesc = "in" =? s } checkinuuid u notpresent key @@ -346,6 +354,7 @@ addExpectedPresent = do , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = True + , matchNeedsLiveRepoSize = False , matchDesc = matchDescSimple "expected-present" } @@ -363,6 +372,7 @@ limitPresent u = MatchFiles , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = not (isNothing u) + , matchNeedsLiveRepoSize = False , matchDesc = matchDescSimple "present" } @@ -374,6 +384,7 @@ limitInDir dir desc = MatchFiles , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = matchDescSimple desc } where @@ -406,6 +417,7 @@ limitCopies want = case splitc ':' want of , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = True + , matchNeedsLiveRepoSize = False , matchDesc = "copies" =? 
want } go' n good notpresent key = do @@ -431,6 +443,7 @@ limitLackingCopies desc approx want = case readish want of , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = True + , matchNeedsLiveRepoSize = False , matchDesc = matchDescSimple desc } Nothing -> Left "bad value for number of lacking copies" @@ -461,6 +474,7 @@ limitUnused = MatchFiles , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = matchDescSimple "unused" } where @@ -484,6 +498,7 @@ limitAnything = MatchFiles , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = matchDescSimple "anything" } @@ -499,6 +514,7 @@ limitNothing = MatchFiles , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = matchDescSimple "nothing" } @@ -522,6 +538,7 @@ limitInAllGroup getgroupmap groupname = Right $ MatchFiles , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = True + , matchNeedsLiveRepoSize = False , matchDesc = "inallgroup" =? groupname } where @@ -547,6 +564,7 @@ limitOnlyInGroup getgroupmap groupname = Right $ MatchFiles , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = True + , matchNeedsLiveRepoSize = False , matchDesc = "inallgroup" =? groupname } where @@ -592,6 +610,7 @@ limitBalanced' termname fullybalanced mu groupname = do matchNeedsLocationLog present || matchNeedsLocationLog fullybalanced || matchNeedsLocationLog copies + , matchNeedsLiveRepoSize = True , matchDesc = termname =? groupname } @@ -677,6 +696,7 @@ limitFullyBalanced''' filtercandidates termname mu getgroupmap g n want = Right , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = True , matchDesc = termname =? 
want } @@ -736,6 +756,7 @@ limitInBackend name = Right $ MatchFiles , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = "inbackend" =? name } where @@ -753,6 +774,7 @@ limitSecureHash = MatchFiles , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = matchDescSimple "securehash" } @@ -774,6 +796,7 @@ limitSize lb desc vs s = case readSize dataUnits s of , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = desc =? s } where @@ -804,6 +827,7 @@ limitMetaData s = case parseMetaDataMatcher s of , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = "metadata" =? s } where @@ -820,6 +844,7 @@ addAccessedWithin duration = do , matchNeedsFileContent = False , matchNeedsKey = False , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False , matchDesc = "accessedwithin" =? 
fromDuration duration } where diff --git a/Limit/Wanted.hs b/Limit/Wanted.hs index a31f08be92..1e1be390d8 100644 --- a/Limit/Wanted.hs +++ b/Limit/Wanted.hs @@ -40,12 +40,14 @@ addPreferredContentLimit desc a = do nfc <- introspectPreferredRequiredContent matchNeedsFileContent Nothing nk <- introspectPreferredRequiredContent matchNeedsKey Nothing nl <- introspectPreferredRequiredContent matchNeedsLocationLog Nothing + lsz <- introspectPreferredRequiredContent matchNeedsLiveRepoSize Nothing addLimit $ Right $ MatchFiles { matchAction = const $ const a , matchNeedsFileName = nfn , matchNeedsFileContent = nfc , matchNeedsKey = nk , matchNeedsLocationLog = nl + , matchNeedsLiveRepoSize = lsz , matchDesc = matchDescSimple desc } diff --git a/Types/FileMatcher.hs b/Types/FileMatcher.hs index 1f0b6de5a8..4fa7d20e67 100644 --- a/Types/FileMatcher.hs +++ b/Types/FileMatcher.hs @@ -96,6 +96,8 @@ data MatchFiles a = MatchFiles -- ^ does the matchAction look at information about the key? , matchNeedsLocationLog :: Bool -- ^ does the matchAction look at the location log? + , matchNeedsLiveRepoSize :: Bool + -- ^ does the matchAction need live repo size information? , matchDesc :: Bool -> MatchDesc -- ^ displayed to the user to describe whether it matched or not } diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index 481df391df..7621f4f878 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -37,14 +37,6 @@ Planned schedule of work: * Test that live repo size data is correct and really works. -* Avoid using checkLiveUpdate except when checking a preferred content - expression that does use balanced preferred content. No reason to pay - its time penalty otherwise. - - Alternatively, make it not use file locking. It could rely on a database - transaction, or it could check the live changes before and after and - re-run the Annex action if they are not stable. 
- * When loading the live update table, check if PIDs in it are still running (and are still git-annex), and if not, remove stale entries from it, which can accumulate when processes are interrupted.