avoid reposize database locking overhead when not needed

This overhead is only needed when the preferred content expression being
matched uses balanced preferred content.

It might be possible to eliminate the locking entirely, e.g. by checking
the live changes before and after the action and re-running the action if
they are not stable. For now, this is good enough: it avoids existing
preferred content getting slow. If balanced preferred content turns out to
be too slow to check, that could be tried later.
This commit is contained in:
Joey Hess 2024-08-28 10:52:34 -04:00
parent 09955deebe
commit e006acef22
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 43 additions and 12 deletions

View file

@ -90,7 +90,7 @@ checkMatcher matcher mkey afile lu notpresent notconfigured d
checkMatcher' :: FileMatcher Annex -> MatchInfo -> LiveUpdate -> AssumeNotPresent -> Annex Bool checkMatcher' :: FileMatcher Annex -> MatchInfo -> LiveUpdate -> AssumeNotPresent -> Annex Bool
checkMatcher' (matcher, (MatcherDesc matcherdesc)) mi lu notpresent = checkMatcher' (matcher, (MatcherDesc matcherdesc)) mi lu notpresent =
checkLiveUpdate lu go checkLiveUpdate lu matcher go
where where
go = do go = do
(matches, desc) <- runWriterT $ matchMrun' matcher $ \op -> (matches, desc) <- runWriterT $ matchMrun' matcher $ \op ->
@ -281,6 +281,7 @@ call desc (Right sub) = Right $ Operation $ MatchFiles
, matchNeedsFileContent = any matchNeedsFileContent sub , matchNeedsFileContent = any matchNeedsFileContent sub
, matchNeedsKey = any matchNeedsKey sub , matchNeedsKey = any matchNeedsKey sub
, matchNeedsLocationLog = any matchNeedsLocationLog sub , matchNeedsLocationLog = any matchNeedsLocationLog sub
, matchNeedsLiveRepoSize = any matchNeedsLiveRepoSize sub
, matchDesc = matchDescSimple desc , matchDesc = matchDescSimple desc
} }
call _ (Left err) = Left err call _ (Left err) = Left err

View file

@ -13,6 +13,8 @@ import Annex.Common
import Logs.Presence.Pure import Logs.Presence.Pure
import qualified Database.RepoSize as Db import qualified Database.RepoSize as Db
import Annex.UUID import Annex.UUID
import Types.FileMatcher
import qualified Utility.Matcher as Matcher
import Control.Concurrent import Control.Concurrent
import System.Process import System.Process
@ -95,9 +97,16 @@ needLiveUpdate lu = liftIO $ void $ tryPutMVar (liveUpdateNeeded lu) ()
-- This serializes calls to the action, so that if the action -- This serializes calls to the action, so that if the action
-- queries getLiveRepoSizes it will not race with another such action -- queries getLiveRepoSizes it will not race with another such action
-- that may also be starting a live update. -- that may also be starting a live update.
checkLiveUpdate :: LiveUpdate -> Annex Bool -> Annex Bool checkLiveUpdate
checkLiveUpdate NoLiveUpdate a = a :: LiveUpdate
checkLiveUpdate lu a = Db.lockDbWhile (const go) go -> Matcher.Matcher (MatchFiles Annex)
-> Annex Bool
-> Annex Bool
checkLiveUpdate NoLiveUpdate _ a = a
checkLiveUpdate lu matcher a
| Matcher.introspect matchNeedsLiveRepoSize matcher =
Db.lockDbWhile (const go) go
| otherwise = a
where where
go = do go = do
r <- a r <- a

View file

@ -114,6 +114,7 @@ limitInclude glob = Right $ MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = "include" =? glob , matchDesc = "include" =? glob
} }
@ -128,6 +129,7 @@ limitExclude glob = Right $ MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = "exclude" =? glob , matchDesc = "exclude" =? glob
} }
@ -153,6 +155,7 @@ limitIncludeSameContent glob = Right $ MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = "includesamecontent" =? glob , matchDesc = "includesamecontent" =? glob
} }
@ -168,6 +171,7 @@ limitExcludeSameContent glob = Right $ MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = "excludesamecontent" =? glob , matchDesc = "excludesamecontent" =? glob
} }
@ -244,6 +248,7 @@ matchMagic limitname querymagic selectprovidedinfo selectuserprovidedinfo (Just
, matchNeedsFileContent = True , matchNeedsFileContent = True
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = limitname =? glob , matchDesc = limitname =? glob
} }
where where
@ -271,6 +276,7 @@ addUnlocked = addLimit $ Right $ MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = matchDescSimple "unlocked" , matchDesc = matchDescSimple "unlocked"
} }
@ -281,6 +287,7 @@ addLocked = addLimit $ Right $ MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = matchDescSimple "locked" , matchDesc = matchDescSimple "locked"
} }
@ -316,6 +323,7 @@ addIn s = do
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = not inhere , matchNeedsLocationLog = not inhere
, matchNeedsLiveRepoSize = False
, matchDesc = "in" =? s , matchDesc = "in" =? s
} }
checkinuuid u notpresent key checkinuuid u notpresent key
@ -346,6 +354,7 @@ addExpectedPresent = do
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = True , matchNeedsLocationLog = True
, matchNeedsLiveRepoSize = False
, matchDesc = matchDescSimple "expected-present" , matchDesc = matchDescSimple "expected-present"
} }
@ -363,6 +372,7 @@ limitPresent u = MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = not (isNothing u) , matchNeedsLocationLog = not (isNothing u)
, matchNeedsLiveRepoSize = False
, matchDesc = matchDescSimple "present" , matchDesc = matchDescSimple "present"
} }
@ -374,6 +384,7 @@ limitInDir dir desc = MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = matchDescSimple desc , matchDesc = matchDescSimple desc
} }
where where
@ -406,6 +417,7 @@ limitCopies want = case splitc ':' want of
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = True , matchNeedsLocationLog = True
, matchNeedsLiveRepoSize = False
, matchDesc = "copies" =? want , matchDesc = "copies" =? want
} }
go' n good notpresent key = do go' n good notpresent key = do
@ -431,6 +443,7 @@ limitLackingCopies desc approx want = case readish want of
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = True , matchNeedsLocationLog = True
, matchNeedsLiveRepoSize = False
, matchDesc = matchDescSimple desc , matchDesc = matchDescSimple desc
} }
Nothing -> Left "bad value for number of lacking copies" Nothing -> Left "bad value for number of lacking copies"
@ -461,6 +474,7 @@ limitUnused = MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = matchDescSimple "unused" , matchDesc = matchDescSimple "unused"
} }
where where
@ -484,6 +498,7 @@ limitAnything = MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = matchDescSimple "anything" , matchDesc = matchDescSimple "anything"
} }
@ -499,6 +514,7 @@ limitNothing = MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = matchDescSimple "nothing" , matchDesc = matchDescSimple "nothing"
} }
@ -522,6 +538,7 @@ limitInAllGroup getgroupmap groupname = Right $ MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = True , matchNeedsLocationLog = True
, matchNeedsLiveRepoSize = False
, matchDesc = "inallgroup" =? groupname , matchDesc = "inallgroup" =? groupname
} }
where where
@ -547,6 +564,7 @@ limitOnlyInGroup getgroupmap groupname = Right $ MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = True , matchNeedsLocationLog = True
, matchNeedsLiveRepoSize = False
, matchDesc = "inallgroup" =? groupname , matchDesc = "inallgroup" =? groupname
} }
where where
@ -592,6 +610,7 @@ limitBalanced' termname fullybalanced mu groupname = do
matchNeedsLocationLog present || matchNeedsLocationLog present ||
matchNeedsLocationLog fullybalanced || matchNeedsLocationLog fullybalanced ||
matchNeedsLocationLog copies matchNeedsLocationLog copies
, matchNeedsLiveRepoSize = True
, matchDesc = termname =? groupname , matchDesc = termname =? groupname
} }
@ -677,6 +696,7 @@ limitFullyBalanced''' filtercandidates termname mu getgroupmap g n want = Right
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = True
, matchDesc = termname =? want , matchDesc = termname =? want
} }
@ -736,6 +756,7 @@ limitInBackend name = Right $ MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = "inbackend" =? name , matchDesc = "inbackend" =? name
} }
where where
@ -753,6 +774,7 @@ limitSecureHash = MatchFiles
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = matchDescSimple "securehash" , matchDesc = matchDescSimple "securehash"
} }
@ -774,6 +796,7 @@ limitSize lb desc vs s = case readSize dataUnits s of
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = desc =? s , matchDesc = desc =? s
} }
where where
@ -804,6 +827,7 @@ limitMetaData s = case parseMetaDataMatcher s of
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = "metadata" =? s , matchDesc = "metadata" =? s
} }
where where
@ -820,6 +844,7 @@ addAccessedWithin duration = do
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = False , matchNeedsKey = False
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchNeedsLiveRepoSize = False
, matchDesc = "accessedwithin" =? fromDuration duration , matchDesc = "accessedwithin" =? fromDuration duration
} }
where where

View file

@ -40,12 +40,14 @@ addPreferredContentLimit desc a = do
nfc <- introspectPreferredRequiredContent matchNeedsFileContent Nothing nfc <- introspectPreferredRequiredContent matchNeedsFileContent Nothing
nk <- introspectPreferredRequiredContent matchNeedsKey Nothing nk <- introspectPreferredRequiredContent matchNeedsKey Nothing
nl <- introspectPreferredRequiredContent matchNeedsLocationLog Nothing nl <- introspectPreferredRequiredContent matchNeedsLocationLog Nothing
lsz <- introspectPreferredRequiredContent matchNeedsLiveRepoSize Nothing
addLimit $ Right $ MatchFiles addLimit $ Right $ MatchFiles
{ matchAction = const $ const a { matchAction = const $ const a
, matchNeedsFileName = nfn , matchNeedsFileName = nfn
, matchNeedsFileContent = nfc , matchNeedsFileContent = nfc
, matchNeedsKey = nk , matchNeedsKey = nk
, matchNeedsLocationLog = nl , matchNeedsLocationLog = nl
, matchNeedsLiveRepoSize = lsz
, matchDesc = matchDescSimple desc , matchDesc = matchDescSimple desc
} }

View file

@ -96,6 +96,8 @@ data MatchFiles a = MatchFiles
-- ^ does the matchAction look at information about the key? -- ^ does the matchAction look at information about the key?
, matchNeedsLocationLog :: Bool , matchNeedsLocationLog :: Bool
-- ^ does the matchAction look at the location log? -- ^ does the matchAction look at the location log?
, matchNeedsLiveRepoSize :: Bool
-- ^ does the matchAction need live repo size information?
, matchDesc :: Bool -> MatchDesc , matchDesc :: Bool -> MatchDesc
-- ^ displayed to the user to describe whether it matched or not -- ^ displayed to the user to describe whether it matched or not
} }

View file

@ -37,14 +37,6 @@ Planned schedule of work:
* Test that live repo size data is correct and really works. * Test that live repo size data is correct and really works.
* Avoid using checkLiveUpdate except when checking a preferred content
expression that does use balanced preferred content. No reason to pay
its time penalty otherwise.
Alternatively, make it not use file locking. It could rely on a database
transaction, or it could check the live changes before and after and
re-run the Annex action if they are not stable.
* When loading the live update table, check if PIDs in it are still * When loading the live update table, check if PIDs in it are still
running (and are still git-annex), and if not, remove stale entries running (and are still git-annex), and if not, remove stale entries
from it, which can accumulate when processes are interrupted. from it, which can accumulate when processes are interrupted.