sync --all avoid unnecessary first pass
Sped up seeking to around twice as fast, by avoiding a pass over the worktree files when preferred content expressions of the local repo and remotes don't use include=/exclude=. Thanks to Lukey for identifying the optimisation. This commit was sponsored by Brock Spratlen on Patreon.
This commit is contained in:
parent
b45b37b088
commit
d89984b121
9 changed files with 86 additions and 12 deletions
|
@ -267,6 +267,7 @@ call :: Either String (FileMatcher Annex) -> ParseResult (MatchFiles Annex)
|
|||
call (Right sub) = Right $ Operation $ MatchFiles
|
||||
{ matchAction = \notpresent mi ->
|
||||
matchMrun sub $ \o -> matchAction o notpresent mi
|
||||
, matchNeedsFileName = any matchNeedsFileName sub
|
||||
, matchNeedsFileContent = any matchNeedsFileContent sub
|
||||
}
|
||||
call (Left err) = Left err
|
||||
|
|
|
@ -18,6 +18,9 @@ git-annex (8.20200909) UNRELEASED; urgency=medium
|
|||
message, with some hints for the user for what to do.
|
||||
* Improve --debug output to show pid of processes that are started and
|
||||
stopped.
|
||||
* sync --all: Sped up seeking to around twice as fast, by avoiding a
|
||||
pass over the worktree files when preferred content expressions of the
|
||||
local repo and remotes don't use include=/exclude=.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Mon, 14 Sep 2020 18:34:37 -0400
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ import Annex.Drop
|
|||
import Annex.UUID
|
||||
import Logs.UUID
|
||||
import Logs.Export
|
||||
import Logs.PreferredContent
|
||||
import Annex.AutoMerge
|
||||
import Annex.AdjustedBranch
|
||||
import Annex.Ssh
|
||||
|
@ -65,6 +66,7 @@ import Annex.UpdateInstead
|
|||
import Annex.Export
|
||||
import Annex.TaggedPush
|
||||
import Annex.CurrentBranch
|
||||
import Types.FileMatcher
|
||||
import qualified Database.Export as Export
|
||||
import Utility.Bloom
|
||||
import Utility.OptParse
|
||||
|
@ -633,9 +635,11 @@ newer remote b = do
|
|||
- (Or, when in an adjusted branch where some files are hidden, at files in
|
||||
- the original branch.)
|
||||
-
|
||||
- With --all, makes a second pass over all keys.
|
||||
- This ensures that preferred content expressions that match on
|
||||
- filenames work, even when in --all mode.
|
||||
- With --all, when preferred content expressions look at filenames,
|
||||
- makes a first pass over the files in the work tree so those preferred
|
||||
- content expressions will match. The second pass is over all keys,
|
||||
- and only preferred content expressions that don't look at filenames
|
||||
- will match.
|
||||
-
|
||||
- Returns true if any file transfers were made.
|
||||
-
|
||||
|
@ -646,7 +650,12 @@ seekSyncContent _ [] _ = return False
|
|||
seekSyncContent o rs currbranch = do
|
||||
mvar <- liftIO newEmptyMVar
|
||||
bloom <- case keyOptions o of
|
||||
Just WantAllKeys -> Just <$> genBloomFilter (seekworktree mvar (WorkTreeItems []))
|
||||
Just WantAllKeys -> ifM preferredcontentmatchesfilenames
|
||||
( Just <$> genBloomFilter (seekworktree mvar (WorkTreeItems []))
|
||||
, do
|
||||
liftIO $ print "skipped first pass"
|
||||
pure Nothing
|
||||
)
|
||||
_ -> case currbranch of
|
||||
(Just origbranch, Just adj) | adjustmentHidesFiles adj -> do
|
||||
l <- workTreeItems' (AllowHidden True) ww (contentOfOption o)
|
||||
|
@ -692,6 +701,12 @@ seekSyncContent o rs currbranch = do
|
|||
void $ liftIO $ tryPutMVar mvar ()
|
||||
next $ return True
|
||||
|
||||
preferredcontentmatchesfilenames =
|
||||
preferredcontentmatchesfilenames' Nothing
|
||||
<||> anyM (preferredcontentmatchesfilenames' . Just . Remote.uuid) rs
|
||||
preferredcontentmatchesfilenames' =
|
||||
introspectPreferredRequiredContent matchNeedsFileName
|
||||
|
||||
{- If it's preferred content, and we don't have it, get it from one of the
|
||||
- listed remotes (preferring the cheaper earlier ones).
|
||||
-
|
||||
|
@ -717,11 +732,13 @@ syncFile ebloom rs af k = do
|
|||
u <- getUUID
|
||||
let locs' = concat [if inhere || got then [u] else [], putrs, locs]
|
||||
|
||||
-- A bloom filter is populated with all the keys in the first pass.
|
||||
-- On the second pass, avoid dropping keys that were seen in the
|
||||
-- first pass, which would happen otherwise when preferred content
|
||||
-- matches on the filename, which is not available in the second
|
||||
-- pass.
|
||||
-- To handle --all, a bloom filter is populated with all the keys
|
||||
-- of files in the working tree in the first pass. On the second
|
||||
-- pass, avoid dropping keys that were seen in the first pass, which
|
||||
-- would happen otherwise when preferred content matches on the
|
||||
-- filename, which is not available in the second pass.
|
||||
-- (When the preferred content expressions do not match on
|
||||
-- filenames, the first pass is skipped for speed.)
|
||||
--
|
||||
-- When there's a false positive in the bloom filter, the result
|
||||
-- is keeping a key that preferred content doesn't really want.
|
||||
|
|
22
Limit.hs
22
Limit.hs
|
@ -1,6 +1,6 @@
|
|||
{- user-specified limits on files to act on
|
||||
-
|
||||
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -88,6 +88,7 @@ addInclude = addLimit . limitInclude
|
|||
limitInclude :: MkLimit Annex
|
||||
limitInclude glob = Right $ MatchFiles
|
||||
{ matchAction = const $ matchGlobFile glob
|
||||
, matchNeedsFileName = True
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
|
@ -98,6 +99,7 @@ addExclude = addLimit . limitExclude
|
|||
limitExclude :: MkLimit Annex
|
||||
limitExclude glob = Right $ MatchFiles
|
||||
{ matchAction = const $ not <$$> matchGlobFile glob
|
||||
, matchNeedsFileName = True
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
|
@ -136,6 +138,7 @@ matchMagic :: String -> (Magic -> FilePath -> Annex (Maybe String)) -> (Provided
|
|||
matchMagic _limitname querymagic selectprovidedinfo (Just magic) glob =
|
||||
Right $ MatchFiles
|
||||
{ matchAction = const go
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = True
|
||||
}
|
||||
where
|
||||
|
@ -152,12 +155,14 @@ matchMagic limitname _ _ Nothing _ =
|
|||
addUnlocked :: Annex ()
|
||||
addUnlocked = addLimit $ Right $ MatchFiles
|
||||
{ matchAction = const $ matchLockStatus False
|
||||
, matchNeedsFileName = True
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
addLocked :: Annex ()
|
||||
addLocked = addLimit $ Right $ MatchFiles
|
||||
{ matchAction = const $ matchLockStatus True
|
||||
, matchNeedsFileName = True
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
|
@ -184,6 +189,7 @@ addIn s = do
|
|||
(name, date) = separate (== '@') s
|
||||
use a = Right $ MatchFiles
|
||||
{ matchAction = checkKey . a
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
inuuid u notpresent key
|
||||
|
@ -211,6 +217,7 @@ limitPresent u = MatchFiles
|
|||
else do
|
||||
us <- Remote.keyLocations key
|
||||
return $ maybe False (`elem` us) u
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
|
@ -218,6 +225,7 @@ limitPresent u = MatchFiles
|
|||
limitInDir :: FilePath -> MatchFiles Annex
|
||||
limitInDir dir = MatchFiles
|
||||
{ matchAction = const go
|
||||
, matchNeedsFileName = True
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
where
|
||||
|
@ -247,6 +255,7 @@ limitCopies want = case splitc ':' want of
|
|||
Just n -> Right $ MatchFiles
|
||||
{ matchAction = \notpresent -> checkKey $
|
||||
go' n good notpresent
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
go' n good notpresent key = do
|
||||
|
@ -268,6 +277,7 @@ limitLackingCopies approx want = case readish want of
|
|||
Just needed -> Right $ MatchFiles
|
||||
{ matchAction = \notpresent mi -> flip checkKey mi $
|
||||
go mi needed notpresent
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
Nothing -> Left "bad value for number of lacking copies"
|
||||
|
@ -293,6 +303,7 @@ limitLackingCopies approx want = case readish want of
|
|||
limitUnused :: MatchFiles Annex
|
||||
limitUnused = MatchFiles
|
||||
{ matchAction = go
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
where
|
||||
|
@ -306,6 +317,7 @@ limitUnused = MatchFiles
|
|||
limitAnything :: MatchFiles Annex
|
||||
limitAnything = MatchFiles
|
||||
{ matchAction = \_ _ -> return True
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
|
@ -313,6 +325,7 @@ limitAnything = MatchFiles
|
|||
limitNothing :: MatchFiles Annex
|
||||
limitNothing = MatchFiles
|
||||
{ matchAction = \_ _ -> return False
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
|
@ -332,6 +345,7 @@ limitInAllGroup getgroupmap groupname = Right $ MatchFiles
|
|||
else if not (S.null (S.intersection want notpresent))
|
||||
then return False
|
||||
else checkKey (check want) mi
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
where
|
||||
|
@ -346,6 +360,7 @@ addInBackend = addLimit . limitInBackend
|
|||
limitInBackend :: MkLimit Annex
|
||||
limitInBackend name = Right $ MatchFiles
|
||||
{ matchAction = const $ checkKey check
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
where
|
||||
|
@ -359,6 +374,7 @@ addSecureHash = addLimit $ Right limitSecureHash
|
|||
limitSecureHash :: MatchFiles Annex
|
||||
limitSecureHash = MatchFiles
|
||||
{ matchAction = const $ checkKey isCryptographicallySecure
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
|
@ -374,6 +390,7 @@ limitSize lb vs s = case readSize dataUnits s of
|
|||
Nothing -> Left "bad size"
|
||||
Just sz -> Right $ MatchFiles
|
||||
{ matchAction = go sz
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
where
|
||||
|
@ -399,6 +416,7 @@ limitMetaData s = case parseMetaDataMatcher s of
|
|||
Left e -> Left e
|
||||
Right (f, matching) -> Right $ MatchFiles
|
||||
{ matchAction = const $ checkKey (check f matching)
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
where
|
||||
|
@ -419,6 +437,7 @@ addTimeLimit duration = do
|
|||
shutdown True
|
||||
liftIO $ exitWith $ ExitFailure 101
|
||||
else return True
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
|
@ -427,6 +446,7 @@ addAccessedWithin duration = do
|
|||
now <- liftIO getPOSIXTime
|
||||
addLimit $ Right $ MatchFiles
|
||||
{ matchAction = const $ checkKey $ check now
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
where
|
||||
|
|
|
@ -15,12 +15,14 @@ import Types.FileMatcher
|
|||
addWantGet :: Annex ()
|
||||
addWantGet = addLimit $ Right $ MatchFiles
|
||||
{ matchAction = const $ checkWant $ wantGet False Nothing
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
addWantDrop :: Annex ()
|
||||
addWantDrop = addLimit $ Right $ MatchFiles
|
||||
{ matchAction = const $ checkWant $ wantDrop False Nothing Nothing
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex preferred content matcher configuration
|
||||
-
|
||||
- Copyright 2012-2019 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -21,6 +21,7 @@ module Logs.PreferredContent (
|
|||
defaultStandardGroup,
|
||||
preferredRequiredMapsLoad,
|
||||
preferredRequiredMapsLoad',
|
||||
introspectPreferredRequiredContent,
|
||||
prop_standardGroups_parse,
|
||||
) where
|
||||
|
||||
|
@ -61,6 +62,16 @@ checkMap getmap mu notpresent mkey afile d = do
|
|||
Nothing -> return d
|
||||
Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d)
|
||||
|
||||
{- Checks if the preferred or required content for the specified repository
|
||||
- (or the current repository if none is specified) contains any terms
|
||||
- that meet the condition. -}
|
||||
introspectPreferredRequiredContent :: (MatchFiles Annex -> Bool) -> Maybe UUID -> Annex Bool
|
||||
introspectPreferredRequiredContent c mu = do
|
||||
u <- maybe getUUID return mu
|
||||
check u preferredContentMap <||> check u requiredContentMap
|
||||
where
|
||||
check u mk = mk >>= return . maybe False (any c) . M.lookup u
|
||||
|
||||
preferredContentMap :: Annex (FileMatcherMap Annex)
|
||||
preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad preferredContentTokens) return
|
||||
=<< Annex.getState Annex.preferredcontentmap
|
||||
|
|
|
@ -57,8 +57,11 @@ type AssumeNotPresent = S.Set UUID
|
|||
|
||||
data MatchFiles a = MatchFiles
|
||||
{ matchAction :: AssumeNotPresent -> MatchInfo -> a Bool
|
||||
, matchNeedsFileName :: Bool
|
||||
-- ^ does the matchAction need a filename in order to match?
|
||||
, matchNeedsFileContent :: Bool
|
||||
-- ^ does the matchAction need the file content to be present?
|
||||
-- ^ does the matchAction need the file content to be present in
|
||||
-- order to succeed?
|
||||
}
|
||||
|
||||
type FileMatcher a = Matcher (MatchFiles a)
|
||||
|
|
|
@ -19,3 +19,5 @@ and it led to a 2x speedup (with warm cache):
|
|||
|
||||
|
||||
This repo has 25641 keys and all of them are in the worktree too.
|
||||
|
||||
> [[done]]! --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 2"""
|
||||
date="2020-09-24T19:04:32Z"
|
||||
content="""
|
||||
One side effect of this optimisation is that, while sync --all used to
|
||||
tell the filenames it was getting or dropping, when operating on files
|
||||
in the working tree, when the optimisation is enabled it will only
|
||||
display the keys. So, its behavior in 2 different repos might seem
|
||||
inconsistent to a user, who doesn't know about all these gory 2 pass details.
|
||||
|
||||
I think, if that became a problem, the best fix would be to only display
|
||||
the keys, and never the worktree filenames, even when running the first
|
||||
pass. But I'll wait and see if that needs to be done, I suppose.
|
||||
"""]]
|
Loading…
Reference in a new issue