benchmarked numcopies .gitattributes in preferred content
Checking .gitattributes adds a full minute to a git annex find looking for files that don't have enough copies: the run time increases from 2:25 to 3:27. I feel this is too much of a slowdown to justify making it the default. So, exposed two versions of the preferred content expression, a slow one and a fast but approximate one. I'm using the approximate one in the default preferred content expressions to avoid slowing down the assistant.
This commit is contained in:
parent
f7cdc40f7b
commit
f2713a3bb9
8 changed files with 41 additions and 35 deletions
|
@ -70,7 +70,8 @@ parseToken checkpresent checkpreferreddir groupmap t
|
|||
[ ("include", limitInclude)
|
||||
, ("exclude", limitExclude)
|
||||
, ("copies", limitCopies)
|
||||
, ("numcopiesneeded", limitNumCopiesNeeded)
|
||||
, ("lackingcopies", limitLackingCopies False)
|
||||
, ("approxlackingcopies", limitLackingCopies True)
|
||||
, ("inbackend", limitInBackend)
|
||||
, ("largerthan", limitSize (>))
|
||||
, ("smallerthan", limitSize (<))
|
||||
|
|
|
@ -42,8 +42,10 @@ options = Option.common ++
|
|||
"match files present in a remote"
|
||||
, Option ['C'] ["copies"] (ReqArg Limit.addCopies paramNumber)
|
||||
"skip files with fewer copies"
|
||||
, Option [] ["numcopiesneeded"] (ReqArg Limit.addNumCopiesNeeded paramNumber)
|
||||
, Option [] ["lackingcopies"] (ReqArg (Limit.addLackingCopies False) paramNumber)
|
||||
"match files that need more copies"
|
||||
, Option [] ["approxlackingcopies"] (ReqArg (Limit.addLackingCopies True) paramNumber)
|
||||
"match files that need more copies (faster)"
|
||||
, Option ['B'] ["inbackend"] (ReqArg Limit.addInBackend paramName)
|
||||
"match files using a key-value backend"
|
||||
, Option [] ["inallgroup"] (ReqArg Limit.addInAllGroup paramGroup)
|
||||
|
|
39
Limit.hs
39
Limit.hs
|
@ -178,29 +178,26 @@ limitCopies want = case split ":" want of
|
|||
| "+" `isSuffixOf` s = (>=) <$> readTrustLevel (beginning s)
|
||||
| otherwise = (==) <$> readTrustLevel s
|
||||
|
||||
{- Adds a limit to match files that need more copies made.
|
||||
-
|
||||
- Does not look at annex.numcopies .gitattributes, because that
|
||||
- would require querying git check-attr every time a preferred content
|
||||
- expression is checked, which would probably be quite slow.
|
||||
-}
|
||||
addNumCopiesNeeded :: String -> Annex ()
|
||||
addNumCopiesNeeded = addLimit . limitNumCopiesNeeded
|
||||
{- Adds a limit to match files that need more copies made. -}
|
||||
addLackingCopies :: Bool -> String -> Annex ()
|
||||
addLackingCopies approx = addLimit . limitLackingCopies approx
|
||||
|
||||
limitNumCopiesNeeded :: MkLimit
|
||||
limitNumCopiesNeeded want = case readish want of
|
||||
Just needed -> Right $ \notpresent -> checkKey $
|
||||
handle needed notpresent
|
||||
Nothing -> Left "bad value for numcopiesneeded"
|
||||
limitLackingCopies :: Bool -> MkLimit
|
||||
limitLackingCopies approx want = case readish want of
|
||||
Just needed -> Right $ \notpresent mi -> flip checkKey mi $
|
||||
handle mi needed notpresent
|
||||
Nothing -> Left "bad value for number of lacking copies"
|
||||
where
|
||||
handle needed notpresent key = do
|
||||
gv <- getGlobalNumCopies
|
||||
case gv of
|
||||
Nothing -> return False
|
||||
Just (NumCopies numcopies) -> do
|
||||
us <- filter (`S.notMember` notpresent)
|
||||
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
|
||||
return $ numcopies - length us >= needed
|
||||
handle mi needed notpresent key = do
|
||||
NumCopies numcopies <- if approx
|
||||
then approxNumCopies
|
||||
else case mi of
|
||||
MatchingKey _ -> approxNumCopies
|
||||
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
|
||||
us <- filter (`S.notMember` notpresent)
|
||||
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
|
||||
return $ numcopies - length us >= needed
|
||||
approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies
|
||||
|
||||
{- Adds a limit to skip files not believed to be present in all
|
||||
- repositories in the specified group. -}
|
||||
|
|
|
@ -93,6 +93,8 @@ notArchived :: String
|
|||
notArchived = "not (copies=archive:1 or copies=smallarchive:1)"
|
||||
|
||||
{- Most repositories want any content that is only on untrusted
|
||||
- or dead repositories, or that otherwise does not have enough copies. -}
|
||||
- or dead repositories, or that otherwise does not have enough copies.
|
||||
- Does not look at .gitattributes since that is quite a lot slower.
|
||||
-}
|
||||
lastResort :: String -> PreferredContentExpression
|
||||
lastResort s = "(" ++ s ++ ") or numcopiesneeded=1"
|
||||
lastResort s = "(" ++ s ++ ") or approxlackingcopies=1"
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -14,7 +14,7 @@ git-annex (5.20140118) UNRELEASED; urgency=medium
|
|||
command is used to set the global number of copies, any annex.numcopies
|
||||
git configs will be ignored.
|
||||
* assistant: Make the prefs page set the global numcopies.
|
||||
* Add numcopiesneeded preferred content expression.
|
||||
* Add lackingcopies and approxlackingcopies preferred content expressions.
|
||||
* Client, transfer, incremental backup, and archive repositories
|
||||
now want to get content that does not yet have enough copies.
|
||||
* repair: Check git version at run time.
|
||||
|
|
|
@ -1022,14 +1022,16 @@ file contents are present at either of two repositories.
|
|||
copies, on remotes in the specified group. For example,
|
||||
`--copies=archive:2`
|
||||
|
||||
* `--numcopiesneeded=number`
|
||||
* `--lackingcopies=number`
|
||||
|
||||
Matches only files that git-annex believes need the specified number or
|
||||
more additional copies to be made in order to satisfy their numcopies
|
||||
setting, as configured by the global numcopies setting of the repository.
|
||||
settings.
|
||||
|
||||
Note that for various reasons, including speed, this does not look
|
||||
at the annex.numcopies .gitattributes settings of files.
|
||||
* `--approxlackingcopies=number`
|
||||
|
||||
Like lackingcopies, but does not look at .gitattributes annex.numcopies
|
||||
settings. This makes it significantly faster.
|
||||
|
||||
* `--inbackend=name`
|
||||
|
||||
|
|
|
@ -113,7 +113,7 @@ any repository that can will back it up.)
|
|||
All content is preferred, unless it's for a file in an "archive" directory,
|
||||
which has reached an archive repository.
|
||||
|
||||
`((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) or numcopiesneeded=1`
|
||||
`((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) or approxlackingcopies=1`
|
||||
|
||||
### transfer
|
||||
|
||||
|
@ -147,20 +147,20 @@ All content is preferred.
|
|||
Only prefers content that's not already backed up to another backup
|
||||
or incremental backup repository.
|
||||
|
||||
`(include=* and (not copies=backup:1) and (not copies=incrementalbackup:1)) or numcopiesneeded=1`
|
||||
`(include=* and (not copies=backup:1) and (not copies=incrementalbackup:1)) or approxlackingcopies=1`
|
||||
|
||||
### small archive
|
||||
|
||||
Only prefers content that's located in an "archive" directory, and
|
||||
only if it's not already been archived somewhere else.
|
||||
|
||||
`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or numcopiesneeded=1`
|
||||
`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
||||
|
||||
### full archive
|
||||
|
||||
All content is preferred, unless it's already been archived somewhere else.
|
||||
|
||||
`(not (copies=archive:1 or copies=smallarchive:1)) or numcopiesneeded=1`
|
||||
`(not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
||||
|
||||
Note that if you want to archive multiple copies (not a bad idea!),
|
||||
you should instead configure all your archive repositories with a
|
||||
|
|
|
@ -59,7 +59,9 @@ Conclusion:
|
|||
to instead end with "or numcopiesneeded=1" **done**
|
||||
* See if "numcopiesneeded=N" can check .gitattributes without getting
|
||||
a lot slower. If not, perhaps add a "numcopiesneededaccurate=N" that
|
||||
checks it.
|
||||
checks it. **done**
|
||||
|
||||
[[done]]
|
||||
|
||||
## Stability analysis
|
||||
|
||||
|
|
Loading…
Reference in a new issue