benchmarked numcopies .gitattributes in preferred content
Checking .gitattributes adds a full minute to a git annex find looking for files that don't have enough copies. 2:25 increases to 3:27. I feel this is too much of a slowdown to justify making it the default. So, exposed two versions of the preferred content expression, a slow one and a fast but approximate one. I'm using the approximate one in the default preferred content expressions to avoid slowing down the assistant.
This commit is contained in:
parent
f7cdc40f7b
commit
f2713a3bb9
8 changed files with 41 additions and 35 deletions
|
@ -70,7 +70,8 @@ parseToken checkpresent checkpreferreddir groupmap t
|
||||||
[ ("include", limitInclude)
|
[ ("include", limitInclude)
|
||||||
, ("exclude", limitExclude)
|
, ("exclude", limitExclude)
|
||||||
, ("copies", limitCopies)
|
, ("copies", limitCopies)
|
||||||
, ("numcopiesneeded", limitNumCopiesNeeded)
|
, ("lackingcopies", limitLackingCopies False)
|
||||||
|
, ("approxlackingcopies", limitLackingCopies True)
|
||||||
, ("inbackend", limitInBackend)
|
, ("inbackend", limitInBackend)
|
||||||
, ("largerthan", limitSize (>))
|
, ("largerthan", limitSize (>))
|
||||||
, ("smallerthan", limitSize (<))
|
, ("smallerthan", limitSize (<))
|
||||||
|
|
|
@ -42,8 +42,10 @@ options = Option.common ++
|
||||||
"match files present in a remote"
|
"match files present in a remote"
|
||||||
, Option ['C'] ["copies"] (ReqArg Limit.addCopies paramNumber)
|
, Option ['C'] ["copies"] (ReqArg Limit.addCopies paramNumber)
|
||||||
"skip files with fewer copies"
|
"skip files with fewer copies"
|
||||||
, Option [] ["numcopiesneeded"] (ReqArg Limit.addNumCopiesNeeded paramNumber)
|
, Option [] ["lackingcopies"] (ReqArg (Limit.addLackingCopies False) paramNumber)
|
||||||
"match files that need more copies"
|
"match files that need more copies"
|
||||||
|
, Option [] ["approxlackingcopies"] (ReqArg (Limit.addLackingCopies True) paramNumber)
|
||||||
|
"match files that need more copies (faster)"
|
||||||
, Option ['B'] ["inbackend"] (ReqArg Limit.addInBackend paramName)
|
, Option ['B'] ["inbackend"] (ReqArg Limit.addInBackend paramName)
|
||||||
"match files using a key-value backend"
|
"match files using a key-value backend"
|
||||||
, Option [] ["inallgroup"] (ReqArg Limit.addInAllGroup paramGroup)
|
, Option [] ["inallgroup"] (ReqArg Limit.addInAllGroup paramGroup)
|
||||||
|
|
33
Limit.hs
33
Limit.hs
|
@ -178,29 +178,26 @@ limitCopies want = case split ":" want of
|
||||||
| "+" `isSuffixOf` s = (>=) <$> readTrustLevel (beginning s)
|
| "+" `isSuffixOf` s = (>=) <$> readTrustLevel (beginning s)
|
||||||
| otherwise = (==) <$> readTrustLevel s
|
| otherwise = (==) <$> readTrustLevel s
|
||||||
|
|
||||||
{- Adds a limit to match files that need more copies made.
|
{- Adds a limit to match files that need more copies made. -}
|
||||||
-
|
addLackingCopies :: Bool -> String -> Annex ()
|
||||||
- Does not look at annex.numcopies .gitattributes, because that
|
addLackingCopies approx = addLimit . limitLackingCopies approx
|
||||||
- would require querying git check-attr every time a preferred content
|
|
||||||
- expression is checked, which would probably be quite slow.
|
|
||||||
-}
|
|
||||||
addNumCopiesNeeded :: String -> Annex ()
|
|
||||||
addNumCopiesNeeded = addLimit . limitNumCopiesNeeded
|
|
||||||
|
|
||||||
limitNumCopiesNeeded :: MkLimit
|
limitLackingCopies :: Bool -> MkLimit
|
||||||
limitNumCopiesNeeded want = case readish want of
|
limitLackingCopies approx want = case readish want of
|
||||||
Just needed -> Right $ \notpresent -> checkKey $
|
Just needed -> Right $ \notpresent mi -> flip checkKey mi $
|
||||||
handle needed notpresent
|
handle mi needed notpresent
|
||||||
Nothing -> Left "bad value for numcopiesneeded"
|
Nothing -> Left "bad value for number of lacking copies"
|
||||||
where
|
where
|
||||||
handle needed notpresent key = do
|
handle mi needed notpresent key = do
|
||||||
gv <- getGlobalNumCopies
|
NumCopies numcopies <- if approx
|
||||||
case gv of
|
then approxNumCopies
|
||||||
Nothing -> return False
|
else case mi of
|
||||||
Just (NumCopies numcopies) -> do
|
MatchingKey _ -> approxNumCopies
|
||||||
|
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
|
||||||
us <- filter (`S.notMember` notpresent)
|
us <- filter (`S.notMember` notpresent)
|
||||||
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
|
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
|
||||||
return $ numcopies - length us >= needed
|
return $ numcopies - length us >= needed
|
||||||
|
approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies
|
||||||
|
|
||||||
{- Adds a limit to skip files not believed to be present in all
|
{- Adds a limit to skip files not believed to be present in all
|
||||||
- repositories in the specified group. -}
|
- repositories in the specified group. -}
|
||||||
|
|
|
@ -93,6 +93,8 @@ notArchived :: String
|
||||||
notArchived = "not (copies=archive:1 or copies=smallarchive:1)"
|
notArchived = "not (copies=archive:1 or copies=smallarchive:1)"
|
||||||
|
|
||||||
{- Most repositories want any content that is only on untrusted
|
{- Most repositories want any content that is only on untrusted
|
||||||
- or dead repositories, or that otherwise does not have enough copies. -}
|
- or dead repositories, or that otherwise does not have enough copies.
|
||||||
|
- Does not look at .gitattributes since that is quite a lot slower.
|
||||||
|
-}
|
||||||
lastResort :: String -> PreferredContentExpression
|
lastResort :: String -> PreferredContentExpression
|
||||||
lastResort s = "(" ++ s ++ ") or numcopiesneeded=1"
|
lastResort s = "(" ++ s ++ ") or approxlackingcopies=1"
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -14,7 +14,7 @@ git-annex (5.20140118) UNRELEASED; urgency=medium
|
||||||
command is used to set the global number of copies, any annex.numcopies
|
command is used to set the global number of copies, any annex.numcopies
|
||||||
git configs will be ignored.
|
git configs will be ignored.
|
||||||
* assistant: Make the prefs page set the global numcopies.
|
* assistant: Make the prefs page set the global numcopies.
|
||||||
* Add numcopiesneeded preferred content expression.
|
* Add lackingcopies and approxlackingcopies preferred content expressions.
|
||||||
* Client, transfer, incremental backup, and archive repositories
|
* Client, transfer, incremental backup, and archive repositories
|
||||||
now want to get content that does not yet have enough copies.
|
now want to get content that does not yet have enough copies.
|
||||||
* repair: Check git version at run time.
|
* repair: Check git version at run time.
|
||||||
|
|
|
@ -1022,14 +1022,16 @@ file contents are present at either of two repositories.
|
||||||
copies, on remotes in the specified group. For example,
|
copies, on remotes in the specified group. For example,
|
||||||
`--copies=archive:2`
|
`--copies=archive:2`
|
||||||
|
|
||||||
* `--numcopiesneeded=number`
|
* `--lackingcopies=number`
|
||||||
|
|
||||||
Matches only files that git-annex believes need the specified number or
|
Matches only files that git-annex believes need the specified number or
|
||||||
more additional copies to be made in order to satisfy their numcopies
|
more additional copies to be made in order to satisfy their numcopies
|
||||||
setting, as configured by the global numcopies setting of the repository.
|
settings.
|
||||||
|
|
||||||
Note that for various reasons, including speed, this does not look
|
* `--approxlackingcopies=number`
|
||||||
at the annex.numcopies .gitattributes settings of files.
|
|
||||||
|
Like lackingcopies, but does not look at .gitattributes annex.numcopies
|
||||||
|
settings. This makes it significantly faster.
|
||||||
|
|
||||||
* `--inbackend=name`
|
* `--inbackend=name`
|
||||||
|
|
||||||
|
|
|
@ -113,7 +113,7 @@ any repository that can will back it up.)
|
||||||
All content is preferred, unless it's for a file in an "archive" directory,
|
All content is preferred, unless it's for a file in an "archive" directory,
|
||||||
which has reached an archive repository.
|
which has reached an archive repository.
|
||||||
|
|
||||||
`((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) or numcopiesneeded=1`
|
`((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) or approxlackingcopies=1`
|
||||||
|
|
||||||
### transfer
|
### transfer
|
||||||
|
|
||||||
|
@ -147,20 +147,20 @@ All content is preferred.
|
||||||
Only prefers content that's not already backed up to another backup
|
Only prefers content that's not already backed up to another backup
|
||||||
or incremental backup repository.
|
or incremental backup repository.
|
||||||
|
|
||||||
`(include=* and (not copies=backup:1) and (not copies=incrementalbackup:1)) or numcopiesneeded=1`
|
`(include=* and (not copies=backup:1) and (not copies=incrementalbackup:1)) or approxlackingcopies=1`
|
||||||
|
|
||||||
### small archive
|
### small archive
|
||||||
|
|
||||||
Only prefers content that's located in an "archive" directory, and
|
Only prefers content that's located in an "archive" directory, and
|
||||||
only if it's not already been archived somewhere else.
|
only if it's not already been archived somewhere else.
|
||||||
|
|
||||||
`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or numcopiesneeded=1`
|
`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
||||||
|
|
||||||
### full archive
|
### full archive
|
||||||
|
|
||||||
All content is preferred, unless it's already been archived somewhere else.
|
All content is preferred, unless it's already been archived somewhere else.
|
||||||
|
|
||||||
`(not (copies=archive:1 or copies=smallarchive:1)) or numcopiesneeded=1`
|
`(not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
||||||
|
|
||||||
Note that if you want to archive multiple copies (not a bad idea!),
|
Note that if you want to archive multiple copies (not a bad idea!),
|
||||||
you should instead configure all your archive repositories with a
|
you should instead configure all your archive repositories with a
|
||||||
|
|
|
@ -59,7 +59,9 @@ Conclusion:
|
||||||
to instead end with "or numcopiesneeded=1" **done**
|
to instead end with "or numcopiesneeded=1" **done**
|
||||||
* See if "numcopiesneeded=N" can check .gitattributes without getting
|
* See if "numcopiesneeded=N" can check .gitattributes without getting
|
||||||
a lot slower. If not, perhaps add a "numcopiesneededaccurate=N" that
|
a lot slower. If not, perhaps add a "numcopiesneededaccurate=N" that
|
||||||
checks it.
|
checks it. **done**
|
||||||
|
|
||||||
|
[[done]]
|
||||||
|
|
||||||
## Stability analysis
|
## Stability analysis
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue