balanced preferred content and --rebalance

This all works fine. But it doesn't check repository sizes yet, and
without repository size checking, once a repository gets full, there
will be no other repository that will want its files.

Use of sha2 seems unnecessary, probably adler32 or md5 or crc would have
been enough. Possibly just summing up the bytes of the key mod the number
of repositories would have sufficed. But sha2 is there, and probably
hardware accelerated. I doubt very much there is any security benefit
to using it though. If someone wants to construct a key that will be
balanced onto a given repository, sha2 is certainly not going to stop
them.
This commit is contained in:
Joey Hess 2024-08-09 14:16:09 -04:00
parent 152c87140b
commit 3ce2e95a5f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
11 changed files with 169 additions and 17 deletions

View file

@ -1,6 +1,6 @@
{- user-specified limits on files to act on
-
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -37,6 +37,7 @@ import Git.Types (RefDate(..))
import Utility.Glob
import Utility.HumanTime
import Utility.DataUnits
import Utility.Hash
import qualified Database.Keys
import qualified Utility.RawFilePath as R
import Backend
@ -47,6 +48,8 @@ import qualified Data.Set as S
import qualified Data.Map as M
import qualified System.FilePath.ByteString as P
import System.PosixCompat.Files (accessTime, isSymbolicLink)
import qualified Data.ByteArray as BA
import Data.Bits (shiftL)
{- Some limits can look at the current status of files on
- disk, or in the annex. This allows controlling which happens. -}
@ -553,6 +556,76 @@ limitOnlyInGroup getgroupmap groupname = Right $ MatchFiles
return $ not (S.null $ present `S.intersection` want)
&& S.null (S.filter (`S.notMember` want) present)
{- Limit used for "balanced" preferred content.
 -
 - Combines three other limits: whether the content is present
 - (limitPresent), whether enough copies exist (limitCopies), and whether
 - the key is assigned to the repository by limitFullyBalanced.
 -
 - When groupname contains no ':', ":1" is appended before it is passed
 - to limitCopies.
 -
 - When Annex.rebalance is set, only the fully balanced limit decides.
 - Otherwise present content matches, and other content matches when the
 - copies limit does not match and the fully balanced limit does.
 -}
limitBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
limitBalanced mu getgroupmap groupname = do
    fullybalanced <- limitFullyBalanced mu getgroupmap groupname
    copies <- limitCopies copiesspec
    let present = limitPresent mu
    -- The combined limit needs something whenever any of the limits
    -- it is built from needs it.
    let needsany field = any field [present, fullybalanced, copies]
    Right $ MatchFiles
        { matchAction = \a i -> do
            rebalancing <- Annex.getRead Annex.rebalance
            if rebalancing
                then matchAction fullybalanced a i
                else matchAction present a i <||>
                    ((not <$> matchAction copies a i)
                        <&&> matchAction fullybalanced a i)
        , matchNeedsFileName = needsany matchNeedsFileName
        , matchNeedsFileContent = needsany matchNeedsFileContent
        , matchNeedsKey = needsany matchNeedsKey
        , matchNeedsLocationLog = needsany matchNeedsLocationLog
        , matchDesc = "balanced" =? groupname
        }
  where
    -- Group spec handed to limitCopies, defaulting to one copy.
    copiesspec
        | ':' `elem` groupname = groupname
        | otherwise = groupname ++ ":1"
{- Limit that matches a key only when the repository mu is the member of
 - the group that the key is balanced onto.
 -
 - The members of the group named by groupname are looked up in the
 - GroupMap, and one of them is picked based on keyToInteger of the key.
 -
 - Never matches when mu is Nothing. Also never matches when the group
 - has no members (including when the group is not in the GroupMap),
 - since then there is no repository to pick.
 -}
limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
    { matchAction = const $ checkKey $ \key -> do
        groupmembers <- fromMaybe S.empty
            . M.lookup (toGroup groupname)
            . uuidsByGroup
            <$> getgroupmap
        -- TODO free space checking
        return $ case mu of
            Just u -> Just u == pickBalanced key groupmembers
            Nothing -> False
    , matchNeedsFileName = False
    , matchNeedsFileContent = False
    , matchNeedsKey = True
    , matchNeedsLocationLog = False
    , matchDesc = "fullybalanced" =? groupname
    }
  where
    -- Picks the member of the set that the key is assigned to, by
    -- indexing into the set with the key's integer modulo the set size.
    -- An empty set yields Nothing; taking the modulus with a size of 0
    -- would otherwise throw a divide by zero exception.
    pickBalanced :: Key -> S.Set UUID -> Maybe UUID
    pickBalanced key s
        | S.null s = Nothing
        | otherwise =
            let m = fromIntegral (S.size s)
                n = keyToInteger key
            in Just (S.elemAt (fromIntegral (n `mod` m)) s)
{- Maps a Key to an Integer that is the same every time for the same Key.
 -
 - The serialized key is hashed with sha2_256s, and the bytes of the
 - digest are combined into one Integer, with the first byte being the
 - most significant. Hashing bounds the size of the Integer and spreads
 - the values evenly.
 -}
keyToInteger :: Key -> Integer
keyToInteger = foldl' appendbyte 0 . BA.unpack . sha2_256s . serializeKey'
  where
    -- Shift the accumulated value up one byte and add the next byte.
    appendbyte acc byte = (acc `shiftL` 8) + fromIntegral byte
{- Registers a limit that skips files that are not using the named
 - key-value backend. -}
addInBackend :: String -> Annex ()
addInBackend name = addLimit (limitInBackend name)