prevent dropping required content of other file using same content

When two files have the same content, and a required content expression
matches one but not the other, dropping the latter file will fail as it
would also remove the content of the required file.

This will slow down drop (w/o --auto), dropunused, mirror, and move, by one
keys db lookup per file. But I did include an optimisation to avoid a
double db lookup in the drop --auto / sync --content case. I suspect that
dropunused could also use PreferredContentChecked True, but haven't
entirely thought it through and it's rarely used with enough files for the
optimisation to matter.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2021-05-25 10:57:06 -04:00
parent 7029ef1c3d
commit cedc28a783
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 105 additions and 60 deletions

View file

@ -27,8 +27,8 @@ import qualified Data.Set as S
type Reason = String type Reason = String
{- Drop a key from local and/or remote when allowed by the preferred content {- Drop a key from local and/or remote when allowed by the preferred content,
- and numcopies settings. - required content, and numcopies settings.
- -
- Skips trying to drop from remotes that are appendonly, since those drops - Skips trying to drop from remotes that are appendonly, since those drops
- would presumably fail. Also skips dropping from exporttree/importtree remotes, - would presumably fail. Also skips dropping from exporttree/importtree remotes,
@ -105,8 +105,9 @@ handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
checkdrop fs n u a = checkdrop fs n u a =
let afs = map (AssociatedFile . Just) fs let afs = map (AssociatedFile . Just) fs
pcc = Command.Drop.PreferredContentChecked True
in ifM (wantDrop True u (Just key) afile (Just afs)) in ifM (wantDrop True u (Just key) afile (Just afs))
( dodrop n u a ( dodrop n u (a pcc)
, return n , return n
) )
@ -126,12 +127,12 @@ handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
, return n , return n
) )
dropl fs n = checkdrop fs n Nothing $ \numcopies mincopies -> dropl fs n = checkdrop fs n Nothing $ \pcc numcopies mincopies ->
stopUnless (inAnnex key) $ stopUnless (inAnnex key) $
Command.Drop.startLocal afile ai si numcopies mincopies key preverified Command.Drop.startLocal pcc afile ai si numcopies mincopies key preverified
dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \numcopies mincopies -> dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \pcc numcopies mincopies ->
Command.Drop.startRemote afile ai si numcopies mincopies key r Command.Drop.startRemote pcc afile ai si numcopies mincopies key r
ai = mkActionItem (key, afile) ai = mkActionItem (key, afile)

View file

@ -13,6 +13,7 @@ import Annex.UUID
import Annex.CatFile import Annex.CatFile
import Git.FilePath import Git.FilePath
import qualified Database.Keys import qualified Database.Keys
import Types.FileMatcher
import qualified Data.Set as S import qualified Data.Set as S
@ -20,12 +21,12 @@ import qualified Data.Set as S
wantGet :: Bool -> Maybe Key -> AssociatedFile -> Annex Bool wantGet :: Bool -> Maybe Key -> AssociatedFile -> Annex Bool
wantGet d key file = isPreferredContent Nothing S.empty key file d wantGet d key file = isPreferredContent Nothing S.empty key file d
{- Check if a file is preferred content for a remote. -} {- Check if a file is preferred content for a repository. -}
wantSend :: Bool -> Maybe Key -> AssociatedFile -> UUID -> Annex Bool wantSend :: Bool -> Maybe Key -> AssociatedFile -> UUID -> Annex Bool
wantSend d key file to = isPreferredContent (Just to) S.empty key file d wantSend d key file to = isPreferredContent (Just to) S.empty key file d
{- Check if a file can be dropped, maybe from a remote. {- Check if a file is not preferred or required content, and can be
- Don't drop files that are preferred content. - dropped. When a UUID is provided, checks for that repository.
- -
- The AssociatedFile is the one that the user requested to drop. - The AssociatedFile is the one that the user requested to drop.
- There may be other files that use the same key, and preferred content - There may be other files that use the same key, and preferred content
@ -34,12 +35,21 @@ wantSend d key file to = isPreferredContent (Just to) S.empty key file d
- they can be provided, otherwise this looks them up. - they can be provided, otherwise this looks them up.
-} -}
wantDrop :: Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> (Maybe [AssociatedFile]) -> Annex Bool wantDrop :: Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> (Maybe [AssociatedFile]) -> Annex Bool
wantDrop d from key file others = do wantDrop d from key file others =
isNothing <$> checkDrop isPreferredContent d from key file others
{- Generalization of wantDrop that can also be used with isRequiredContent.
-
- When the content should not be dropped, returns Just the file that
- the checker matches.
-}
checkDrop :: (Maybe UUID -> AssumeNotPresent -> Maybe Key -> AssociatedFile -> Bool -> Annex Bool) -> Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> (Maybe [AssociatedFile]) -> Annex (Maybe AssociatedFile)
checkDrop checker d from key file others = do
u <- maybe getUUID (pure . id) from u <- maybe getUUID (pure . id) from
let s = S.singleton u let s = S.singleton u
let checkwant f = isPreferredContent (Just u) s key f d let checker' f = checker (Just u) s key f d
ifM (checkwant file) ifM (checker' file)
( return False ( return (Just file)
, do , do
others' <- case others of others' <- case others of
Just afs -> pure (filter (/= file) afs) Just afs -> pure (filter (/= file) afs)
@ -48,18 +58,18 @@ wantDrop d from key file others = do
mapM (\f -> AssociatedFile . Just <$> fromRepo (fromTopFilePath f)) mapM (\f -> AssociatedFile . Just <$> fromRepo (fromTopFilePath f))
=<< Database.Keys.getAssociatedFiles k =<< Database.Keys.getAssociatedFiles k
Nothing -> pure [] Nothing -> pure []
l <- filterM checkwant others' l <- filterM checker' others'
if null l if null l
then return True then return Nothing
else checkassociated l else checkassociated l
) )
where where
-- Some associated files that are in the keys database may no -- Some associated files that are in the keys database may no
-- longer correspond to files in the repository, and should -- longer correspond to files in the repository, and should
-- not prevent dropping. -- not prevent dropping.
checkassociated [] = return True checkassociated [] = return Nothing
checkassociated (AssociatedFile (Just af):fs) = checkassociated (af@(AssociatedFile (Just f)):fs) =
catKeyFile af >>= \case catKeyFile f >>= \case
Just k | Just k == key -> return False Just k | Just k == key -> return (Just af)
_ -> checkassociated fs _ -> checkassociated fs
checkassociated (AssociatedFile Nothing:fs) = checkassociated fs checkassociated (AssociatedFile Nothing:fs) = checkassociated fs

View file

@ -1,5 +1,8 @@
git-annex (8.20210429) UNRELEASED; urgency=medium git-annex (8.20210429) UNRELEASED; urgency=medium
* When two files have the same content, and a required content expression
matches one but not the other, dropping the latter file will fail as it
would also remove the content of the required file.
* drop --auto: When two files have the same content, and a preferred content * drop --auto: When two files have the same content, and a preferred content
expression matches one but not the other, do not drop the content. expression matches one but not the other, do not drop the content.
* sync --content, assistant: When two unlocked files have the same * sync --content, assistant: When two unlocked files have the same

View file

@ -21,8 +21,6 @@ import Annex.Content
import Annex.Wanted import Annex.Wanted
import Annex.Notification import Annex.Notification
import qualified Data.Set as S
cmd :: Command cmd :: Command
cmd = withGlobalOptions [jobsOption, jsonOptions, annexedMatchingOptions] $ cmd = withGlobalOptions [jobsOption, jsonOptions, annexedMatchingOptions] $
command "drop" SectionCommon command "drop" SectionCommon
@ -88,31 +86,32 @@ start' o from key afile ai si =
checkDropAuto (autoMode o) from afile key $ \numcopies mincopies -> checkDropAuto (autoMode o) from afile key $ \numcopies mincopies ->
stopUnless wantdrop $ stopUnless wantdrop $
case from of case from of
Nothing -> startLocal afile ai si numcopies mincopies key [] Nothing -> startLocal pcc afile ai si numcopies mincopies key []
Just remote -> startRemote afile ai si numcopies mincopies key remote Just remote -> startRemote pcc afile ai si numcopies mincopies key remote
where where
wantdrop wantdrop
| autoMode o = wantDrop False (Remote.uuid <$> from) (Just key) afile Nothing | autoMode o = wantDrop False (Remote.uuid <$> from) (Just key) afile Nothing
| otherwise = return True | otherwise = return True
pcc = PreferredContentChecked (autoMode o)
startKeys :: DropOptions -> Maybe Remote -> (SeekInput, Key, ActionItem) -> CommandStart startKeys :: DropOptions -> Maybe Remote -> (SeekInput, Key, ActionItem) -> CommandStart
startKeys o from (si, key, ai) = start' o from key (AssociatedFile Nothing) ai si startKeys o from (si, key, ai) = start' o from key (AssociatedFile Nothing) ai si
startLocal :: AssociatedFile -> ActionItem -> SeekInput -> NumCopies -> MinCopies -> Key -> [VerifiedCopy] -> CommandStart startLocal :: PreferredContentChecked -> AssociatedFile -> ActionItem -> SeekInput -> NumCopies -> MinCopies -> Key -> [VerifiedCopy] -> CommandStart
startLocal afile ai si numcopies mincopies key preverified = startLocal pcc afile ai si numcopies mincopies key preverified =
starting "drop" (OnlyActionOn key ai) si $ starting "drop" (OnlyActionOn key ai) si $
performLocal key afile numcopies mincopies preverified performLocal pcc key afile numcopies mincopies preverified
startRemote :: AssociatedFile -> ActionItem -> SeekInput -> NumCopies -> MinCopies -> Key -> Remote -> CommandStart startRemote :: PreferredContentChecked -> AssociatedFile -> ActionItem -> SeekInput -> NumCopies -> MinCopies -> Key -> Remote -> CommandStart
startRemote afile ai si numcopies mincopies key remote = startRemote pcc afile ai si numcopies mincopies key remote =
starting ("drop " ++ Remote.name remote) (OnlyActionOn key ai) si $ starting ("drop " ++ Remote.name remote) (OnlyActionOn key ai) si $
performRemote key afile numcopies mincopies remote performRemote pcc key afile numcopies mincopies remote
performLocal :: Key -> AssociatedFile -> NumCopies -> MinCopies -> [VerifiedCopy] -> CommandPerform performLocal :: PreferredContentChecked -> Key -> AssociatedFile -> NumCopies -> MinCopies -> [VerifiedCopy] -> CommandPerform
performLocal key afile numcopies mincopies preverified = lockContentForRemoval key fallback $ \contentlock -> do performLocal pcc key afile numcopies mincopies preverified = lockContentForRemoval key fallback $ \contentlock -> do
u <- getUUID u <- getUUID
(tocheck, verified) <- verifiableCopies key [u] (tocheck, verified) <- verifiableCopies key [u]
doDrop u (Just contentlock) key afile numcopies mincopies [] (preverified ++ verified) tocheck doDrop pcc u (Just contentlock) key afile numcopies mincopies [] (preverified ++ verified) tocheck
( \proof -> do ( \proof -> do
fastDebug "Command.Drop" $ unwords fastDebug "Command.Drop" $ unwords
[ "Dropping from here" [ "Dropping from here"
@ -134,12 +133,12 @@ performLocal key afile numcopies mincopies preverified = lockContentForRemoval k
-- to be done except for cleaning up. -- to be done except for cleaning up.
fallback = next $ cleanupLocal key fallback = next $ cleanupLocal key
performRemote :: Key -> AssociatedFile -> NumCopies -> MinCopies -> Remote -> CommandPerform performRemote :: PreferredContentChecked -> Key -> AssociatedFile -> NumCopies -> MinCopies -> Remote -> CommandPerform
performRemote key afile numcopies mincopies remote = do performRemote pcc key afile numcopies mincopies remote = do
-- Filter the uuid it's being dropped from out of the lists of -- Filter the uuid it's being dropped from out of the lists of
-- places assumed to have the key, and places to check. -- places assumed to have the key, and places to check.
(tocheck, verified) <- verifiableCopies key [uuid] (tocheck, verified) <- verifiableCopies key [uuid]
doDrop uuid Nothing key afile numcopies mincopies [uuid] verified tocheck doDrop pcc uuid Nothing key afile numcopies mincopies [uuid] verified tocheck
( \proof -> do ( \proof -> do
fastDebug "Command.Drop" $ unwords fastDebug "Command.Drop" $ unwords
[ "Dropping from remote" [ "Dropping from remote"
@ -169,12 +168,11 @@ cleanupRemote key remote ok = do
- verify that enough copies of a key exist to allow it to be - verify that enough copies of a key exist to allow it to be
- safely removed (with no data loss). - safely removed (with no data loss).
- -
- Also checks if it's required content, and refuses to drop if so.
-
- --force overrides and always allows dropping. - --force overrides and always allows dropping.
-} -}
doDrop doDrop
:: UUID :: PreferredContentChecked
-> UUID
-> Maybe ContentRemovalLock -> Maybe ContentRemovalLock
-> Key -> Key
-> AssociatedFile -> AssociatedFile
@ -185,10 +183,10 @@ doDrop
-> [UnVerifiedCopy] -> [UnVerifiedCopy]
-> (Maybe SafeDropProof -> CommandPerform, CommandPerform) -> (Maybe SafeDropProof -> CommandPerform, CommandPerform)
-> CommandPerform -> CommandPerform
doDrop dropfrom contentlock key afile numcopies mincopies skip preverified check (dropaction, nodropaction) = doDrop pcc dropfrom contentlock key afile numcopies mincopies skip preverified check (dropaction, nodropaction) =
ifM (Annex.getState Annex.force) ifM (Annex.getState Annex.force)
( dropaction Nothing ( dropaction Nothing
, ifM (checkRequiredContent dropfrom key afile) , ifM (checkRequiredContent pcc dropfrom key afile)
( verifyEnoughCopiesToDrop nolocmsg key ( verifyEnoughCopiesToDrop nolocmsg key
contentlock numcopies mincopies contentlock numcopies mincopies
skip preverified check skip preverified check
@ -203,18 +201,27 @@ doDrop dropfrom contentlock key afile numcopies mincopies skip preverified check
showLongNote "(Use --force to override this check, or adjust numcopies.)" showLongNote "(Use --force to override this check, or adjust numcopies.)"
a a
checkRequiredContent :: UUID -> Key -> AssociatedFile -> Annex Bool {- Checking preferred content also checks required content, so when
checkRequiredContent u k afile = - auto mode causes preferred content to be checked, it's redundant
ifM (isRequiredContent (Just u) S.empty (Just k) afile False) - for checkRequiredContent to separately check required content, and
( requiredContent - providing this avoids that extra work. -}
, return True newtype PreferredContentChecked = PreferredContentChecked Bool
)
requiredContent :: Annex Bool checkRequiredContent :: PreferredContentChecked -> UUID -> Key -> AssociatedFile -> Annex Bool
requiredContent = do checkRequiredContent (PreferredContentChecked True) _ _ _ = return True
showLongNote "That file is required content, it cannot be dropped!" checkRequiredContent (PreferredContentChecked False) u k afile =
showLongNote "(Use --force to override this check, or adjust required content configuration.)" checkDrop isRequiredContent False (Just u) (Just k) afile Nothing >>= \case
return False Nothing -> return True
Just afile' -> do
if afile == afile'
then showLongNote "That file is required content. It cannot be dropped!"
else showLongNote $ "That file has the same content as another file"
++ case afile' of
AssociatedFile (Just f) -> " (" ++ fromRawFilePath f ++ "),"
AssociatedFile Nothing -> ""
++ " which is required content. It cannot be dropped!"
showLongNote "(Use --force to override this check, or adjust required content configuration.)"
return False
{- In auto mode, only runs the action if there are enough {- In auto mode, only runs the action if there are enough
- copies on other semitrusted repositories. -} - copies on other semitrusted repositories. -}

View file

@ -49,7 +49,7 @@ perform :: Maybe Remote -> NumCopies -> MinCopies -> Key -> CommandPerform
perform from numcopies mincopies key = case from of perform from numcopies mincopies key = case from of
Just r -> do Just r -> do
showAction $ "from " ++ Remote.name r showAction $ "from " ++ Remote.name r
Command.Drop.performRemote key (AssociatedFile Nothing) numcopies mincopies r Command.Drop.performRemote pcc key (AssociatedFile Nothing) numcopies mincopies r
Nothing -> ifM (inAnnex key) Nothing -> ifM (inAnnex key)
( droplocal ( droplocal
, ifM (objectFileExists key) , ifM (objectFileExists key)
@ -63,7 +63,8 @@ perform from numcopies mincopies key = case from of
) )
) )
where where
droplocal = Command.Drop.performLocal key (AssociatedFile Nothing) numcopies mincopies [] droplocal = Command.Drop.performLocal pcc key (AssociatedFile Nothing) numcopies mincopies []
pcc = Command.Drop.PreferredContentChecked False
performOther :: (Key -> Git.Repo -> RawFilePath) -> Key -> CommandPerform performOther :: (Key -> Git.Repo -> RawFilePath) -> Key -> CommandPerform
performOther filespec key = do performOther filespec key = do

View file

@ -69,7 +69,7 @@ startKey o afile (si, key, ai) = case fromToOptions o of
( Command.Move.toStart Command.Move.RemoveNever afile key ai si =<< getParsed r ( Command.Move.toStart Command.Move.RemoveNever afile key ai si =<< getParsed r
, do , do
(numcopies, mincopies) <- getnummincopies (numcopies, mincopies) <- getnummincopies
Command.Drop.startRemote afile ai si numcopies mincopies key =<< getParsed r Command.Drop.startRemote pcc afile ai si numcopies mincopies key =<< getParsed r
) )
FromRemote r -> checkFailedTransferDirection ai Download $ do FromRemote r -> checkFailedTransferDirection ai Download $ do
haskey <- flip Remote.hasKey key =<< getParsed r haskey <- flip Remote.hasKey key =<< getParsed r
@ -82,10 +82,11 @@ startKey o afile (si, key, ai) = case fromToOptions o of
Right False -> ifM (inAnnex key) Right False -> ifM (inAnnex key)
( do ( do
(numcopies, mincopies) <- getnummincopies (numcopies, mincopies) <- getnummincopies
Command.Drop.startLocal afile ai si numcopies mincopies key [] Command.Drop.startLocal pcc afile ai si numcopies mincopies key []
, stop , stop
) )
where where
getnummincopies = case afile of getnummincopies = case afile of
AssociatedFile Nothing -> (,) <$> getNumCopies <*> getMinCopies AssociatedFile Nothing -> (,) <$> getNumCopies <*> getMinCopies
AssociatedFile (Just af) -> getFileNumMinCopies af AssociatedFile (Just af) -> getFileNumMinCopies af
pcc = Command.Drop.PreferredContentChecked False

View file

@ -293,7 +293,7 @@ toHereStart removewhen afile key ai si =
- repository reduces the number of copies, and should fail if - repository reduces the number of copies, and should fail if
- that would violate numcopies settings. - that would violate numcopies settings.
- -
- On the other hand, when the destiation repository does not already - On the other hand, when the destination repository does not already
- have a copy of a file, it can be dropped without making numcopies - have a copy of a file, it can be dropped without making numcopies
- worse, so the move is allowed even if numcopies is not met. - worse, so the move is allowed even if numcopies is not met.
- -
@ -311,7 +311,7 @@ toHereStart removewhen afile key ai si =
-} -}
willDropMakeItWorse :: UUID -> UUID -> DestStartedWithCopy -> Key -> AssociatedFile -> Annex DropCheck willDropMakeItWorse :: UUID -> UUID -> DestStartedWithCopy -> Key -> AssociatedFile -> Annex DropCheck
willDropMakeItWorse srcuuid destuuid (DestStartedWithCopy deststartedwithcopy) key afile = willDropMakeItWorse srcuuid destuuid (DestStartedWithCopy deststartedwithcopy) key afile =
ifM (Command.Drop.checkRequiredContent srcuuid key afile) ifM (Command.Drop.checkRequiredContent (Command.Drop.PreferredContentChecked False) srcuuid key afile)
( if deststartedwithcopy ( if deststartedwithcopy
then unlessforced DropCheckNumCopies then unlessforced DropCheckNumCopies
else ifM checktrustlevel else ifM checktrustlevel

View file

@ -46,8 +46,8 @@ import Logs.Remote
import Types.StandardGroups import Types.StandardGroups
import Limit import Limit
{- Checks if a file is preferred content for the specified repository {- Checks if a file is preferred content (or required content) for the
- (or the current repository if none is specified). -} - specified repository (or the current repository if none is specified). -}
isPreferredContent :: Maybe UUID -> AssumeNotPresent -> Maybe Key -> AssociatedFile -> Bool -> Annex Bool isPreferredContent :: Maybe UUID -> AssumeNotPresent -> Maybe Key -> AssociatedFile -> Bool -> Annex Bool
isPreferredContent = checkMap preferredContentMap isPreferredContent = checkMap preferredContentMap

22
Test.hs
View file

@ -376,6 +376,7 @@ unitTests note = testGroup ("Unit Tests " ++ note)
, testCase "bup remote" test_bup_remote , testCase "bup remote" test_bup_remote
, testCase "crypto" test_crypto , testCase "crypto" test_crypto
, testCase "preferred content" test_preferred_content , testCase "preferred content" test_preferred_content
, testCase "required_content" test_required_content
, testCase "add subdirs" test_add_subdirs , testCase "add subdirs" test_add_subdirs
, testCase "addurl" test_addurl , testCase "addurl" test_addurl
] ]
@ -749,6 +750,27 @@ test_preferred_content = intmpclonerepo $ do
git_annex "get" ["--auto", annexedfile] "get --auto of file with exclude=*" git_annex "get" ["--auto", annexedfile] "get --auto of file with exclude=*"
annexed_notpresent annexedfile annexed_notpresent annexedfile
test_required_content :: Assertion
test_required_content = intmpclonerepo $ do
git_annex "get" [annexedfile] "get"
annexed_present annexedfile
git_annex "required" [".", "include=" ++ annexedfile] "annexedfile required"
git_annex_shouldfail "drop" [annexedfile] "drop of required content should fail"
annexed_present annexedfile
git_annex "drop" ["--auto", annexedfile] "drop --auto of required content skips it"
annexed_present annexedfile
writecontent annexedfiledup $ content annexedfiledup
add_annex annexedfiledup "add of second file with same content failed"
annexed_present annexedfiledup
annexed_present annexedfile
git_annex_shouldfail "drop" [annexedfiledup] "drop of file sharing required content should fail"
annexed_present annexedfiledup
annexed_present annexedfile
test_lock :: Assertion test_lock :: Assertion
test_lock = intmpclonerepo $ do test_lock = intmpclonerepo $ do
annexed_notpresent annexedfile annexed_notpresent annexedfile