sync --content: Fix dropping unwanted content from the local repository

This fixes a bug with the numcopies counting when using sync --content.
It did not always pass the local repo uuid to handleDropsFrom, and so the
numcopies counting was off by one, and unwanted local content would only be
dropped when there were numcopies+1 remote copies.

Also, support dropping local content that has reached an
exporttree remote that is not untrusted (currently only S3 remotes
with versioning).
This commit is contained in:
Joey Hess 2018-12-18 13:58:12 -04:00
parent 9438ecc30b
commit 6d381df0e6
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 36 additions and 13 deletions

View file

@ -11,11 +11,12 @@ import Annex.Common
import qualified Annex import qualified Annex
import Logs.Trust import Logs.Trust
import Annex.NumCopies import Annex.NumCopies
import Types.Remote (uuid, appendonly) import Types.Remote (uuid, appendonly, config)
import qualified Remote import qualified Remote
import qualified Command.Drop import qualified Command.Drop
import Command import Command
import Annex.Wanted import Annex.Wanted
import Annex.Export
import Config import Config
import Annex.Content.Direct import Annex.Content.Direct
import qualified Database.Keys import qualified Database.Keys
@ -30,7 +31,8 @@ type Reason = String
- and numcopies settings. - and numcopies settings.
- -
- Skips trying to drop from remotes that are appendonly, since those drops - Skips trying to drop from remotes that are appendonly, since those drops
- would presumably fail. - would presumably fail. Also skips dropping from exporttree remotes,
- which don't allow dropping individual keys.
- -
- The UUIDs are ones where the content is believed to be present. - The UUIDs are ones where the content is believed to be present.
- The Remote list can include other remotes that do not have the content; - The Remote list can include other remotes that do not have the content;
@ -61,10 +63,9 @@ handleDropsFrom locs rs reason fromhere key afile preverified runner = do
AssociatedFile (Just f) -> nub (f : l) AssociatedFile (Just f) -> nub (f : l)
AssociatedFile Nothing -> l AssociatedFile Nothing -> l
n <- getcopies fs n <- getcopies fs
let rs' = filter (not . appendonly) rs
void $ if fromhere && checkcopies n Nothing void $ if fromhere && checkcopies n Nothing
then go fs rs' n >>= dropl fs then go fs rs n >>= dropl fs
else go fs rs' n else go fs rs n
where where
getcopies fs = do getcopies fs = do
(untrusted, have) <- trustPartition UnTrusted locs (untrusted, have) <- trustPartition UnTrusted locs
@ -91,6 +92,8 @@ handleDropsFrom locs rs reason fromhere key afile preverified runner = do
go _ [] n = pure n go _ [] n = pure n
go fs (r:rest) n go fs (r:rest) n
| uuid r `S.notMember` slocs = go fs rest n | uuid r `S.notMember` slocs = go fs rest n
| appendonly r = go fs rest n
| exportTree (config r) = go fs rest n
| checkcopies n (Just $ Remote.uuid r) = | checkcopies n (Just $ Remote.uuid r) =
dropr fs r n >>= go fs rest dropr fs r n >>= go fs rest
| otherwise = pure n | otherwise = pure n

View file

@ -1,3 +1,12 @@
git-annex (7.20181212) UNRELEASED; urgency=medium
* sync --content: Fix dropping unwanted content from the local repository.
* sync --content: Support dropping local content that has reached an
exporttree remote that is not untrusted (currently only S3 remotes
with versioning).
-- Joey Hess <id@joeyh.name> Tue, 18 Dec 2018 12:24:52 -0400
git-annex (7.20181211) upstream; urgency=medium git-annex (7.20181211) upstream; urgency=medium
* S3: Improve diagnostics when a remote is configured with exporttree and * S3: Improve diagnostics when a remote is configured with exporttree and

View file

@ -166,9 +166,9 @@ seek o = allowConcurrentOutput $ do
remotes <- syncRemotes (syncWith o) remotes <- syncRemotes (syncWith o)
let gitremotes = filter Remote.gitSyncableRemote remotes let gitremotes = filter Remote.gitSyncableRemote remotes
(exportremotes, dataremotes) <- partition (exportTree . Remote.config) dataremotes <- filter (\r -> Remote.uuid r /= NoUUID)
. filter (\r -> Remote.uuid r /= NoUUID)
<$> filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . Remote.gitconfig) remotes <$> filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . Remote.gitconfig) remotes
let exportremotes = filter (exportTree . Remote.config) dataremotes
if cleanupOption o if cleanupOption o
then do then do
@ -187,8 +187,11 @@ seek o = allowConcurrentOutput $ do
] ]
whenM shouldsynccontent $ do whenM shouldsynccontent $ do
syncedcontent <- withbranch $ seekSyncContent o dataremotes -- Send content to any exports first, in
-- case that lets content be dropped from
-- other repositories.
exportedcontent <- withbranch $ seekExportContent exportremotes exportedcontent <- withbranch $ seekExportContent exportremotes
syncedcontent <- withbranch $ seekSyncContent o dataremotes
-- Transferring content can take a while, -- Transferring content can take a while,
-- and other changes can be pushed to the -- and other changes can be pushed to the
-- git-annex branch on the remotes in the -- git-annex branch on the remotes in the
@ -618,14 +621,15 @@ seekSyncContent o rs currbranch = do
-} -}
syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex Bool syncFile :: Either (Maybe (Bloom Key)) (Key -> Annex ()) -> [Remote] -> AssociatedFile -> Key -> Annex Bool
syncFile ebloom rs af k = onlyActionOn' k $ do syncFile ebloom rs af k = onlyActionOn' k $ do
inhere <- inAnnex k
locs <- map Remote.uuid <$> Remote.keyPossibilities k locs <- map Remote.uuid <$> Remote.keyPossibilities k
let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs
got <- anyM id =<< handleget have got <- anyM id =<< handleget have inhere
putrs <- handleput lack putrs <- handleput lack
u <- getUUID u <- getUUID
let locs' = concat [[u | got], putrs, locs] let locs' = concat [if inhere || got then [u] else [], putrs, locs]
-- A bloom filter is populated with all the keys in the first pass. -- A bloom filter is populated with all the keys in the first pass.
-- On the second pass, avoid dropping keys that were seen in the -- On the second pass, avoid dropping keys that were seen in the
@ -649,12 +653,12 @@ syncFile ebloom rs af k = onlyActionOn' k $ do
return (got || not (null putrs)) return (got || not (null putrs))
where where
wantget have = allM id wantget have inhere = allM id
[ pure (not $ null have) [ pure (not $ null have)
, not <$> inAnnex k , pure (not inhere)
, wantGet True (Just k) af , wantGet True (Just k) af
] ]
handleget have = ifM (wantget have) handleget have inhere = ifM (wantget have inhere)
( return [ get have ] ( return [ get have ]
, return [] , return []
) )

View file

@ -4,3 +4,10 @@ has been transferred to an exporttree remote.
This normally doesn't matter since exporttree remotes are untrusted, but This normally doesn't matter since exporttree remotes are untrusted, but
S3 with versioning enabled is not untrusted and once a file reaches such a S3 with versioning enabled is not untrusted and once a file reaches such a
remote it should be able to be dropped locally. --[[Joey]] remote it should be able to be dropped locally. --[[Joey]]
Actually, there are two bugs here, because sync --content also fails to
drop local unwanted content that's got only 1 copy on another remote.
It forgot to include the local copy as a currently present copy, throwing
off the numcopies counting. --[[Joey]]
Both [fixed|done]] --[[Joey]]