2014-01-19 21:35:36 +00:00
|
|
|
{- dropping of unwanted content
|
|
|
|
-
|
2021-01-06 18:11:08 +00:00
|
|
|
- Copyright 2012-2021 Joey Hess <id@joeyh.name>
|
2014-01-19 21:35:36 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2014-01-19 21:35:36 +00:00
|
|
|
-}
|
|
|
|
|
2021-04-05 17:40:31 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
2014-01-19 21:35:36 +00:00
|
|
|
module Annex.Drop where
|
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2014-01-19 21:35:36 +00:00
|
|
|
import Logs.Trust
|
2015-04-30 18:02:56 +00:00
|
|
|
import Annex.NumCopies
|
2020-12-21 17:46:04 +00:00
|
|
|
import Types.Remote (uuid, appendonly, config, remotetype, thirdPartyPopulated)
|
2014-01-19 21:35:36 +00:00
|
|
|
import qualified Remote
|
|
|
|
import qualified Command.Drop
|
|
|
|
import Command
|
|
|
|
import Annex.Wanted
|
2020-07-25 22:17:33 +00:00
|
|
|
import Annex.Content
|
2020-01-14 16:35:08 +00:00
|
|
|
import Annex.SpecialRemote.Config
|
2015-12-26 19:09:53 +00:00
|
|
|
import qualified Database.Keys
|
2014-01-19 21:35:36 +00:00
|
|
|
|
|
|
|
import qualified Data.Set as S
|
|
|
|
|
|
|
|
type Reason = String
|
|
|
|
|
2021-05-25 14:57:06 +00:00
|
|
|
{- Drop a key from local and/or remote when allowed by the preferred content,
|
|
|
|
- required content, and numcopies settings.
|
2014-01-19 21:35:36 +00:00
|
|
|
-
|
2018-08-30 15:23:57 +00:00
|
|
|
- Skips trying to drop from remotes that are appendonly, since those drops
|
2020-12-18 19:11:53 +00:00
|
|
|
- would presumably fail. Also skips dropping from exporttree/importtree remotes,
|
2020-12-28 18:37:15 +00:00
|
|
|
- which don't allow dropping individual keys, and from thirdPartyPopulated
|
|
|
|
- remotes.
|
2018-08-30 15:23:57 +00:00
|
|
|
-
|
2014-01-20 17:31:03 +00:00
|
|
|
- The UUIDs are ones where the content is believed to be present.
|
|
|
|
- The Remote list can include other remotes that do not have the content;
|
|
|
|
- only ones that match the UUIDs will be dropped from.
|
2015-10-14 16:33:02 +00:00
|
|
|
-
|
|
|
|
- If allowed to drop fromhere, that drop will be done last. This is done
|
|
|
|
- because local drops do not need any LockedCopy evidence, and so dropping
|
|
|
|
- from local last allows the content to be removed from more remotes.
|
2014-01-19 21:35:36 +00:00
|
|
|
-
|
2015-10-08 20:55:11 +00:00
|
|
|
- A VerifiedCopy can be provided as an optimisation when eg, a key
|
|
|
|
- has just been uploaded to a remote.
|
2014-01-19 21:35:36 +00:00
|
|
|
-
|
finish CommandStart transition
The hoped for optimisation of CommandStart with -J did not materialize.
In fact, not runnign CommandStart in parallel is slower than -J3.
So, CommandStart are still run in parallel.
(The actual bad performance I've been seeing with -J in my big repo
has to do with building the remoteList.)
But, this is still progress toward making -J faster, because it gets rid
of the onlyActionOn roadblock in the way of making CommandCleanup jobs
run separate from CommandPerform jobs.
Added OnlyActionOn constructor for ActionItem which fixes the
onlyActionOn breakage in the last commit.
Made CustomOutput include an ActionItem, so even things using it can
specify OnlyActionOn.
In Command.Move and Command.Sync, there were CommandStarts that used
includeCommandAction, so output messages, which is no longer allowed.
Fixed by using startingCustomOutput, but that's still not quite right,
since it prevents message display for the includeCommandAction run
inside it too.
2019-06-12 13:23:26 +00:00
|
|
|
- The runner is used to run CommandStart sequentially, it's typically
|
|
|
|
- callCommandAction.
|
2014-01-19 21:35:36 +00:00
|
|
|
-}
|
2020-09-14 20:49:33 +00:00
|
|
|
handleDropsFrom :: [UUID] -> [Remote] -> Reason -> Bool -> Key -> AssociatedFile -> SeekInput -> [VerifiedCopy] -> (CommandStart -> CommandCleanup) -> Annex ()
|
|
|
|
handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
|
2021-06-15 15:12:27 +00:00
|
|
|
fs <- Database.Keys.getAssociatedFilesIncluding afile key
|
2014-01-19 21:35:36 +00:00
|
|
|
n <- getcopies fs
|
2015-10-14 16:33:02 +00:00
|
|
|
void $ if fromhere && checkcopies n Nothing
|
2018-12-18 17:58:12 +00:00
|
|
|
then go fs rs n >>= dropl fs
|
|
|
|
else go fs rs n
|
2014-01-19 21:35:36 +00:00
|
|
|
where
|
|
|
|
getcopies fs = do
|
|
|
|
(untrusted, have) <- trustPartition UnTrusted locs
|
2021-06-15 15:38:44 +00:00
|
|
|
(numcopies, mincopies) <- getSafestNumMinCopies' afile key fs
|
2021-04-27 17:37:03 +00:00
|
|
|
return (length have, numcopies, mincopies, S.fromList untrusted)
|
2014-01-19 21:35:36 +00:00
|
|
|
|
|
|
|
{- Check that we have enough copies still to drop the content.
|
|
|
|
- When the remote being dropped from is untrusted, it was not
|
|
|
|
- counted as a copy, so having only numcopies suffices. Otherwise,
|
2021-01-06 18:11:08 +00:00
|
|
|
- we need more than numcopies to safely drop.
|
|
|
|
-
|
|
|
|
- This is not the final check that it's safe to drop, but it
|
|
|
|
- avoids doing extra work to do that check later in cases where it
|
|
|
|
- will surely fail.
|
|
|
|
-}
|
2021-04-27 17:37:03 +00:00
|
|
|
checkcopies (have, numcopies, mincopies, _untrusted) Nothing =
|
|
|
|
NumCopies have > numcopies && MinCopies have > mincopies
|
|
|
|
checkcopies (have, numcopies, mincopies, untrusted) (Just u)
|
|
|
|
| S.member u untrusted = NumCopies have >= numcopies && MinCopies have >= mincopies
|
|
|
|
| otherwise = NumCopies have > numcopies && MinCopies have > mincopies
|
2014-01-19 21:35:36 +00:00
|
|
|
|
2021-01-06 18:11:08 +00:00
|
|
|
decrcopies (have, numcopies, mincopies, untrusted) Nothing =
|
2021-04-27 17:37:03 +00:00
|
|
|
(have - 1, numcopies, mincopies, untrusted)
|
2021-01-06 18:11:08 +00:00
|
|
|
decrcopies v@(_have, _numcopies, _mincopies, untrusted) (Just u)
|
2014-01-19 21:35:36 +00:00
|
|
|
| S.member u untrusted = v
|
|
|
|
| otherwise = decrcopies v Nothing
|
|
|
|
|
2015-10-14 16:33:02 +00:00
|
|
|
go _ [] n = pure n
|
2014-01-19 21:35:36 +00:00
|
|
|
go fs (r:rest) n
|
|
|
|
| uuid r `S.notMember` slocs = go fs rest n
|
2018-12-18 17:58:12 +00:00
|
|
|
| appendonly r = go fs rest n
|
|
|
|
| exportTree (config r) = go fs rest n
|
2020-12-18 19:11:53 +00:00
|
|
|
| importTree (config r) = go fs rest n
|
2020-12-21 17:46:04 +00:00
|
|
|
| thirdPartyPopulated (remotetype r) = go fs rest n
|
2014-01-19 21:35:36 +00:00
|
|
|
| checkcopies n (Just $ Remote.uuid r) =
|
|
|
|
dropr fs r n >>= go fs rest
|
2015-10-14 16:33:02 +00:00
|
|
|
| otherwise = pure n
|
2014-01-19 21:35:36 +00:00
|
|
|
|
fix longstanding indeterminite preferred content for duplicated file problem
* drop: When two files have the same content, and a preferred content
expression matches one but not the other, do not drop the file.
* sync --content, assistant: Fix an edge case where a file that is not
preferred content did not get dropped.
The sync --content edge case is that handleDropsFrom loaded associated files
and used them without verifying that the information from the database was
not stale.
It seemed best to avoid changing --want-drop's behavior, this way when
debugging a preferred content expression with it, the files matched will
still reflect the expression. So added a note to the --want-drop documentation,
to make clear it may not behave identically to git-annex drop --auto.
While it would be possible to introspect the preferred content
expression to see if it matches on filenames, and only look up the
associated files when it does, it's generally fairly rare for 2 files to
have the same content, and the database lookup is already avoided when
there's only 1 file, so I did not implement that further optimisation.
Note that there are still some situations where the associated files
database does not get locked files recorded in it, which will prevent
this fix from working.
Sponsored-by: Dartmouth College's Datalad project
2021-05-24 18:02:50 +00:00
|
|
|
checkdrop fs n u a =
|
|
|
|
let afs = map (AssociatedFile . Just) fs
|
2021-05-25 14:57:06 +00:00
|
|
|
pcc = Command.Drop.PreferredContentChecked True
|
fix longstanding indeterminite preferred content for duplicated file problem
* drop: When two files have the same content, and a preferred content
expression matches one but not the other, do not drop the file.
* sync --content, assistant: Fix an edge case where a file that is not
preferred content did not get dropped.
The sync --content edge case is that handleDropsFrom loaded associated files
and used them without verifying that the information from the database was
not stale.
It seemed best to avoid changing --want-drop's behavior, this way when
debugging a preferred content expression with it, the files matched will
still reflect the expression. So added a note to the --want-drop documentation,
to make clear it may not behave identically to git-annex drop --auto.
While it would be possible to introspect the preferred content
expression to see if it matches on filenames, and only look up the
associated files when it does, it's generally fairly rare for 2 files to
have the same content, and the database lookup is already avoided when
there's only 1 file, so I did not implement that further optimisation.
Note that there are still some situations where the associated files
database does not get locked files recorded in it, which will prevent
this fix from working.
Sponsored-by: Dartmouth College's Datalad project
2021-05-24 18:02:50 +00:00
|
|
|
in ifM (wantDrop True u (Just key) afile (Just afs))
|
2021-05-25 14:57:06 +00:00
|
|
|
( dodrop n u (a pcc)
|
fix longstanding indeterminite preferred content for duplicated file problem
* drop: When two files have the same content, and a preferred content
expression matches one but not the other, do not drop the file.
* sync --content, assistant: Fix an edge case where a file that is not
preferred content did not get dropped.
The sync --content edge case is that handleDropsFrom loaded associated files
and used them without verifying that the information from the database was
not stale.
It seemed best to avoid changing --want-drop's behavior, this way when
debugging a preferred content expression with it, the files matched will
still reflect the expression. So added a note to the --want-drop documentation,
to make clear it may not behave identically to git-annex drop --auto.
While it would be possible to introspect the preferred content
expression to see if it matches on filenames, and only look up the
associated files when it does, it's generally fairly rare for 2 files to
have the same content, and the database lookup is already avoided when
there's only 1 file, so I did not implement that further optimisation.
Note that there are still some situations where the associated files
database does not get locked files recorded in it, which will prevent
this fix from working.
Sponsored-by: Dartmouth College's Datalad project
2021-05-24 18:02:50 +00:00
|
|
|
, return n
|
|
|
|
)
|
2014-01-23 20:37:08 +00:00
|
|
|
|
2021-01-06 18:11:08 +00:00
|
|
|
dodrop n@(have, numcopies, mincopies, _untrusted) u a =
|
|
|
|
ifM (safely $ runner $ a numcopies mincopies)
|
2014-01-23 20:37:08 +00:00
|
|
|
( do
|
2021-04-06 19:41:24 +00:00
|
|
|
fastDebug "Annex.Drop" $ unwords
|
2014-01-23 20:37:08 +00:00
|
|
|
[ "dropped"
|
2017-03-10 17:12:24 +00:00
|
|
|
, case afile of
|
2019-01-14 17:03:35 +00:00
|
|
|
AssociatedFile Nothing -> serializeKey key
|
2019-12-04 17:15:34 +00:00
|
|
|
AssociatedFile (Just af) -> fromRawFilePath af
|
2014-01-23 20:37:08 +00:00
|
|
|
, "(from " ++ maybe "here" show u ++ ")"
|
2021-04-27 17:37:03 +00:00
|
|
|
, "(copies now " ++ show (have - 1) ++ ")"
|
2014-01-23 20:37:08 +00:00
|
|
|
, ": " ++ reason
|
|
|
|
]
|
|
|
|
return $ decrcopies n u
|
2014-01-19 21:35:36 +00:00
|
|
|
, return n
|
|
|
|
)
|
|
|
|
|
2021-05-25 14:57:06 +00:00
|
|
|
dropl fs n = checkdrop fs n Nothing $ \pcc numcopies mincopies ->
|
2020-07-25 22:17:33 +00:00
|
|
|
stopUnless (inAnnex key) $
|
2021-06-25 19:22:05 +00:00
|
|
|
Command.Drop.startLocal pcc afile ai si numcopies mincopies key preverified (Command.Drop.DroppingUnused False)
|
2014-01-19 21:35:36 +00:00
|
|
|
|
2021-05-25 14:57:06 +00:00
|
|
|
dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \pcc numcopies mincopies ->
|
2021-06-25 19:22:05 +00:00
|
|
|
Command.Drop.startRemote pcc afile ai si numcopies mincopies key (Command.Drop.DroppingUnused False) r
|
2019-06-06 16:53:24 +00:00
|
|
|
|
|
|
|
ai = mkActionItem (key, afile)
|
2014-01-19 21:35:36 +00:00
|
|
|
|
2014-01-20 17:31:03 +00:00
|
|
|
slocs = S.fromList locs
|
|
|
|
|
unify exception handling into Utility.Exception
Removed old extensible-exceptions, only needed for very old ghc.
Made webdav use Utility.Exception, to work after some changes in DAV's
exception handling.
Removed Annex.Exception. Mostly this was trivial, but note that
tryAnnex is replaced with tryNonAsync and catchAnnex replaced with
catchNonAsync. In theory that could be a behavior change, since the former
caught all exceptions, and the latter don't catch async exceptions.
However, in practice, nothing in the Annex monad uses async exceptions.
Grepping for throwTo and killThread only find stuff in the assistant,
which does not seem related.
Command.Add.undo is changed to accept a SomeException, and things
that use it for rollback now catch non-async exceptions, rather than
only IOExceptions.
2014-08-08 01:55:44 +00:00
|
|
|
safely a = either (const False) id <$> tryNonAsync a
|
2014-01-19 21:35:36 +00:00
|
|
|
|