From a6c1d9752b707fedb9bd4cf743742d1bf16538e7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 18 Jan 2023 14:42:39 -0400 Subject: [PATCH 1/9] move/copy: option parsing for --from with --to Allowing --from and --to as an alternative to --from or --to is hard to do with optparse-applicative! The obvious approach of (pfrom <|> pto <|> pfromandto) does not work when pfromandto uses the same option names as pfrom and pto do. It compiles but the generated parser does not work for all desired combinations. Instead, have to parse optionally from and optionally to. When neither is provided, the parser succeeds, but it's a result that can't be handled. So, have to giveup after option parsing. There does not seem to be a way to make an optparse-applicative Parser give up internally either. Also, need seek' because I first tried making fto be a where binding, but that resulted in a hang when git-annex move was run without --from or --to. I think because startConcurrency was not expecting the stages value to contain an exception and so ended up blocking. Sponsored-by: Dartmouth College's DANDI project --- CHANGELOG | 2 ++ CmdLine/GitAnnex/Options.hs | 41 ++++++++++++++++++++++------------- Command/Copy.hs | 43 ++++++++++++++++++++++--------------- Command/Move.hs | 42 +++++++++++++++++++++--------------- doc/git-annex-copy.mdwn | 7 ++++++ doc/git-annex-move.mdwn | 7 ++++++ 6 files changed, 93 insertions(+), 49 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c11d40cce7..d9293a49ff 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -23,6 +23,8 @@ git-annex (10.20221213) UNRELEASED; urgency=medium * Added an optional cost= configuration to all special remotes. * adb: Support the remote.name.cost and remote.name.cost-command configs. * findkeys: New command, very similar to git-annex find but operating on keys. + * move, copy: Support combining --from and --to in order to move/copy + the content of files that are in one remote to another remote. -- Joey Hess Mon, 12 Dec 2022 13:04:54 -0400 diff --git a/CmdLine/GitAnnex/Options.hs b/CmdLine/GitAnnex/Options.hs index 37203c2076..36434aad14 100644 --- a/CmdLine/GitAnnex/Options.hs +++ b/CmdLine/GitAnnex/Options.hs @@ -1,6 +1,6 @@ {- git-annex command-line option parsing - - - Copyright 2010-2021 Joey Hess + - Copyright 2010-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -134,7 +134,7 @@ parseDryRunOption = DryRun <$> switch <> help "don't make changes, but show what would be done" ) --- | From or To a remote. +-- | From or To a remote but not both. data FromToOptions = FromRemote (DeferredParse Remote) | ToRemote (DeferredParse Remote) @@ -162,23 +162,34 @@ parseToOption = strOption <> completeRemotes ) --- | Like FromToOptions, but with a special --to=here -type FromToHereOptions = Either ToHere FromToOptions +-- | From or to a remote, or both, or a special --to=here +data FromToHereOptions + = FromOrToRemote FromToOptions + | ToHere + | FromRemoteToRemote (DeferredParse Remote) (DeferredParse Remote) -data ToHere = ToHere - -parseFromToHereOptions :: Parser FromToHereOptions -parseFromToHereOptions = parsefrom <|> parseto +parseFromToHereOptions :: Parser (Maybe FromToHereOptions) +parseFromToHereOptions = go + <$> optional parseFromOption + <*> optional parseToOption where - parsefrom = Right . FromRemote . parseRemoteOption <$> parseFromOption - parseto = herespecialcase <$> parseToOption - where - herespecialcase "here" = Left ToHere - herespecialcase "." = Left ToHere - herespecialcase n = Right $ ToRemote $ parseRemoteOption n + go (Just from) (Just to) = Just $ FromRemoteToRemote + (parseRemoteOption from) + (parseRemoteOption to) + go (Just from) Nothing = Just $ FromOrToRemote + (FromRemote $ parseRemoteOption from) + go Nothing (Just to) = Just $ case to of + "here" -> ToHere + "." -> ToHere + _ -> FromOrToRemote $ ToRemote $ parseRemoteOption to + go Nothing Nothing = Nothing instance DeferredParseClass FromToHereOptions where - finishParse = either (pure . Left) (Right <$$> finishParse) + finishParse (FromOrToRemote v) = FromOrToRemote <$> finishParse v + finishParse ToHere = pure ToHere + finishParse (FromRemoteToRemote v1 v2) = FromRemoteToRemote + <$> finishParse v1 + <*> finishParse v2 -- Options for acting on keys, rather than work tree files. data KeyOptions diff --git a/Command/Copy.hs b/Command/Copy.hs index 267fa71d3d..11ef1ddd08 100644 --- a/Command/Copy.hs +++ b/Command/Copy.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010 Joey Hess + - Copyright 2010-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -21,7 +21,7 @@ cmd = withAnnexOptions [jobsOption, jsonOptions, jsonProgressOption, annexedMatc data CopyOptions = CopyOptions { copyFiles :: CmdParams - , fromToOptions :: FromToHereOptions + , fromToOptions :: Maybe FromToHereOptions , keyOptions :: Maybe KeyOptions , autoMode :: Bool , batchOption :: BatchMode @@ -38,13 +38,19 @@ optParser desc = CopyOptions instance DeferredParseClass CopyOptions where finishParse v = CopyOptions <$> pure (copyFiles v) - <*> finishParse (fromToOptions v) + <*> maybe (pure Nothing) (Just <$$> finishParse) + (fromToOptions v) <*> pure (keyOptions v) <*> pure (autoMode v) <*> pure (batchOption v) seek :: CopyOptions -> CommandSeek -seek o = startConcurrency commandStages $ do +seek o = case fromToOptions o of + Just fto -> seek' o fto + Nothing -> giveup "Specify --from or --to" + +seek' :: CopyOptions -> FromToHereOptions -> CommandSeek +seek' o fto = startConcurrency commandStages $ do case batchOption o of NoBatch -> withKeyOptions (keyOptions o) (autoMode o) seeker @@ -57,30 +63,33 @@ seek o = startConcurrency commandStages $ do ww = WarnUnmatchLsFiles seeker = AnnexedFileSeeker - { startAction = start o - , checkContentPresent = case fromToOptions o of - Right (FromRemote _) -> Just False - Right (ToRemote _) -> Just True - Left ToHere -> Just False + { startAction = start o fto + , checkContentPresent = case fto of + FromOrToRemote (FromRemote _) -> Just False + FromOrToRemote (ToRemote _) -> Just True + ToHere -> Just False + FromRemoteToRemote _ _ -> Just False , usesLocationLog = True } - keyaction = Command.Move.startKey (fromToOptions o) Command.Move.RemoveNever + keyaction = Command.Move.startKey fto Command.Move.RemoveNever {- A copy is just a move that does not delete the source file. - However, auto mode avoids unnecessary copies, and avoids getting or - sending non-preferred content. -} -start :: CopyOptions -> SeekInput -> RawFilePath -> Key -> CommandStart -start o si file key = stopUnless shouldCopy $ - Command.Move.start (fromToOptions o) Command.Move.RemoveNever si file key +start :: CopyOptions -> FromToHereOptions -> SeekInput -> RawFilePath -> Key -> CommandStart +start o fto si file key = stopUnless shouldCopy $ + Command.Move.start fto Command.Move.RemoveNever si file key where shouldCopy | autoMode o = want <||> numCopiesCheck file key (<) | otherwise = return True - want = case fromToOptions o of - Right (ToRemote dest) -> + want = case fto of + FromOrToRemote (ToRemote dest) -> + (Remote.uuid <$> getParsed dest) >>= checkwantsend + FromOrToRemote (FromRemote _) -> checkwantget + ToHere -> checkwantget + FromRemoteToRemote _ dest -> (Remote.uuid <$> getParsed dest) >>= checkwantsend - Right (FromRemote _) -> checkwantget - Left ToHere -> checkwantget checkwantsend = wantGetBy False (Just key) (AssociatedFile (Just file)) checkwantget = wantGet False (Just key) (AssociatedFile (Just file)) diff --git a/Command/Move.hs b/Command/Move.hs index abe207c2f2..b14707eb27 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2022 Joey Hess + - Copyright 2010-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -32,7 +32,7 @@ cmd = withAnnexOptions [jobsOption, jsonOptions, jsonProgressOption, annexedMatc data MoveOptions = MoveOptions { moveFiles :: CmdParams - , fromToOptions :: FromToHereOptions + , fromToOptions :: Maybe FromToHereOptions , removeWhen :: RemoveWhen , keyOptions :: Maybe KeyOptions , batchOption :: BatchMode @@ -49,7 +49,8 @@ optParser desc = MoveOptions instance DeferredParseClass MoveOptions where finishParse v = MoveOptions <$> pure (moveFiles v) - <*> finishParse (fromToOptions v) + <*> maybe (pure Nothing) (Just <$$> finishParse) + (fromToOptions v) <*> pure (removeWhen v) <*> pure (keyOptions v) <*> pure (batchOption v) @@ -58,7 +59,12 @@ data RemoveWhen = RemoveSafe | RemoveNever deriving (Show, Eq) seek :: MoveOptions -> CommandSeek -seek o = startConcurrency stages $ do +seek o = case fromToOptions o of + Just fto -> seek' o fto + Nothing -> giveup "Specify --from or --to" + +seek' :: MoveOptions -> FromToHereOptions -> CommandSeek +seek' o fto = startConcurrency stages $ do case batchOption o of NoBatch -> withKeyOptions (keyOptions o) False seeker (commandAction . keyaction) @@ -68,18 +74,20 @@ seek o = startConcurrency stages $ do batchAnnexed fmt seeker keyaction where seeker = AnnexedFileSeeker - { startAction = start (fromToOptions o) (removeWhen o) - , checkContentPresent = case fromToOptions o of - Right (FromRemote _) -> Nothing - Right (ToRemote _) -> Just True - Left ToHere -> Nothing + { startAction = start fto (removeWhen o) + , checkContentPresent = case fto of + FromOrToRemote (FromRemote _) -> Nothing + FromOrToRemote (ToRemote _) -> Just True + ToHere -> Nothing + FromRemoteToRemote _ _ -> Nothing , usesLocationLog = True } - stages = case fromToOptions o of - Right (FromRemote _) -> downloadStages - Right (ToRemote _) -> commandStages - Left ToHere -> downloadStages - keyaction = startKey (fromToOptions o) (removeWhen o) + stages = case fto of + FromOrToRemote (FromRemote _) -> downloadStages + FromOrToRemote (ToRemote _) -> commandStages + ToHere -> downloadStages + FromRemoteToRemote _ _ -> commandStages + keyaction = startKey fto (removeWhen o) ww = WarnUnmatchLsFiles start :: FromToHereOptions -> RemoveWhen -> SeekInput -> RawFilePath -> Key -> CommandStart @@ -95,13 +103,13 @@ startKey fromto removewhen (si, k, ai) = start' :: FromToHereOptions -> RemoveWhen -> AssociatedFile -> SeekInput -> Key -> ActionItem -> CommandStart start' fromto removewhen afile si key ai = case fromto of - Right (FromRemote src) -> + FromOrToRemote (FromRemote src) -> checkFailedTransferDirection ai Download $ fromStart removewhen afile key ai si =<< getParsed src - Right (ToRemote dest) -> + FromOrToRemote (ToRemote dest) -> checkFailedTransferDirection ai Upload $ toStart removewhen afile key ai si =<< getParsed dest - Left ToHere -> + ToHere -> checkFailedTransferDirection ai Download $ toHereStart removewhen afile key ai si diff --git a/doc/git-annex-copy.mdwn b/doc/git-annex-copy.mdwn index 5a4e805cdf..03563bda11 100644 --- a/doc/git-annex-copy.mdwn +++ b/doc/git-annex-copy.mdwn @@ -32,6 +32,13 @@ Paths of files or directories to operate on can be specified. Copy the content of files from all reachable remotes to the local repository. +* `--from=remote1 --to=remote2` + + Copy the content of files that are in remote1 to remote2. Does not change + what is stored in the local repository. + + Note: This may need to store an intermediate copy of the content on disk. + * `--jobs=N` `-JN` Enables parallel transfers with up to the specified number of jobs diff --git a/doc/git-annex-move.mdwn b/doc/git-annex-move.mdwn index 0fbea84d58..c2d5ec1e0c 100644 --- a/doc/git-annex-move.mdwn +++ b/doc/git-annex-move.mdwn @@ -28,6 +28,13 @@ Paths of files or directories to operate on can be specified. Move the content of files from all reachable remotes to the local repository. +* `--from=remote1 --to=remote2` + + Move the content of files that are in remote1 to remote2. Does not change + what is stored in the local repository. + + Note: This may need to store an intermediate copy of the content on disk. + * `--force` Override numcopies and required content checking, and always remove From f74904ee2c7cec936cac3f2536daa1d426739b80 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 18 Jan 2023 15:15:41 -0400 Subject: [PATCH 2/9] remove --fast from man page git-annex move does not actually behave any differently with --fast than without it. (git-annex copy does) --- doc/git-annex-move.mdwn | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/git-annex-move.mdwn b/doc/git-annex-move.mdwn index c2d5ec1e0c..7b29284d7c 100644 --- a/doc/git-annex-move.mdwn +++ b/doc/git-annex-move.mdwn @@ -75,12 +75,6 @@ Paths of files or directories to operate on can be specified. Use this option to move a specified key. -* `--fast` - - When moving content to a remote, avoid a round trip to check if the remote - already has content. This can be faster, but might skip moving content - to the remote in some cases. - * matching options The [[git-annex-matching-options]](1) From a46c385aec2584419330c5dbb571c19ceb92f6fb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 20 Jan 2023 11:10:38 -0400 Subject: [PATCH 3/9] move/copy: started implementing --from src --to dest This is not in a usable state, but I have a possible plan for how to do it. Sponsored-by: Dartmouth College's DANDI project --- Command/Get.hs | 2 ++ Command/Move.hs | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/Command/Get.hs b/Command/Get.hs index a25fd8bf16..41e37a2e42 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -115,5 +115,7 @@ getKey' key afile = dispatch either (const False) id <$> Remote.hasKey r key | otherwise = return True docopy r witness = do + liftIO $ print "read line" + void $ liftIO $ getLine showAction $ "from " ++ Remote.name r download r key afile stdRetry witness diff --git a/Command/Move.hs b/Command/Move.hs index b14707eb27..fede141fbe 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -112,6 +112,10 @@ start' fromto removewhen afile si key ai = ToHere -> checkFailedTransferDirection ai Download $ toHereStart removewhen afile key ai si + FromRemoteToRemote src dest -> do + src' <- getParsed src + dest' <- getParsed dest + fromToStart removewhen afile key ai si src' dest' describeMoveAction :: RemoveWhen -> String describeMoveAction RemoveNever = "copy" @@ -305,6 +309,67 @@ toHereStart removewhen afile key ai si = fromPerform r removewhen key afile next $ return True +fromToStart :: RemoveWhen -> AssociatedFile -> Key -> ActionItem -> SeekInput -> Remote -> Remote -> CommandStart +fromToStart removewhen afile key ai si src dest = do + if Remote.uuid src == Remote.uuid dest + then stop + else do + u <- getUUID + if u == Remote.uuid src + then toStart removewhen afile key ai si dest + else if u == Remote.uuid dest + then fromStart removewhen afile key ai si src + else stopUnless (fromOk src key) $ + starting (describeMoveAction removewhen) (OnlyActionOn key ai) si $ + fromToPerform src dest removewhen key afile + +{- If there is a local copy, transfer it to the dest, and drop from the src. + - Otherwise, download a copy from the dest, populating the local annex + - copy, but not updating location logs. Then transfer that to the dest, + - drop the local copy, and finally drop from the src. + - + - Using a regular download of the local copy, rather than download to + - some other file makes resuming an interruped download work as usual, + - and simplifies implementation. It does mean that, if git-annex get of + - the same content is being run at the same time, it will see that + - the local copy exists, but then it would get deleted. To avoid that + - unexpected behavior, check the location log before dropping the local + - copy, and if it has been updated (by another process) to say that the + - content is present locally, skip dropping the local copy. + - + - (That leaves a small race, where the other process updates the location + - log after we check it. And another where the other process sees the + - local copy exists just before we drop it.) + - + - The other complication of this approach is that the temporary local + - copy could be seen by another process that uses it as one of the + - necessary copies when dropping from somewhere else. To avoid the number + - of copies being reduced in such a situation, lock the local copy for + - drop before downloading it (v10) or immediately after download + - (v9 or older). + -} +fromToPerform :: Remote -> Remote -> RemoveWhen -> Key -> AssociatedFile -> CommandPerform +fromToPerform src dest removewhen key afile = do + present <- inAnnex key + if present + then do + showAction $ "to " ++ Remote.name dest + sendlocaltodest + dropfromsrc + error "TODO" + else do + showAction $ "from " ++ Remote.name src + downloadsrctotemp + sendtemptodest + dropfromsrc + showAction $ "to " ++ Remote.name dest + error "TODO" + where + sendlocaltodest = error "TODO" + downloadsrctotemp = error "TODO" + sendtemptodest = error "TODO" + dropfromsrc = error "TODO" + {- The goal of this command is to allow the user maximum freedom to move - files as they like, while avoiding making bad situations any worse - than they already were. From 8c349b8802fd57b6d7e79460758a9d850822630e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 23 Jan 2023 13:16:14 -0400 Subject: [PATCH 4/9] implement move --from --to when there is a local copy already This is rather trivial, since it does not need to temporarily get the local copy. Added fromPerform' to handle the situation where the local copy is dropped by another process during the copy to the dest. This avoids ever re-downloading the local copy before dropping from the src. Sponsored-by: Dartmouth College's DANDI project --- Command/Move.hs | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/Command/Move.hs b/Command/Move.hs index fede141fbe..8d5760f9ed 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -245,8 +245,12 @@ fromOk src key fromPerform :: Remote -> RemoveWhen -> Key -> AssociatedFile -> CommandPerform fromPerform src removewhen key afile = do - showAction $ "from " ++ Remote.name src present <- inAnnex key + fromPerform' present src removewhen key afile + +fromPerform' :: Bool -> Remote -> RemoveWhen -> Key -> AssociatedFile -> CommandPerform +fromPerform' present src removewhen key afile = do + showAction $ "from " ++ Remote.name src destuuid <- getUUID logMove srcuuid destuuid present key $ \deststartedwithcopy -> if present @@ -323,14 +327,15 @@ fromToStart removewhen afile key ai si src dest = do starting (describeMoveAction removewhen) (OnlyActionOn key ai) si $ fromToPerform src dest removewhen key afile -{- If there is a local copy, transfer it to the dest, and drop from the src. +{- When there is a local copy, transfer it to the dest, and drop from the src. + - - Otherwise, download a copy from the dest, populating the local annex - copy, but not updating location logs. Then transfer that to the dest, - drop the local copy, and finally drop from the src. - - Using a regular download of the local copy, rather than download to - some other file makes resuming an interruped download work as usual, - - and simplifies implementation. It does mean that, if git-annex get of + - and simplifies implementation. It does mean that, if `git-annex get` of - the same content is being run at the same time, it will see that - the local copy exists, but then it would get deleted. To avoid that - unexpected behavior, check the location log before dropping the local @@ -339,24 +344,22 @@ fromToStart removewhen afile key ai si src dest = do - - (That leaves a small race, where the other process updates the location - log after we check it. And another where the other process sees the - - local copy exists just before we drop it.) + - local copy exists just before we drop it. In either case the resulting + - behavior is similar to `git-annex move --to` being run concurrently + - with `git-annex get`.) - - The other complication of this approach is that the temporary local - copy could be seen by another process that uses it as one of the - necessary copies when dropping from somewhere else. To avoid the number - - of copies being reduced in such a situation, lock the local copy for - - drop before downloading it (v10) or immediately after download - - (v9 or older). + - of copies being reduced in such a situation (or the local copy not being + - able to be safely dropped), lock the local copy for drop before + - downloading it (v10) or immediately after download (v9 or older). -} fromToPerform :: Remote -> Remote -> RemoveWhen -> Key -> AssociatedFile -> CommandPerform fromToPerform src dest removewhen key afile = do present <- inAnnex key if present - then do - showAction $ "to " ++ Remote.name dest - sendlocaltodest - dropfromsrc - error "TODO" + then gopresent else do showAction $ "from " ++ Remote.name src downloadsrctotemp @@ -370,6 +373,17 @@ fromToPerform src dest removewhen key afile = do sendtemptodest = error "TODO" dropfromsrc = error "TODO" + gopresent = do + haskey <- Remote.hasKey dest key + toPerform dest RemoveNever key afile False haskey >>= \case + Just cleanup -> fromPerform' True src removewhen key afile >>= \case + Just cleanup' -> return $ Just $ do + ok <- cleanup + ok' <- cleanup' + return (ok && ok') + Nothing -> return $ Just cleanup + Nothing -> return Nothing + {- The goal of this command is to allow the user maximum freedom to move - files as they like, while avoiding making bad situations any worse - than they already were. From 1abd457e98dbe3b7d96227f16dc79fde6f5b59b8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 23 Jan 2023 13:45:26 -0400 Subject: [PATCH 5/9] push location log updating up to callers of download Prep for move --to --from, which needs to download from a src repo without updating the location log for the local repo, before sending the content on to the dest repo. Note that caller of download' already update the log themselves. See previous commit a422a056f2785999220cde55e07b0fbf999edd44 that pushed it up to download from getViaTmpFrom. (Also removed in passing a debug print + readline that I accidentially committed last week on this branch.) Sponsored-by: Dartmouth College's DANDI project --- Annex/Transfer.hs | 3 +-- Command/Get.hs | 8 ++++---- Command/Move.hs | 4 +++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index 315033d219..e33c309fc8 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -29,7 +29,6 @@ import Annex.Notification as X import Annex.Content import Annex.Perms import Annex.Action -import Logs.Location import Utility.Metered import Utility.ThreadScheduler import Annex.LockPool @@ -73,7 +72,7 @@ alwaysUpload u key f sd d a _witness = guardHaveUUID u $ -- Download, supporting canceling detected stalls. download :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool -download r key f d witness = logStatusAfter key $ +download r key f d witness = case remoteAnnexStallDetection (Remote.gitconfig r) of Nothing -> go (Just ProbeStallDetection) Just StallDetectionDisabled -> go Nothing diff --git a/Command/Get.hs b/Command/Get.hs index 41e37a2e42..a31b6470a4 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010, 2013 Joey Hess + - Copyright 2010-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -13,6 +13,7 @@ import Annex.Transfer import Annex.NumCopies import Annex.Wanted import qualified Command.Move +import Logs.Location cmd :: Command cmd = withAnnexOptions [jobsOption, jsonOptions, jsonProgressOption, annexedMatchingOptions] $ @@ -115,7 +116,6 @@ getKey' key afile = dispatch either (const False) id <$> Remote.hasKey r key | otherwise = return True docopy r witness = do - liftIO $ print "read line" - void $ liftIO $ getLine showAction $ "from " ++ Remote.name r - download r key afile stdRetry witness + logStatusAfter key $ + download r key afile stdRetry witness diff --git a/Command/Move.hs b/Command/Move.hs index 8d5760f9ed..945d9c0eb3 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -19,6 +19,7 @@ import Annex.Transfer import Logs.Presence import Logs.Trust import Logs.File +import Logs.Location import Annex.NumCopies import qualified Data.ByteString.Char8 as B8 @@ -258,7 +259,8 @@ fromPerform' present src removewhen key afile = do else dispatch removewhen deststartedwithcopy =<< get where get = notifyTransfer Download afile $ - download src key afile stdRetry + logStatusAfter key . + download src key afile stdRetry dispatch _ deststartedwithcopy False = do logMoveCleanup deststartedwithcopy From f5f799f17e8749c2ee6ceba29d2d18a644e8710e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 23 Jan 2023 15:36:06 -0400 Subject: [PATCH 6/9] fully working move --from --to (not release quality) When the destination already has a copy, it behaves the same as drop --from really, but display it as a move and implement it reusing the factored out code from fromPerform. (Note that willDropMakeItWorse never returns DropAllowed in that situation, because it's told that dest has a copy. So numcopies is always checked.) And when only the source and not the local repo or destination have a copy, do the full copy from source to local, then copy from local to dest, then drop from local, then drop from source dance. This is complicated by fromPerform being hardcoded to assume there is a local copy, but the local copy has already been dropped. That's why it uses cleanupfromsrc RemoveNever to avoid the code that makes that assumption, and finishes with a call to dropfromsrc. And, since the location log has not yet been updated, checking numcopies was not working, until I added UnVerifiedRemote dest to the list of things to check. This is not yet quite mergeable though. There are two things in the comment above fromToPerform that are not implemented yet: Checking the location log before dropping the local copy, and locking the temporary local copy for drop. Sponsored-by: Dartmouth College's DANDI project --- Command/Move.hs | 155 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 103 insertions(+), 52 deletions(-) diff --git a/Command/Move.hs b/Command/Move.hs index 945d9c0eb3..7671d587f8 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -247,43 +247,53 @@ fromOk src key fromPerform :: Remote -> RemoveWhen -> Key -> AssociatedFile -> CommandPerform fromPerform src removewhen key afile = do present <- inAnnex key - fromPerform' present src removewhen key afile + finish <- fromPerform' present True src key afile + finish removewhen -fromPerform' :: Bool -> Remote -> RemoveWhen -> Key -> AssociatedFile -> CommandPerform -fromPerform' present src removewhen key afile = do +fromPerform' :: Bool -> Bool -> Remote -> Key -> AssociatedFile -> Annex (RemoveWhen -> CommandPerform) +fromPerform' present updatelocationlog src key afile = do showAction $ "from " ++ Remote.name src destuuid <- getUUID - logMove srcuuid destuuid present key $ \deststartedwithcopy -> + logMove (Remote.uuid src) destuuid present key $ \deststartedwithcopy -> if present - then dispatch removewhen deststartedwithcopy True - else dispatch removewhen deststartedwithcopy =<< get + then return $ finish deststartedwithcopy True + else do + got <- get + return $ finish deststartedwithcopy got where get = notifyTransfer Download afile $ - logStatusAfter key . + logdownload . download src key afile stdRetry - dispatch _ deststartedwithcopy False = do + logdownload a + | updatelocationlog = logStatusAfter key a + | otherwise = a + + finish deststartedwithcopy False _ = do logMoveCleanup deststartedwithcopy stop -- copy failed - dispatch RemoveNever deststartedwithcopy True = do + finish deststartedwithcopy True RemoveNever = do logMoveCleanup deststartedwithcopy next $ return True -- copy complete - dispatch RemoveSafe deststartedwithcopy True = lockContentShared key $ \_lck -> do + finish deststartedwithcopy True RemoveSafe = do destuuid <- getUUID - willDropMakeItWorse srcuuid destuuid deststartedwithcopy key afile >>= \case - DropAllowed -> dropremote deststartedwithcopy "moved" - DropCheckNumCopies -> do - (numcopies, mincopies) <- getSafestNumMinCopies afile key - (tocheck, verified) <- verifiableCopies key [Remote.uuid src] - verifyEnoughCopiesToDrop "" key Nothing numcopies mincopies [Remote.uuid src] verified - tocheck (dropremote deststartedwithcopy . showproof) (faileddropremote deststartedwithcopy) - DropWorse -> faileddropremote deststartedwithcopy - - srcuuid = Remote.uuid src - + lockContentShared key $ \_lck -> + fromDrop src destuuid deststartedwithcopy key afile id + +fromDrop :: Remote -> UUID -> DestStartedWithCopy -> Key -> AssociatedFile -> ([UnVerifiedCopy] -> [UnVerifiedCopy])-> CommandPerform +fromDrop src destuuid deststartedwithcopy key afile adjusttocheck = + willDropMakeItWorse (Remote.uuid src) destuuid deststartedwithcopy key afile >>= \case + DropAllowed -> dropremote "moved" + DropCheckNumCopies -> do + (numcopies, mincopies) <- getSafestNumMinCopies afile key + (tocheck, verified) <- verifiableCopies key [Remote.uuid src] + verifyEnoughCopiesToDrop "" key Nothing numcopies mincopies [Remote.uuid src] verified + (adjusttocheck tocheck) (dropremote . showproof) faileddropremote + DropWorse -> faileddropremote + where showproof proof = "proof: " ++ show proof - - dropremote deststartedwithcopy reason = do + + dropremote reason = do fastDebug "Command.Move" $ unwords [ "Dropping from remote" , show src @@ -293,8 +303,8 @@ fromPerform' present src removewhen key afile = do when ok $ logMoveCleanup deststartedwithcopy next $ Command.Drop.cleanupRemote key src (Command.Drop.DroppingUnused False) ok - - faileddropremote deststartedwithcopy = do + + faileddropremote = do showLongNote "(Use --force to override this check, or adjust numcopies.)" showLongNote $ "Content not dropped from " ++ Remote.name src ++ "." logMoveCleanup deststartedwithcopy @@ -330,6 +340,8 @@ fromToStart removewhen afile key ai si src dest = do fromToPerform src dest removewhen key afile {- When there is a local copy, transfer it to the dest, and drop from the src. + - + - When the dest has a copy, drop it from the src. - - Otherwise, download a copy from the dest, populating the local annex - copy, but not updating location logs. Then transfer that to the dest, @@ -358,32 +370,70 @@ fromToStart removewhen afile key ai si src dest = do - downloading it (v10) or immediately after download (v9 or older). -} fromToPerform :: Remote -> Remote -> RemoveWhen -> Key -> AssociatedFile -> CommandPerform -fromToPerform src dest removewhen key afile = do - present <- inAnnex key - if present - then gopresent - else do - showAction $ "from " ++ Remote.name src - downloadsrctotemp - sendtemptodest - dropfromsrc - showAction $ "to " ++ Remote.name dest - error "TODO" +fromToPerform src dest removewhen key afile = go =<< inAnnex key where - sendlocaltodest = error "TODO" - downloadsrctotemp = error "TODO" - sendtemptodest = error "TODO" - dropfromsrc = error "TODO" - - gopresent = do + go True = do haskey <- Remote.hasKey dest key - toPerform dest RemoveNever key afile False haskey >>= \case - Just cleanup -> fromPerform' True src removewhen key afile >>= \case - Just cleanup' -> return $ Just $ do - ok <- cleanup - ok' <- cleanup' - return (ok && ok') - Nothing -> return $ Just cleanup + -- Prepare to drop from src later. Doing this first + -- makes "from src" be shown consistently before + -- "to dest" + dropsrc <- fromsrc True + combinecleanups + -- Send to dest, preserve local copy. + (todest RemoveNever haskey) + (\senttodest -> if senttodest + then dropsrc removewhen + else stop + ) + go False = do + haskey <- Remote.hasKey dest key + case haskey of + Left err -> do + showNote err + stop + Right True -> do + showAction $ "from " ++ Remote.name src + showAction $ "to " ++ Remote.name dest + -- Drop from src, checking copies including + -- the one already in dest. + dropfromsrc id + Right False -> do + -- Get local copy from src, defer dropping + -- from src until later. + cleanupfromsrc <- fromsrc False + combinecleanups + -- Send to dest and remove local copy. + (todest RemoveSafe haskey) + (\senttodest -> + -- Drop from src, checking + -- copies including dest. + combinecleanups + (cleanupfromsrc RemoveNever) + (\_ -> if senttodest + then dropfromsrc (\l -> UnVerifiedRemote dest : l) + else stop + ) + ) + + fromsrc present = fromPerform' present False src key afile + + todest removewhen' = toPerform dest removewhen' key afile False + + dropfromsrc adjusttocheck = + logMove (Remote.uuid src) (Remote.uuid dest) True key $ \deststartedwithcopy -> + fromDrop src (Remote.uuid dest) deststartedwithcopy key afile adjusttocheck + + combinecleanups a b = a >>= \case + Just cleanupa -> b True >>= \case + Just cleanupb -> return $ Just $ do + oka <- cleanupa + okb <- cleanupb + return (oka && okb) + Nothing -> return (Just cleanupa) + Nothing -> b False >>= \case + Just cleanupb -> return $ Just $ do + void cleanupb + return False Nothing -> return Nothing {- The goal of this command is to allow the user maximum freedom to move @@ -395,9 +445,10 @@ fromToPerform src dest removewhen key afile = do - repository reduces the number of copies, and should fail if - that would violate numcopies settings. - - - On the other hand, when the destination repository does not already - - have a copy of a file, it can be dropped without making numcopies - - worse, so the move is allowed even if numcopies is not met. + - On the other hand, when the destination repository did not start + - with a copy of a file, it can be dropped from the source without + - making numcopies worse, so the move is allowed even if numcopies + - is not met. - - Similarly, a file can move from an untrusted repository to another - untrusted repository, even if that is the only copy of the file. From acc3f6211fd27d47a920d1419da8ea43ac44bf86 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 23 Jan 2023 17:07:21 -0400 Subject: [PATCH 7/9] finishing up move --from --to Lock the local content for drop after getting it from src, to prevent another process from using the local content as a copy and dropping it from src, which would prevent dropping the local content after sending it to dest. Support resuming an interrupted move that downloaded the content from src, leaving the local content populated. In this case, the location log has not been updated to say the content is present locally, so we can assume that it's resuming and go ahead and drop the local content after sending it to dest. Note that if a `git-annex get` is being ran at the same time as a `git-annex move --from --to`, it may get a file just before the move processes it. So the location log has not been updated yet, and the move thinks it's resuming. Resulting in local copy being dropped after it's sent to the dest. This race is something we'll just have to live with, it seems. I also gave up on the idea of checking if the location log had been updated by a `git-annex get` that is ran at the same time. That wouldn't work, because the location log is precached in the seek stage, so reading it again after sending the content to dest would not notice changes made to it, unless the cache were invalidated, which would slow it down a lot. That idea anyway was subject to races where it would not detect the concurrent `git-annex get`. So concurrent `git-annex get` will have results that may be surprising. To make that less surprising, updated the documentation of this feature to be explicit that it downloads content to the local repository temporarily. Sponsored-by: Dartmouth College's DANDI project --- Annex/Content.hs | 3 +- Command/Move.hs | 93 +++++++++++++++++++++++------------------ doc/git-annex-move.mdwn | 7 +++- 3 files changed, 60 insertions(+), 43 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 15eab12c2f..589df43215 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -139,7 +139,8 @@ lockContentShared key a = lockContentUsing lock key notpresent $ - - If locking fails, throws an exception rather than running the action. - - - If locking fails because the the content is not present, runs the + - When the content file itself is used as the lock file, + - and locking fails because the the content is not present, runs the - fallback action instead. However, the content is not guaranteed to be - present when this succeeds. -} diff --git a/Command/Move.hs b/Command/Move.hs index 7671d587f8..b105306bf2 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -149,7 +149,10 @@ expectedPresent dest key = do return $ dest `elem` remotes toPerform :: Remote -> RemoveWhen -> Key -> AssociatedFile -> Bool -> Either String Bool -> CommandPerform -toPerform dest removewhen key afile fastcheck isthere = do +toPerform = toPerform' Nothing + +toPerform' :: Maybe ContentRemovalLock -> Remote -> RemoveWhen -> Key -> AssociatedFile -> Bool -> Either String Bool -> CommandPerform +toPerform' mcontentlock dest removewhen key afile fastcheck isthere = do srcuuid <- getUUID case isthere of Left err -> do @@ -178,7 +181,7 @@ toPerform dest removewhen key afile fastcheck isthere = do setpresentremote logMoveCleanup deststartedwithcopy next $ return True - RemoveSafe -> lockContentForRemoval key lockfailed $ \contentlock -> do + RemoveSafe -> lockcontentforremoval $ \contentlock -> do srcuuid <- getUUID r <- willDropMakeItWorse srcuuid destuuid deststartedwithcopy key afile >>= \case DropAllowed -> drophere setpresentremote contentlock "moved" @@ -213,6 +216,10 @@ toPerform dest removewhen key afile fastcheck isthere = do () <- setpresentremote return False + lockcontentforremoval a = case mcontentlock of + Nothing -> lockContentForRemoval key lockfailed a + Just contentlock -> a contentlock + -- This occurs when, for example, two files are being dropped -- and have the same content. The seek stage checks if the content -- is present, but due to buffering, may find it present for the @@ -350,29 +357,26 @@ fromToStart removewhen afile key ai si src dest = do - Using a regular download of the local copy, rather than download to - some other file makes resuming an interruped download work as usual, - and simplifies implementation. It does mean that, if `git-annex get` of - - the same content is being run at the same time, it will see that - - the local copy exists, but then it would get deleted. To avoid that - - unexpected behavior, check the location log before dropping the local - - copy, and if it has been updated (by another process) to say that the - - content is present locally, skip dropping the local copy. - - - - (That leaves a small race, where the other process updates the location - - log after we check it. And another where the other process sees the - - local copy exists just before we drop it. In either case the resulting - - behavior is similar to `git-annex move --to` being run concurrently - - with `git-annex get`.) - - - - The other complication of this approach is that the temporary local - - copy could be seen by another process that uses it as one of the - - necessary copies when dropping from somewhere else. To avoid the number - - of copies being reduced in such a situation (or the local copy not being - - able to be safely dropped), lock the local copy for drop before - - downloading it (v10) or immediately after download (v9 or older). + - the same content is being run at the same time as this move, the content + - may end up locally present, or not. This is similar to the behavior + - when running `git-annex move --to` concurrently with git-annex get. -} fromToPerform :: Remote -> Remote -> RemoveWhen -> Key -> AssociatedFile -> CommandPerform -fromToPerform src dest removewhen key afile = go =<< inAnnex key +fromToPerform src dest removewhen key afile = do + hereuuid <- getUUID + loggedpresent <- any (== hereuuid) + <$> loggedLocations key + ispresent <- inAnnex key + go ispresent loggedpresent where - go True = do + -- The content is present, and is logged as present, so it + -- can be sent to dest and dropped from src. + -- + -- When resuming an interrupted move --from --to, where the content + -- was not present but got downloaded from src, it will not be + -- logged present, and so this won't be used. Instead, the local + -- content will get dropped after being copied to dest. + go True True = do haskey <- Remote.hasKey dest key -- Prepare to drop from src later. Doing this first -- makes "from src" be shown consistently before @@ -380,12 +384,12 @@ fromToPerform src dest removewhen key afile = go =<< inAnnex key dropsrc <- fromsrc True combinecleanups -- Send to dest, preserve local copy. - (todest RemoveNever haskey) + (todest Nothing RemoveNever haskey) (\senttodest -> if senttodest then dropsrc removewhen else stop ) - go False = do + go ispresent _loggedpresent = do haskey <- Remote.hasKey dest key case haskey of Left err -> do @@ -399,25 +403,34 @@ fromToPerform src dest removewhen key afile = go =<< inAnnex key dropfromsrc id Right False -> do -- Get local copy from src, defer dropping - -- from src until later. - cleanupfromsrc <- fromsrc False - combinecleanups - -- Send to dest and remove local copy. - (todest RemoveSafe haskey) - (\senttodest -> - -- Drop from src, checking - -- copies including dest. - combinecleanups - (cleanupfromsrc RemoveNever) - (\_ -> if senttodest - then dropfromsrc (\l -> UnVerifiedRemote dest : l) - else stop - ) - ) + -- from src until later. Note that fromsrc + -- does not update the location log. + cleanupfromsrc <- if ispresent + then return $ const $ next (return True) + else fromsrc False + -- Lock the local copy for removal early, + -- to avoid other processes relying on it + -- as a copy, and removing other copies + -- (such as the one in src), that prevents + -- dropping the local copy later. + lockContentForRemoval key stop $ \contentlock -> + combinecleanups + -- Send to dest and remove local copy. + (todest (Just contentlock) RemoveSafe haskey) + (\senttodest -> + -- Drop from src, checking + -- copies including dest. + combinecleanups + (cleanupfromsrc RemoveNever) + (\_ -> if senttodest + then dropfromsrc (\l -> UnVerifiedRemote dest : l) + else stop + ) + ) fromsrc present = fromPerform' present False src key afile - todest removewhen' = toPerform dest removewhen' key afile False + todest mcontentlock removewhen' = toPerform' mcontentlock dest removewhen' key afile False dropfromsrc adjusttocheck = logMove (Remote.uuid src) (Remote.uuid dest) True key $ \deststartedwithcopy -> diff --git a/doc/git-annex-move.mdwn b/doc/git-annex-move.mdwn index 7b29284d7c..162595fb97 100644 --- a/doc/git-annex-move.mdwn +++ b/doc/git-annex-move.mdwn @@ -31,9 +31,12 @@ Paths of files or directories to operate on can be specified. * `--from=remote1 --to=remote2` Move the content of files that are in remote1 to remote2. Does not change - what is stored in the local repository. + what is stored in the local repository. - Note: This may need to store an intermediate copy of the content on disk. + This is implemented by first downloading the content from remote1 to the + local repository (if not already present), then sending it to remote2, and + then deleting the content from the local repository (if it was not present + to start with). * `--force` From 77266e46dda7009e7d62c6ae18ac22c535438472 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 23 Jan 2023 17:53:57 -0400 Subject: [PATCH 8/9] fix behavior of copy --from --to Sponsored-by: Dartmouth College's DANDI project --- Command/Move.hs | 5 +++-- doc/git-annex-copy.mdwn | 8 +++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Command/Move.hs b/Command/Move.hs index b105306bf2..0f66948379 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -432,9 +432,10 @@ fromToPerform src dest removewhen key afile = do todest mcontentlock removewhen' = toPerform' mcontentlock dest removewhen' key afile False - dropfromsrc adjusttocheck = - logMove (Remote.uuid src) (Remote.uuid dest) True key $ \deststartedwithcopy -> + dropfromsrc adjusttocheck = case removewhen of + RemoveSafe -> logMove (Remote.uuid src) (Remote.uuid dest) True key $ \deststartedwithcopy -> fromDrop src (Remote.uuid dest) deststartedwithcopy key afile adjusttocheck + RemoveNever -> next (return True) combinecleanups a b = a >>= \case Just cleanupa -> b True >>= \case diff --git a/doc/git-annex-copy.mdwn b/doc/git-annex-copy.mdwn index 03563bda11..acf3b2a32d 100644 --- a/doc/git-annex-copy.mdwn +++ b/doc/git-annex-copy.mdwn @@ -34,10 +34,12 @@ Paths of files or directories to operate on can be specified. * `--from=remote1 --to=remote2` - Copy the content of files that are in remote1 to remote2. Does not change - what is stored in the local repository. + Copy the content of files that are in remote1 to remote2. - Note: This may need to store an intermediate copy of the content on disk. + This is implemented by first downloading the content from remote1 to the + local repository (if not already present), then sending it to remote2, and + then deleting the content from the local repository (if it was not present + to start with). * `--jobs=N` `-JN` From 1ee72de32ea979a90ce149537a9b30d2fdba55a3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 23 Jan 2023 17:57:15 -0400 Subject: [PATCH 9/9] done --- doc/todo/copy_with_both_--to_and_--from_.mdwn | 2 ++ ...ment_8_a230b52cd5985437827bb432a85349ce._comment | 13 +++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 doc/todo/copy_with_both_--to_and_--from_/comment_8_a230b52cd5985437827bb432a85349ce._comment diff --git a/doc/todo/copy_with_both_--to_and_--from_.mdwn b/doc/todo/copy_with_both_--to_and_--from_.mdwn index 6149912544..8183d93c09 100644 --- a/doc/todo/copy_with_both_--to_and_--from_.mdwn +++ b/doc/todo/copy_with_both_--to_and_--from_.mdwn @@ -14,3 +14,5 @@ NB may be some `annex sync` would do that magic? [[!meta author=yoh]] [[!tag projects/dandi]] + +> [[implemented|done]] --[[Joey]] diff --git a/doc/todo/copy_with_both_--to_and_--from_/comment_8_a230b52cd5985437827bb432a85349ce._comment b/doc/todo/copy_with_both_--to_and_--from_/comment_8_a230b52cd5985437827bb432a85349ce._comment new file mode 100644 index 0000000000..0d6ec2d0bf --- /dev/null +++ b/doc/todo/copy_with_both_--to_and_--from_/comment_8_a230b52cd5985437827bb432a85349ce._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 8""" + date="2023-01-23T21:45:37Z" + content=""" +I did implement this using the second path described in comment #5. + +And it does download the content to the local annex, and then after +sending it to the destination remote, will drop the local content +(unless it was already present). So running it concurrent with +`git-annex get` may leave local files present or not, depending on which +process gets to a file first. +"""]]