From 0896038ba7ba23c5824ee630751fd7099016de7f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 16 Nov 2020 14:09:55 -0400 Subject: [PATCH] annex.adjustedbranchrefresh Added annex.adjustedbranchrefresh git config to update adjusted branches set up by git-annex adjust --unlock-present/--hide-missing. Note, in a few cases, I was not able to make the adjusted branch be updated in calls to moveAnnex, because information about what file corresponds to a key is not available. They are: * If two files point to one file, then eg, `git annex get foo` will update the branch to unlock foo, but will not unlock bar, because it does not know about it. Might be fixable by making `git annex get bar` do something besides skipping bar? * git-annex-shell recvkey likewise (so sends over ssh from old versions of git-annex) * git-annex setkey * git-annex transferkey if the user does not use --file * git-annex multicast sends keys with no associated file info Doing a single full refresh at the end, after any incremental refresh, will deal with those edge cases. --- Annex.hs | 6 +- Annex/AdjustedBranch.hs | 200 +++++------------- Annex/AdjustedBranch/Merge.hs | 164 ++++++++++++++ Annex/Content.hs | 25 +-- Annex/Import.hs | 4 +- Annex/Ingest.hs | 13 +- CHANGELOG | 2 + Command/Get.hs | 2 +- Command/Move.hs | 2 +- Command/Multicast.hs | 2 +- Command/ReKey.hs | 2 +- Command/RecvKey.hs | 2 +- Command/Reinject.hs | 2 +- Command/SetKey.hs | 2 +- Command/Sync.hs | 1 + Command/TestRemote.hs | 8 +- Command/TransferKey.hs | 2 +- Command/TransferKeys.hs | 2 +- P2P/Annex.hs | 2 +- Remote/Git.hs | 2 +- Types/CleanupActions.hs | 1 + Types/GitConfig.hs | 5 + Upgrade/V0.hs | 3 +- Upgrade/V1.hs | 2 +- doc/git-annex-adjust.mdwn | 6 +- doc/git-annex.mdwn | 16 ++ ...branch_on_content_availability_change.mdwn | 12 ++ git-annex.cabal | 1 + 28 files changed, 311 insertions(+), 180 deletions(-) create mode 100644 Annex/AdjustedBranch/Merge.hs diff --git a/Annex.hs b/Annex.hs index 5ae01ea2ac..748ad96009 100644 --- a/Annex.hs +++ b/Annex.hs @@ -143,6 +143,7 @@ data AnnexState = AnnexState , sentinalstatus :: Maybe SentinalStatus , useragent :: Maybe String , errcounter :: Integer + , adjustedbranchrefreshcounter :: Integer , unusedkeys :: Maybe (S.Set Key) , tempurls :: M.Map Key URLString , existinghooks :: M.Map Git.Hook.Hook Bool @@ -203,6 +204,7 @@ newState c r = do , sentinalstatus = Nothing , useragent = Nothing , errcounter = 0 + , adjustedbranchrefreshcounter = 0 , unusedkeys = Nothing , tempurls = M.empty , existinghooks = M.empty @@ -399,8 +401,8 @@ changeDirectory d = do incError :: Annex () incError = changeState $ \s -> - let ! c = errcounter s + 1 - ! s' = s { errcounter = c } + let !c = errcounter s + 1 + !s' = s { errcounter = c } in s' getGitRemotes :: Annex [Git.Repo] diff --git a/Annex/AdjustedBranch.hs b/Annex/AdjustedBranch.hs index 8e6eb4dc8b..14467565aa 100644 --- a/Annex/AdjustedBranch.hs +++ b/Annex/AdjustedBranch.hs @@ -22,10 +22,18 @@ module Annex.AdjustedBranch ( getAdjustment, enterAdjustedBranch, updateAdjustedBranch, + adjustedBranchRefresh, adjustBranch, + adjustTree, adjustToCrippledFileSystem, - mergeToAdjustedBranch, propigateAdjustedCommits, + propigateAdjustedCommits', + commitAdjustedTree, + commitAdjustedTree', + BasisBranch(..), + basisBranch, + setBasisBranch, + preventCommits, AdjustedClone(..), checkAdjustedClone, checkVersionSupported, @@ -43,7 +51,6 @@ import qualified Git.Ref import qualified Git.Command import qualified Git.Tree import qualified Git.DiffTree -import qualified Git.Merge import Git.Tree (TreeItem(..)) import Git.Sha import Git.Env @@ -53,19 +60,13 @@ import qualified Git.LockFile import qualified Git.Version import Annex.CatFile import Annex.Link -import Annex.AutoMerge -import Annex.Content -import Annex.Tmp -import Annex.GitOverlay -import Utility.Tmp.Dir -import Utility.CopyFile -import Utility.Directory.Create +import Annex.Content.Presence +import Annex.CurrentBranch +import Types.CleanupActions import qualified Database.Keys import Config import qualified Data.Map as M -import qualified Data.ByteString as S -import qualified System.FilePath.ByteString as P class AdjustTreeItem t where -- How to perform various adjustments to a TreeItem. @@ -272,6 +273,52 @@ updateAdjustedBranch adj (AdjBranch currbranch) origbranch -- adjustment is stable. return True +{- Passed an action that, if it succeeds may get or drop the Key associated + - with the file. When the adjusted branch needs to be refreshed to reflect + - those changes, it's handled here. + - + - Note that the AssociatedFile must be verified by this to point to the + - Key. In some cases, the value was provided by the user and might not + - really be an associated file. + -} +adjustedBranchRefresh :: AssociatedFile -> Annex a -> Annex a +adjustedBranchRefresh _af a = do + r <- a + annexAdjustedBranchRefresh <$> Annex.getGitConfig >>= \case + 0 -> return () + n -> go n + return r + where + go n = getCurrentBranch >>= \case + (Just origbranch, Just adj) -> + unless (adjustmentIsStable adj) $ + ifM (checkcounter n) + ( update adj origbranch + , Annex.addCleanup AdjustedBranchUpdate $ + update adj origbranch + ) + _ -> return () + + checkcounter n + -- Special case, 1 (or true) refreshes only at shutdown. + | n == 1 = pure False + | n == 2 = pure True + | otherwise = Annex.withState $ \s -> + let !c = Annex.adjustedbranchrefreshcounter s + 1 + !enough = c >= pred n + !c' = if enough then 0 else c + !s' = s { Annex.adjustedbranchrefreshcounter = c' } + in pure (s', enough) + + -- TODO This is very slow when run a lot of times. + -- Incrementally adjust only the AssociatedFile. + -- However, this should be run once at shutdown then, + -- because other files than the provided AssociatedFile + -- can need to be updated in some edge cases. + update adj origbranch = do + let adjbranch = originalToAdjusted origbranch adj + void $ updateAdjustedBranch adj adjbranch origbranch + adjustToCrippledFileSystem :: Annex () adjustToCrippledFileSystem = do warning "Entering an adjusted branch where files are unlocked as this filesystem does not support locked files." @@ -389,137 +436,6 @@ findAdjustingCommit (AdjBranch b) = go =<< catCommit b [p] -> go =<< catCommit p _ -> return Nothing -{- Update the currently checked out adjusted branch, merging the provided - - branch into it. Note that the provided branch should be a non-adjusted - - branch. -} -mergeToAdjustedBranch :: Branch -> (OrigBranch, Adjustment) -> [Git.Merge.MergeConfig] -> Bool -> Git.Branch.CommitMode -> Annex Bool -mergeToAdjustedBranch tomerge (origbranch, adj) mergeconfig canresolvemerge commitmode = catchBoolIO $ - join $ preventCommits go - where - adjbranch@(AdjBranch currbranch) = originalToAdjusted origbranch adj - basis = basisBranch adjbranch - - go commitsprevented = - ifM (inRepo $ Git.Branch.changed currbranch tomerge) - ( do - (updatedorig, _) <- propigateAdjustedCommits' - origbranch adj commitsprevented - changestomerge updatedorig - , nochangestomerge - ) - - nochangestomerge = return $ return True - - {- Since the adjusted branch changes files, merging tomerge - - directly into it would likely result in unncessary merge - - conflicts. To avoid those conflicts, instead merge tomerge into - - updatedorig. The result of the merge can the be - - adjusted to yield the final adjusted branch. - - - - In order to do a merge into a ref that is not checked out, - - set the work tree to a temp directory, and set GIT_DIR - - to another temp directory, in which HEAD contains the - - updatedorig sha. GIT_COMMON_DIR is set to point to the real - - git directory, and so git can read and write objects from there, - - but will use GIT_DIR for HEAD and index. - - - - (Doing the merge this way also lets it run even though the main - - index file is currently locked.) - -} - changestomerge (Just updatedorig) = withOtherTmp $ \othertmpdir -> do - git_dir <- fromRepo Git.localGitDir - let git_dir' = fromRawFilePath git_dir - tmpwt <- fromRepo gitAnnexMergeDir - withTmpDirIn (fromRawFilePath othertmpdir) "git" $ \tmpgit -> withWorkTreeRelated tmpgit $ - withemptydir git_dir tmpwt $ withWorkTree tmpwt $ do - liftIO $ writeFile (tmpgit "HEAD") (fromRef updatedorig) - -- Copy in refs and packed-refs, to work - -- around bug in git 2.13.0, which - -- causes it not to look in GIT_DIR for refs. - refs <- liftIO $ dirContentsRecursive $ - git_dir' "refs" - let refs' = (git_dir' "packed-refs") : refs - liftIO $ forM_ refs' $ \src -> - whenM (doesFileExist src) $ do - dest <- relPathDirToFile git_dir - (toRawFilePath src) - let dest' = toRawFilePath tmpgit P. dest - createDirectoryUnder git_dir - (P.takeDirectory dest') - void $ createLinkOrCopy src - (fromRawFilePath dest') - -- This reset makes git merge not care - -- that the work tree is empty; otherwise - -- it will think that all the files have - -- been staged for deletion, and sometimes - -- the merge includes these deletions - -- (for an unknown reason). - -- http://thread.gmane.org/gmane.comp.version-control.git/297237 - inRepo $ Git.Command.run [Param "reset", Param "HEAD", Param "--quiet"] - showAction $ "Merging into " ++ fromRef (Git.Ref.base origbranch) - merged <- autoMergeFrom' tomerge Nothing mergeconfig commitmode True - (const $ resolveMerge (Just updatedorig) tomerge True) - if merged - then do - !mergecommit <- liftIO $ extractSha - <$> S.readFile (tmpgit "HEAD") - -- This is run after the commit lock is dropped. - return $ postmerge mergecommit - else return $ return False - changestomerge Nothing = return $ return False - - withemptydir git_dir d a = bracketIO setup cleanup (const a) - where - setup = do - whenM (doesDirectoryExist d) $ - removeDirectoryRecursive d - createDirectoryUnder git_dir (toRawFilePath d) - cleanup _ = removeDirectoryRecursive d - - {- A merge commit has been made between the basisbranch and - - tomerge. Update the basisbranch and origbranch to point - - to that commit, adjust it to get the new adjusted branch, - - and check it out. - - - - But, there may be unstaged work tree changes that conflict, - - so the check out is done by making a normal merge of - - the new adjusted branch. - -} - postmerge (Just mergecommit) = do - setBasisBranch basis mergecommit - inRepo $ Git.Branch.update' origbranch mergecommit - adjtree <- adjustTree adj (BasisBranch mergecommit) - adjmergecommit <- commitAdjustedTree adjtree (BasisBranch mergecommit) - -- Make currbranch be the parent, so that merging - -- this commit will be a fast-forward. - adjmergecommitff <- commitAdjustedTree' adjtree (BasisBranch mergecommit) [currbranch] - showAction "Merging into adjusted branch" - ifM (autoMergeFrom adjmergecommitff (Just currbranch) mergeconfig commitmode canresolvemerge) - ( reparent adjtree adjmergecommit =<< getcurrentcommit - , return False - ) - postmerge Nothing = return False - - -- Now that the merge into the adjusted branch is complete, - -- take the tree from that merge, and attach it on top of the - -- adjmergecommit, if it's different. - reparent adjtree adjmergecommit (Just currentcommit) = do - if (commitTree currentcommit /= adjtree) - then do - cmode <- annexCommitMode <$> Annex.getGitConfig - c <- inRepo $ Git.Branch.commitTree cmode - ("Merged " ++ fromRef tomerge) [adjmergecommit] - (commitTree currentcommit) - inRepo $ Git.Branch.update "updating adjusted branch" currbranch c - propigateAdjustedCommits origbranch adj - else inRepo $ Git.Branch.update "updating adjusted branch" currbranch adjmergecommit - return True - reparent _ _ Nothing = return False - - getcurrentcommit = inRepo Git.Branch.currentUnsafe >>= \case - Nothing -> return Nothing - Just c -> catCommit c - {- Check for any commits present on the adjusted branch that have not yet - been propigated to the basis branch, and propigate them to the basis - branch and from there on to the orig branch. diff --git a/Annex/AdjustedBranch/Merge.hs b/Annex/AdjustedBranch/Merge.hs new file mode 100644 index 0000000000..c2cce8754d --- /dev/null +++ b/Annex/AdjustedBranch/Merge.hs @@ -0,0 +1,164 @@ +{- adjusted branch merging + - + - Copyright 2016-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns, OverloadedStrings #-} + +module Annex.AdjustedBranch.Merge ( + mergeToAdjustedBranch, +) where + +import Annex.Common +import Annex.AdjustedBranch +import qualified Annex +import Git +import Git.Types +import qualified Git.Branch +import qualified Git.Ref +import qualified Git.Command +import qualified Git.Merge +import Git.Sha +import Annex.CatFile +import Annex.AutoMerge +import Annex.Tmp +import Annex.GitOverlay +import Utility.Tmp.Dir +import Utility.CopyFile +import Utility.Directory.Create + +import qualified Data.ByteString as S +import qualified System.FilePath.ByteString as P + +{- Update the currently checked out adjusted branch, merging the provided + - branch into it. Note that the provided branch should be a non-adjusted + - branch. -} +mergeToAdjustedBranch :: Branch -> (OrigBranch, Adjustment) -> [Git.Merge.MergeConfig] -> Bool -> Git.Branch.CommitMode -> Annex Bool +mergeToAdjustedBranch tomerge (origbranch, adj) mergeconfig canresolvemerge commitmode = catchBoolIO $ + join $ preventCommits go + where + adjbranch@(AdjBranch currbranch) = originalToAdjusted origbranch adj + basis = basisBranch adjbranch + + go commitsprevented = + ifM (inRepo $ Git.Branch.changed currbranch tomerge) + ( do + (updatedorig, _) <- propigateAdjustedCommits' + origbranch adj commitsprevented + changestomerge updatedorig + , nochangestomerge + ) + + nochangestomerge = return $ return True + + {- Since the adjusted branch changes files, merging tomerge + - directly into it would likely result in unncessary merge + - conflicts. To avoid those conflicts, instead merge tomerge into + - updatedorig. The result of the merge can the be + - adjusted to yield the final adjusted branch. + - + - In order to do a merge into a ref that is not checked out, + - set the work tree to a temp directory, and set GIT_DIR + - to another temp directory, in which HEAD contains the + - updatedorig sha. GIT_COMMON_DIR is set to point to the real + - git directory, and so git can read and write objects from there, + - but will use GIT_DIR for HEAD and index. + - + - (Doing the merge this way also lets it run even though the main + - index file is currently locked.) + -} + changestomerge (Just updatedorig) = withOtherTmp $ \othertmpdir -> do + git_dir <- fromRepo Git.localGitDir + let git_dir' = fromRawFilePath git_dir + tmpwt <- fromRepo gitAnnexMergeDir + withTmpDirIn (fromRawFilePath othertmpdir) "git" $ \tmpgit -> withWorkTreeRelated tmpgit $ + withemptydir git_dir tmpwt $ withWorkTree tmpwt $ do + liftIO $ writeFile (tmpgit "HEAD") (fromRef updatedorig) + -- Copy in refs and packed-refs, to work + -- around bug in git 2.13.0, which + -- causes it not to look in GIT_DIR for refs. + refs <- liftIO $ dirContentsRecursive $ + git_dir' "refs" + let refs' = (git_dir' "packed-refs") : refs + liftIO $ forM_ refs' $ \src -> + whenM (doesFileExist src) $ do + dest <- relPathDirToFile git_dir + (toRawFilePath src) + let dest' = toRawFilePath tmpgit P. dest + createDirectoryUnder git_dir + (P.takeDirectory dest') + void $ createLinkOrCopy src + (fromRawFilePath dest') + -- This reset makes git merge not care + -- that the work tree is empty; otherwise + -- it will think that all the files have + -- been staged for deletion, and sometimes + -- the merge includes these deletions + -- (for an unknown reason). + -- http://thread.gmane.org/gmane.comp.version-control.git/297237 + inRepo $ Git.Command.run [Param "reset", Param "HEAD", Param "--quiet"] + showAction $ "Merging into " ++ fromRef (Git.Ref.base origbranch) + merged <- autoMergeFrom' tomerge Nothing mergeconfig commitmode True + (const $ resolveMerge (Just updatedorig) tomerge True) + if merged + then do + !mergecommit <- liftIO $ extractSha + <$> S.readFile (tmpgit "HEAD") + -- This is run after the commit lock is dropped. + return $ postmerge mergecommit + else return $ return False + changestomerge Nothing = return $ return False + + withemptydir git_dir d a = bracketIO setup cleanup (const a) + where + setup = do + whenM (doesDirectoryExist d) $ + removeDirectoryRecursive d + createDirectoryUnder git_dir (toRawFilePath d) + cleanup _ = removeDirectoryRecursive d + + {- A merge commit has been made between the basisbranch and + - tomerge. Update the basisbranch and origbranch to point + - to that commit, adjust it to get the new adjusted branch, + - and check it out. + - + - But, there may be unstaged work tree changes that conflict, + - so the check out is done by making a normal merge of + - the new adjusted branch. + -} + postmerge (Just mergecommit) = do + setBasisBranch basis mergecommit + inRepo $ Git.Branch.update' origbranch mergecommit + adjtree <- adjustTree adj (BasisBranch mergecommit) + adjmergecommit <- commitAdjustedTree adjtree (BasisBranch mergecommit) + -- Make currbranch be the parent, so that merging + -- this commit will be a fast-forward. + adjmergecommitff <- commitAdjustedTree' adjtree (BasisBranch mergecommit) [currbranch] + showAction "Merging into adjusted branch" + ifM (autoMergeFrom adjmergecommitff (Just currbranch) mergeconfig commitmode canresolvemerge) + ( reparent adjtree adjmergecommit =<< getcurrentcommit + , return False + ) + postmerge Nothing = return False + + -- Now that the merge into the adjusted branch is complete, + -- take the tree from that merge, and attach it on top of the + -- adjmergecommit, if it's different. + reparent adjtree adjmergecommit (Just currentcommit) = do + if (commitTree currentcommit /= adjtree) + then do + cmode <- annexCommitMode <$> Annex.getGitConfig + c <- inRepo $ Git.Branch.commitTree cmode + ("Merged " ++ fromRef tomerge) [adjmergecommit] + (commitTree currentcommit) + inRepo $ Git.Branch.update "updating adjusted branch" currbranch c + propigateAdjustedCommits origbranch adj + else inRepo $ Git.Branch.update "updating adjusted branch" currbranch adjmergecommit + return True + reparent _ _ Nothing = return False + + getcurrentcommit = inRepo Git.Branch.currentUnsafe >>= \case + Nothing -> return Nothing + Just c -> catCommit c diff --git a/Annex/Content.hs b/Annex/Content.hs index e39351d781..3bdaacd45a 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -76,6 +76,7 @@ import Annex.Link import Annex.LockPool import Annex.UUID import Annex.InodeSentinal +import Annex.AdjustedBranch (adjustedBranchRefresh) import Messages.Progress import Types.Remote (RetrievalSecurityPolicy(..)) import Types.NumCopies @@ -203,15 +204,15 @@ lockContentUsing locker key fallback a = do {- Runs an action, passing it the temp file to get, - and if the action succeeds, verifies the file matches - the key and moves the file into the annex as a key's content. -} -getViaTmp :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool -getViaTmp rsp v key action = checkDiskSpaceToGet key False $ - getViaTmpFromDisk rsp v key action +getViaTmp :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool +getViaTmp rsp v key af action = checkDiskSpaceToGet key False $ + getViaTmpFromDisk rsp v key af action {- Like getViaTmp, but does not check that there is enough disk space - for the incoming key. For use when the key content is already on disk - and not being copied into place. -} -getViaTmpFromDisk :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool -getViaTmpFromDisk rsp v key action = checkallowed $ do +getViaTmpFromDisk :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool +getViaTmpFromDisk rsp v key af action = checkallowed $ do tmpfile <- prepTmp key resuming <- liftIO $ R.doesPathExist tmpfile (ok, verification) <- action tmpfile @@ -226,7 +227,7 @@ getViaTmpFromDisk rsp v key action = checkallowed $ do else verification if ok then ifM (verifyKeyContent rsp v verification' key tmpfile) - ( ifM (pruneTmpWorkDirBefore tmpfile (moveAnnex key)) + ( ifM (pruneTmpWorkDirBefore tmpfile (moveAnnex key af)) ( do logStatus key InfoPresent return True @@ -329,8 +330,8 @@ withTmp key action = do - accepted into the repository. Will display a warning message in this - case. May also throw exceptions in some cases. -} -moveAnnex :: Key -> RawFilePath -> Annex Bool -moveAnnex key src = ifM (checkSecureHashes' key) +moveAnnex :: Key -> AssociatedFile -> RawFilePath -> Annex Bool +moveAnnex key af src = ifM (checkSecureHashes' key) ( do withObjectLoc key storeobject return True @@ -339,7 +340,7 @@ moveAnnex key src = ifM (checkSecureHashes' key) where storeobject dest = ifM (liftIO $ R.doesPathExist dest) ( alreadyhave - , modifyContent dest $ do + , adjustedBranchRefresh af $ modifyContent dest $ do freezeContent src liftIO $ moveFile (fromRawFilePath src) @@ -500,8 +501,7 @@ cleanObjectLoc key cleaner = do maybe noop (const $ removeparents dir (n-1)) <=< catchMaybeIO $ removeDirectory (fromRawFilePath dir) -{- Removes a key's file from .git/annex/objects/ - -} +{- Removes a key's file from .git/annex/objects/ -} removeAnnex :: ContentRemovalLock -> Annex () removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file -> cleanObjectLoc key $ do @@ -515,7 +515,8 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file -> -- Check associated pointer file for modifications, and reset if -- it's unmodified. resetpointer file = ifM (isUnmodified key file) - ( depopulatePointerFile key file + ( adjustedBranchRefresh (AssociatedFile (Just file)) $ + depopulatePointerFile key file -- Modified file, so leave it alone. -- If it was a hard link to the annex object, -- that object might have been frozen as part of the diff --git a/Annex/Import.hs b/Annex/Import.hs index 0cb257c103..57d7b5b2c1 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -460,7 +460,7 @@ importKeys remote importtreeconfig importcontent importablecontents = do ia loc cid (fromRawFilePath tmpfile) (pure k) (combineMeterUpdate p' p) - ok <- moveAnnex k' tmpfile + ok <- moveAnnex k' af tmpfile when ok $ logStatus k InfoPresent return (Just (k', ok)) @@ -503,7 +503,7 @@ importKeys remote importtreeconfig importcontent importablecontents = do p case keyGitSha k of Nothing -> do - ok <- moveAnnex k tmpfile + ok <- moveAnnex k af tmpfile when ok $ do recordcidkey cidmap db cid k logStatus k InfoPresent diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs index 0e5a7a2452..6aeeea25cd 100644 --- a/Annex/Ingest.hs +++ b/Annex/Ingest.hs @@ -177,13 +177,19 @@ ingest' preferredbackend meterupdate (Just (LockedDown cfg source)) mk restage = go _ _ Nothing = failure "failed to generate a key" golocked key mcache s = - tryNonAsync (moveAnnex key $ contentLocation source) >>= \case + tryNonAsync (moveAnnex key naf (contentLocation source)) >>= \case Right True -> do populateAssociatedFiles key source restage success key mcache s Right False -> giveup "failed to add content to annex" Left e -> restoreFile (keyFilename source) key e + -- moveAnnex uses the AssociatedFile provided to it to unlock + -- locked files when getting a file in an adjusted branch. + -- That case does not apply here, where we're adding an unlocked + -- file, so provide it nothing. + naf = AssociatedFile Nothing + gounlocked key (Just cache) s = do -- Remove temp directory hard link first because -- linkToAnnex falls back to copying if a file @@ -352,7 +358,7 @@ addAnnexedFile ci matcher file key mtmp = ifM (addUnlocked matcher mi) stagePointerFile file mode =<< hashPointerFile key Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file) case mtmp of - Just tmp -> ifM (moveAnnex key tmp) + Just tmp -> ifM (moveAnnex key af tmp) ( linkunlocked mode >> return True , writepointer mode >> return False ) @@ -363,10 +369,11 @@ addAnnexedFile ci matcher file key mtmp = ifM (addUnlocked matcher mi) , do addLink ci file key Nothing case mtmp of - Just tmp -> moveAnnex key tmp + Just tmp -> moveAnnex key af tmp Nothing -> return True ) where + af = AssociatedFile (Just file) mi = case mtmp of Just tmp -> MatchingFile $ FileInfo { contentFile = Just tmp diff --git a/CHANGELOG b/CHANGELOG index 9489507055..87daf47413 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,8 @@ git-annex (8.20201117) UNRELEASED; urgency=medium * adjust: New --unlock-present mode which locks files whose content is not present (so the broken symlink is visible), while unlocking files whose content is present. + * Added annex.adjustedbranchrefresh git config to update adjusted + branches set up by git-annex adjust --unlock-present/--hide-missing. * examinekey: Added a "file" format variable for consistency with find, and for easier scripting. diff --git a/Command/Get.hs b/Command/Get.hs index ffb40df89c..c31b2c0bd7 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -114,7 +114,7 @@ getKey' key afile = dispatch | Remote.hasKeyCheap r = either (const False) id <$> Remote.hasKey r key | otherwise = return True - docopy r witness = getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) key $ \dest -> + docopy r witness = getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) key afile $ \dest -> download (Remote.uuid r) key afile stdRetry (\p -> do showAction $ "from " ++ Remote.name r diff --git a/Command/Move.hs b/Command/Move.hs index 1e5a282752..114f2507af 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -225,7 +225,7 @@ fromPerform src removewhen key afile = do where get = notifyTransfer Download afile $ download (Remote.uuid src) key afile stdRetry $ \p -> - getViaTmp (Remote.retrievalSecurityPolicy src) (RemoteVerify src) key $ \t -> + getViaTmp (Remote.retrievalSecurityPolicy src) (RemoteVerify src) key afile $ \t -> Remote.verifiedAction $ Remote.retrieveKeyFile src key afile (fromRawFilePath t) p dispatch _ _ False = stop -- failed diff --git a/Command/Multicast.hs b/Command/Multicast.hs index bb423aea4e..f6e29b6f90 100644 --- a/Command/Multicast.hs +++ b/Command/Multicast.hs @@ -212,7 +212,7 @@ storeReceived f = do warning $ "Received a file " ++ f ++ " that is not a git-annex key. Deleting this file." liftIO $ removeWhenExistsWith removeLink f Just k -> void $ - getViaTmpFromDisk RetrievalVerifiableKeysSecure AlwaysVerify k $ \dest -> unVerified $ + getViaTmpFromDisk RetrievalVerifiableKeysSecure AlwaysVerify k (AssociatedFile Nothing) $ \dest -> unVerified $ liftIO $ catchBoolIO $ do rename f (fromRawFilePath dest) return True diff --git a/Command/ReKey.hs b/Command/ReKey.hs index 4f325f4d87..ec94d98856 100644 --- a/Command/ReKey.hs +++ b/Command/ReKey.hs @@ -90,7 +90,7 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file) - This avoids hard linking to content linked to an - unlocked file, which would leave the new key unlocked - and vulnerable to corruption. -} - ( getViaTmpFromDisk RetrievalAllKeysSecure DefaultVerify newkey $ \tmp -> unVerified $ do + ( getViaTmpFromDisk RetrievalAllKeysSecure DefaultVerify newkey (AssociatedFile Nothing) $ \tmp -> unVerified $ do oldobj <- calcRepo (gitAnnexLocation oldkey) isJust <$> linkOrCopy' (return True) newkey oldobj tmp Nothing , do diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index 3455acc876..df81ecc99d 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -33,7 +33,7 @@ start (_, key) = fieldTransfer Download key $ \_p -> do let verify = if fromunlocked then AlwaysVerify else DefaultVerify -- This matches the retrievalSecurityPolicy of Remote.Git let rsp = RetrievalAllKeysSecure - ifM (getViaTmp rsp verify key go) + ifM (getViaTmp rsp verify key (AssociatedFile Nothing) go) ( do -- forcibly quit after receiving one key, -- and shutdown cleanly diff --git a/Command/Reinject.hs b/Command/Reinject.hs index 69bace8a98..c49b234004 100644 --- a/Command/Reinject.hs +++ b/Command/Reinject.hs @@ -88,7 +88,7 @@ perform src key = ifM move ) where move = checkDiskSpaceToGet key False $ - moveAnnex key src + moveAnnex key (AssociatedFile Nothing) src cleanup :: Key -> CommandCleanup cleanup key = do diff --git a/Command/SetKey.hs b/Command/SetKey.hs index 859a5871e5..ec15cf87c7 100644 --- a/Command/SetKey.hs +++ b/Command/SetKey.hs @@ -35,7 +35,7 @@ perform file key = do -- the file might be on a different filesystem, so moveFile is used -- rather than simply calling moveAnnex; disk space is also -- checked this way. - ok <- getViaTmp RetrievalAllKeysSecure DefaultVerify key $ \dest -> unVerified $ + ok <- getViaTmp RetrievalAllKeysSecure DefaultVerify key (AssociatedFile Nothing) $ \dest -> unVerified $ if dest /= file then liftIO $ catchBoolIO $ do moveFile (fromRawFilePath file) (fromRawFilePath dest) diff --git a/Command/Sync.hs b/Command/Sync.hs index 8fa3f49742..966cba340e 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -60,6 +60,7 @@ import Logs.Export import Logs.PreferredContent import Annex.AutoMerge import Annex.AdjustedBranch +import Annex.AdjustedBranch.Merge import Annex.Ssh import Annex.BloomFilter import Annex.UpdateInstead diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 7258f9eb08..0f15ae6b41 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -294,7 +294,7 @@ test runannex mkr mkk = Just b -> case Types.Backend.verifyKeyContent b of Nothing -> return True Just verifier -> verifier k (serializeKey' k) - get r k = getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k $ \dest -> + get r k = getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) $ \dest -> tryNonAsync (Remote.retrieveKeyFile r k (AssociatedFile Nothing) (fromRawFilePath dest) nullMeterUpdate) >>= \case Right v -> return (True, v) Left _ -> return (False, UnVerified) @@ -368,13 +368,13 @@ testUnavailable runannex mkr mkk = , check (`notElem` [Right True, Right False]) "checkPresent" $ \r k -> Remote.checkPresent r k , check (== Right False) "retrieveKeyFile" $ \r k -> - getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k $ \dest -> + getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) $ \dest -> tryNonAsync (Remote.retrieveKeyFile r k (AssociatedFile Nothing) (fromRawFilePath dest) nullMeterUpdate) >>= \case Right v -> return (True, v) Left _ -> return (False, UnVerified) , check (== Right False) "retrieveKeyFileCheap" $ \r k -> case Remote.retrieveKeyFileCheap r of Nothing -> return False - Just a -> getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k $ \dest -> + Just a -> getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) $ \dest -> unVerified $ isRight <$> tryNonAsync (a k (AssociatedFile Nothing) (fromRawFilePath dest)) ] @@ -436,7 +436,7 @@ randKey sz = withTmpFile "randkey" $ \f h -> do k <- case Types.Backend.genKey Backend.Hash.testKeyBackend of Just a -> a ks nullMeterUpdate Nothing -> giveup "failed to generate random key (backend problem)" - _ <- moveAnnex k (toRawFilePath f) + _ <- moveAnnex k (AssociatedFile Nothing) (toRawFilePath f) return k getReadonlyKey :: Remote -> FilePath -> Annex Key diff --git a/Command/TransferKey.hs b/Command/TransferKey.hs index 43f0eb254f..b7f3cc9177 100644 --- a/Command/TransferKey.hs +++ b/Command/TransferKey.hs @@ -63,7 +63,7 @@ toPerform key file remote = go Upload file $ fromPerform :: Key -> AssociatedFile -> Remote -> CommandPerform fromPerform key file remote = go Upload file $ download (uuid remote) key file stdRetry $ \p -> - getViaTmp (retrievalSecurityPolicy remote) (RemoteVerify remote) key $ \t -> + getViaTmp (retrievalSecurityPolicy remote) (RemoteVerify remote) key file $ \t -> tryNonAsync (Remote.retrieveKeyFile remote key file (fromRawFilePath t) p) >>= \case Right v -> return (True, v) Left e -> do diff --git a/Command/TransferKeys.hs b/Command/TransferKeys.hs index 4d13f3d28f..f0ac31be97 100644 --- a/Command/TransferKeys.hs +++ b/Command/TransferKeys.hs @@ -47,7 +47,7 @@ start = do return True | otherwise = notifyTransfer direction file $ download (Remote.uuid remote) key file stdRetry $ \p -> - getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key $ \t -> do + getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key file $ \t -> do r <- tryNonAsync (Remote.retrieveKeyFile remote key file (fromRawFilePath t) p) >>= \case Left e -> do warning (show e) diff --git a/P2P/Annex.hs b/P2P/Annex.hs index 737d96be91..41f5f3b5dd 100644 --- a/P2P/Annex.hs +++ b/P2P/Annex.hs @@ -76,7 +76,7 @@ runLocal runst runner a = case a of v <- tryNonAsync $ do let runtransfer ti = Right <$> transfer download k af (\p -> - getViaTmp rsp DefaultVerify k $ \tmp -> + getViaTmp rsp DefaultVerify k af $ \tmp -> storefile (fromRawFilePath tmp) o l getb validitycheck p ti) let fallback = return $ Left $ ProtoFailureMessage "transfer already in progress, or unable to take transfer lock" diff --git a/Remote/Git.hs b/Remote/Git.hs index 97fee8cb92..0a44f68548 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -690,7 +690,7 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate copier <- mkCopier hardlink st params let verify = Annex.Content.RemoteVerify r let rsp = RetrievalAllKeysSecure - res <- Annex.Content.getViaTmp rsp verify key $ \dest -> + res <- Annex.Content.getViaTmp rsp verify key file $ \dest -> metered (Just (combineMeterUpdate meterupdate p)) key $ \_ p' -> copier object (fromRawFilePath dest) p' (liftIO checksuccessio) Annex.Content.saveState True diff --git a/Types/CleanupActions.hs b/Types/CleanupActions.hs index 337b7eadf1..23dc7e748a 100644 --- a/Types/CleanupActions.hs +++ b/Types/CleanupActions.hs @@ -16,6 +16,7 @@ data CleanupAction | StopHook UUID | FsckCleanup | SshCachingCleanup + | AdjustedBranchUpdate | TorrentCleanup URLString | OtherTmpCleanup deriving (Eq, Ord) diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index c4e3abbf8e..7de6b62e46 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -125,6 +125,7 @@ data GitConfig = GitConfig , annexAutoUpgradeRepository :: Bool , annexCommitMode :: CommitMode , annexSkipUnknown :: Bool + , annexAdjustedBranchRefresh :: Integer , coreSymlinks :: Bool , coreSharedRepository :: SharedRepository , receiveDenyCurrentBranch :: DenyCurrentBranch @@ -219,6 +220,10 @@ extractGitConfig configsource r = GitConfig then ManualCommit else AutomaticCommit , annexSkipUnknown = getbool (annexConfig "skipunknown") True + , annexAdjustedBranchRefresh = fromMaybe + -- parse as bool if it's not a number + (if getbool "adjustedbranchrefresh" False then 1 else 0) + (getmayberead (annexConfig "adjustedbranchrefresh")) , coreSymlinks = getbool "core.symlinks" True , coreSharedRepository = getSharedRepository r , receiveDenyCurrentBranch = getDenyCurrentBranch r diff --git a/Upgrade/V0.hs b/Upgrade/V0.hs index bddc0bd000..ca8601e224 100644 --- a/Upgrade/V0.hs +++ b/Upgrade/V0.hs @@ -19,7 +19,8 @@ upgrade = do olddir <- fromRawFilePath <$> fromRepo gitAnnexDir keys <- getKeysPresent0 olddir forM_ keys $ \k -> - moveAnnex k $ toRawFilePath $ olddir keyFile0 k + moveAnnex k (AssociatedFile Nothing) + (toRawFilePath $ olddir keyFile0 k) -- update the symlinks to the key files -- No longer needed here; V1.upgrade does the same thing diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index 64a21e7f71..f69025ea06 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -80,7 +80,7 @@ moveContent = do let d = parentDir f' liftIO $ allowWrite d liftIO $ allowWrite f' - _ <- moveAnnex k f' + _ <- moveAnnex k (AssociatedFile Nothing) f' liftIO $ removeDirectory (fromRawFilePath d) updateSymlinks :: Annex () diff --git a/doc/git-annex-adjust.mdwn b/doc/git-annex-adjust.mdwn index 125272b86a..97b7518a84 100644 --- a/doc/git-annex-adjust.mdwn +++ b/doc/git-annex-adjust.mdwn @@ -82,7 +82,8 @@ and will also propagate commits back to the original branch. branch. To update the adjusted branch to reflect changes to content availability, - run `git annex adjust --hide-missing` again. + run `git annex adjust --hide-missing` again. Or, to automate updates, + set the `annex.adjustedbranchrefresh` config. Despite missing files being hidden, `git annex sync --content` will still operate on them, and can be used to download missing @@ -104,7 +105,8 @@ and will also propagate commits back to the original branch. not be broken symlinks. To update the adjusted branch to reflect changes to content availability, - run `git annex adjust --unlock-present` again. Or use `git-annex sync + run `git annex adjust --unlock-present` again. Or, to automate updates, + set the `annex.adjustedbranchrefresh` config. Or use `git-annex sync --content`, which updates the branch after transferring content. # SEE ALSO diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 5240b3e46c..1c49bf408b 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1016,6 +1016,22 @@ Like other git commands, git-annex is configured via `.git/config`. When the `--batch` option is used, this configuration is ignored. +* `annex.adjustedbranchrefresh` + + When [[git-annex-adjust]](1) is used to set up an adjusted branch + that needs to be refreshed after getting or dropping files, this config + controls how frequently the branch is refreshed. + + Refreshing the branch takes some time, so doing it after every file + can be too slow. The default value is 0 (or false), which does not + refresh the branch. 1 (or true) will refresh once, after git-annex + has made other changes. Higher values refresh after approximately that + many files need to be updated. Ie, 2 refreshes after every file, + and 100 after every 99 files. + + (If git-annex gets faster in the future, refresh rates will increase + proportional to the speed improvements.) + * `annex.queuesize` git-annex builds a queue of git commands, in order to combine similar diff --git a/doc/todo/update_adjusted_branch_on_content_availability_change.mdwn b/doc/todo/update_adjusted_branch_on_content_availability_change.mdwn index f41b25f129..133382e11b 100644 --- a/doc/todo/update_adjusted_branch_on_content_availability_change.mdwn +++ b/doc/todo/update_adjusted_branch_on_content_availability_change.mdwn @@ -10,6 +10,18 @@ it makes sense to do that? And for that matter, can it be done efficiently enough to do it more frequently? After every file or after some number of files, or after processing all files in a (sub-)tree? +> Since the answer to that will change over time, let's make a config for +> it. annex.adjustedbranchrefresh +> +> The config can start out as a range from 0 up that indicates how +> infrequently to update the branch for this. With 0 being never refresh, +> 1 being refresh the minimum (once at shutdown), 2 being refresh after +> 1 file, 100 after every 99 files, etc. +> If refreshing gets twice as fast, divide the numbers by two, etc. +> If it becomes sufficiently fast that the overhead doesn't matter, +> it can change to a simple boolean. Since 0 is false and > 0 is true, +> in git config, the old values will still work. (done) + Investigation of the obvious things that make it slow follows: ## efficient branch adjustment diff --git a/git-annex.cabal b/git-annex.cabal index f2dc13ec20..f5db5cc5c7 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -609,6 +609,7 @@ Executable git-annex Annex Annex.Action Annex.AdjustedBranch + Annex.AdjustedBranch.Merge Annex.AdjustedBranch.Name Annex.AutoMerge Annex.BloomFilter