From 4a788fbb3b2fb507658380de634b03681e258b09 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 19 Oct 2018 17:51:25 -0400 Subject: [PATCH] sync --content now supports --hide-missing adjusted branches This relies on git ls-files --with-tree, which I'm using in a way that its man page does not document. Hm. I emailed the git list to try to get the docs improved, but at least the git test suite does test the same kind of use case I'm using here. Performance impact when not in an adjusted branch is limited to some additional MVar accesses, and a single git call to determine the name of the current branch. So very minimal. When in an adjusted branch, the performance impact is in Annex.WorkTree.lookupFile, which starts doing an equal amount of work for files that didn't exist as it already did for files that were unlocked. This commit was sponsored by Jochen Bartl on Patreon. --- Annex/CatFile.hs | 21 +++++++++++++-- Annex/WorkTree.hs | 14 +++++----- CHANGELOG | 2 ++ CmdLine/Seek.hs | 21 +++++++++++++-- Command/Sync.hs | 27 ++++++++++++------- Git/LsFiles.hs | 17 +++++++++--- doc/git-annex-adjust.mdwn | 6 ++++- doc/git-annex-sync.mdwn | 10 +++---- ..._e3bf8aaecc0f612873609c92814dcd12._comment | 2 -- 9 files changed, 89 insertions(+), 31 deletions(-) diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 7062f785af..4f42db617d 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -1,6 +1,6 @@ {- git cat-file interface, with handle automatically stored in the Annex monad - - - Copyright 2011-2015 Joey Hess + - Copyright 2011-2018 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -16,9 +16,11 @@ module Annex.CatFile ( catObjectMetaData, catFileStop, catKey, + catSymLinkTarget, catKeyFile, catKeyFileHEAD, - catSymLinkTarget, + catKeyFileHidden, + catObjectMetaDataHidden, ) where import qualified Data.ByteString.Lazy as L @@ -34,6 +36,8 @@ import Git.FilePath import Git.Index import qualified Git.Ref import Annex.Link +import Annex.CurrentBranch +import Types.AdjustedBranch import Utility.FileSystemEncoding catFile :: Git.Branch -> FilePath -> Annex L.ByteString @@ -142,3 +146,16 @@ catKeyFile f = ifM (Annex.getState Annex.daemon) catKeyFileHEAD :: FilePath -> Annex (Maybe Key) catKeyFileHEAD f = catKey $ Git.Ref.fileFromRef Git.Ref.headRef f + +{- Look in the original branch from whence an adjusted branch is based + - to find the file. But only when the adjustment hides some files. -} +catKeyFileHidden :: FilePath -> CurrBranch -> Annex (Maybe Key) +catKeyFileHidden = hiddenCat catKey + +catObjectMetaDataHidden :: FilePath -> CurrBranch -> Annex (Maybe (Integer, ObjectType)) +catObjectMetaDataHidden = hiddenCat catObjectMetaData + +hiddenCat :: (Ref -> Annex (Maybe a)) -> FilePath -> CurrBranch -> Annex (Maybe a) +hiddenCat a f (Just origbranch, Just adj) + | adjustmentHidesFiles adj = a (Git.Ref.fileFromRef origbranch f) +hiddenCat _ _ _ = return Nothing diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index 9301707987..89d80c0cae 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -1,6 +1,6 @@ {- git-annex worktree files - - - Copyright 2013-2016 Joey Hess + - Copyright 2013-2018 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -13,6 +13,7 @@ import Annex.CatFile import Annex.Version import Annex.Content import Annex.ReplaceFile +import Annex.CurrentBranch import Config import Git.FilePath import qualified Git.Ref @@ -28,19 +29,20 @@ import qualified Database.Keys.SQL - - An unlocked file will not have a link on disk, so fall back to - looking for a pointer to a key in git. + - + - When in an adjusted branch that may have hidden the file, looks for a + - pointer to a key in the original branch. -} lookupFile :: FilePath -> Annex (Maybe Key) lookupFile file = isAnnexLink file >>= \case - Just key -> makeret key + Just key -> return (Just key) Nothing -> ifM (versionSupportsUnlockedPointers <||> isDirect) ( ifM (liftIO $ doesFileExist file) - ( maybe (return Nothing) makeret =<< catKeyFile file - , return Nothing + ( catKeyFile file + , catKeyFileHidden file =<< getCurrentBranch ) , return Nothing ) - where - makeret = return . Just {- Modifies an action to only act on files that are already annexed, - and passes the key on to it. -} diff --git a/CHANGELOG b/CHANGELOG index 7d4a46604a..fb0d8619d4 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,8 @@ git-annex (6.20181012) UNRELEASED; urgency=medium again will update the branch as needed. This is mostly useful with --hide-missing to hide/unhide files after their content has been dropped or received. + * git-annex sync --content supports --hide-missing; it can + be used to get the content of hidden files. * Removed the old Android app. * Removed support for building with very old ghc < 8.0.1, and with yesod < 1.4.3, and without concurrent-output, diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 677fc4b684..47a52176c6 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -29,6 +29,7 @@ import Logs.Transfer import Remote.List import qualified Remote import Annex.CatFile +import Annex.CurrentBranch import Annex.Content import Annex.InodeSentinal import qualified Database.Keys @@ -270,17 +271,33 @@ seekHelper a l = inRepo $ \g -> -- An item in the work tree, which may be a file or a directory. newtype WorkTreeItem = WorkTreeItem FilePath +-- When in an adjusted branch that hides some files, it may not exist +-- in the current work tree, but in the original branch. This allows +-- seeking for such files. +newtype AllowHidden = AllowHidden Bool + -- Many git commands seek work tree items matching some criteria, -- and silently skip over anything that does not exist. But users expect -- an error message when one of the files they provided as a command-line -- parameter doesn't exist, so this checks that each exists. workTreeItems :: CmdParams -> Annex [WorkTreeItem] -workTreeItems ps = do +workTreeItems = workTreeItems' (AllowHidden False) + +workTreeItems' :: AllowHidden -> CmdParams -> Annex [WorkTreeItem] +workTreeItems' (AllowHidden allowhidden) ps = do + currbranch <- getCurrentBranch forM_ ps $ \p -> - unlessM (isJust <$> liftIO (catchMaybeIO $ getSymbolicLinkStatus p)) $ do + unlessM (exists p <||> hidden currbranch p) $ do toplevelWarning False (p ++ " not found") Annex.incError return (map WorkTreeItem ps) + where + exists p = isJust <$> liftIO (catchMaybeIO $ getSymbolicLinkStatus p) + hidden currbranch p + | allowhidden = do + f <- liftIO $ relPathCwdToFile p + isJust <$> catObjectMetaDataHidden f currbranch + | otherwise = return False notSymlink :: FilePath -> IO Bool notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f diff --git a/Command/Sync.hs b/Command/Sync.hs index 5e42e6eff3..c7b2cd8825 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -556,26 +556,35 @@ seekSyncContent o rs currbranch = do bloom <- case keyOptions o of Just WantAllKeys -> Just <$> genBloomFilter (seekworktree mvar []) _ -> case currbranch of - (origbranch, Just adj) | adjustmentHidesFiles adj -> do - seekbranch origbranch (contentOfOption o) - pure Nohing - _ = do + (Just origbranch, Just adj) | adjustmentHidesFiles adj -> do + l <- workTreeItems' (AllowHidden True) (contentOfOption o) + seekincludinghidden origbranch mvar l (const noop) + pure Nothing + _ -> do l <- workTreeItems (contentOfOption o) seekworktree mvar l (const noop) pure Nothing withKeyOptions' (keyOptions o) False - (return (seekkeys mvar bloom)) + (return (gokey mvar bloom)) (const noop) [] finishCommandActions liftIO $ not <$> isEmptyMVar mvar where - seekworktree mvar l bloomfeeder = seekHelper LsFiles.inRepo l >>= - mapM_ (\f -> ifAnnexed f (go (Right bloomfeeder) mvar (AssociatedFile (Just f))) noop) + seekworktree mvar l bloomfeeder = + seekHelper LsFiles.inRepo l + >>= gofiles bloomfeeder mvar - seekbranch origbranch l = + seekincludinghidden origbranch mvar l bloomfeeder = + seekHelper (LsFiles.inRepoOrBranch origbranch) l + >>= gofiles bloomfeeder mvar - seekkeys mvar bloom (k, _) = go (Left bloom) mvar (AssociatedFile Nothing) k + gofiles bloomfeeder mvar = mapM_ $ \f -> + ifAnnexed f + (go (Right bloomfeeder) mvar (AssociatedFile (Just f))) + noop + + gokey mvar bloom (k, _) = go (Left bloom) mvar (AssociatedFile Nothing) k go ebloom mvar af k = commandAction $ do whenM (syncFile ebloom rs af k) $ diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index 20b8245498..2dc24ad0fa 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -7,6 +7,7 @@ module Git.LsFiles ( inRepo, + inRepoOrBranch, notInRepo, notInRepoIncludingEmptyDirectories, allFiles, @@ -34,14 +35,22 @@ import Git.Sha import Numeric import System.Posix.Types -{- Scans for files that are checked into git at the specified locations. -} +{- Scans for files that are checked into git's index at the specified locations. -} inRepo :: [FilePath] -> Repo -> IO ([FilePath], IO Bool) -inRepo l = pipeNullSplit $ +inRepo = inRepo' [] + +inRepo' :: [CommandParam] -> [FilePath] -> Repo -> IO ([FilePath], IO Bool) +inRepo' ps l = pipeNullSplit $ Param "ls-files" : Param "--cached" : Param "-z" : - Param "--" : - map File l + ps ++ + (Param "--" : map File l) + +{- Files that are checked into the index or have been committed to a + - branch. -} +inRepoOrBranch :: Branch -> [FilePath] -> Repo -> IO ([FilePath], IO Bool) +inRepoOrBranch (Ref b) = inRepo' [Param $ "--with-tree=" ++ b] {- Scans for files at the specified locations that are not checked into git. -} notInRepo :: Bool -> [FilePath] -> Repo -> IO ([FilePath], IO Bool) diff --git a/doc/git-annex-adjust.mdwn b/doc/git-annex-adjust.mdwn index 07319f7f17..0acfffca10 100644 --- a/doc/git-annex-adjust.mdwn +++ b/doc/git-annex-adjust.mdwn @@ -54,11 +54,15 @@ This command can only be used in a v6 git-annex repository. changes, so if you `git annex drop` files, they will become broken links in the usual way. And when files that were missing are copied into the repository from elsewhere, they won't immediatly become visible in the - branch. + branch. To update the adjusted branch to reflect changes to content availability, run `git annex adjust --hide-missing` again. + Despite missing files being hidden, `git annex sync --content` will + still operate on them, and can be used to download missing + files from remotes. + This option can be combined with --unlock or --fix. # SEE ALSO diff --git a/doc/git-annex-sync.mdwn b/doc/git-annex-sync.mdwn index 2e6912b15e..9ac27b65e5 100644 --- a/doc/git-annex-sync.mdwn +++ b/doc/git-annex-sync.mdwn @@ -70,15 +70,15 @@ by running "git annex sync" on the remote. * `--content`, `--no-content` Normally, syncing does not transfer the contents of annexed files. - The --content option causes the content of files in the work tree + The --content option causes the content of annexed files to also be uploaded and downloaded as necessary. The `annex.synccontent` configuration can be set to true to make content be synced by default. - Normally this tries to get each annexed file in the work tree - that the local repository does not yet have, and then copies each - file in the work tree to every remote that it is syncing with. + Normally this tries to get each annexed file that the local repository + does not yet have, and then copies each file to every remote that it + is syncing with. This behavior can be overridden by configuring the preferred content of a repository. See [[git-annex-preferred-content]](1). @@ -88,7 +88,7 @@ by running "git annex sync" on the remote. * `--content-of=path` `-C path` - While --content operates on all annexed files in the work tree, + While --content operates on all annexed files, --content-of allows limiting the transferred files to ones in a given location. diff --git a/doc/todo/hide_missing_files/comment_4_e3bf8aaecc0f612873609c92814dcd12._comment b/doc/todo/hide_missing_files/comment_4_e3bf8aaecc0f612873609c92814dcd12._comment index 279d0557d7..dbda929438 100644 --- a/doc/todo/hide_missing_files/comment_4_e3bf8aaecc0f612873609c92814dcd12._comment +++ b/doc/todo/hide_missing_files/comment_4_e3bf8aaecc0f612873609c92814dcd12._comment @@ -14,8 +14,6 @@ reports about it being too slow. ;) What still needs to be done: -* `git annex sync --content` needs to scan the original branch, not the - adjusted branch, to find files to transfer. * `git annex sync` needs to update the adjusted branch. * The assistant also needs to scan the original branch when looking for files to download.