From 5d380c6c5cbca4637840236d382ad69ce1d84805 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 6 Aug 2020 13:47:45 -0400 Subject: [PATCH] when workTreeItems finds a problem with a parameter, don't go on to process it Part of workTreeItems is trying detect a case where git porcelain refuses to process a file, and where git ls-files silently outputs nothing. But, it's hard to perfectly replicate git's behavior, and besides, git's behavior could change. So it could be that we warn, but then git ls-files does not skip over it, and so git-annex also processes it after warning about it. So, if we think we have a problem with a parameter, display the warning, and skip processing it at all. Implementing this was complicated by needing to handle the case where all command-line parameters get filtered out this way. Which is different than the case where there are none, because we don't want to operate on all files in this new case.. --- CmdLine/Seek.hs | 96 +++++++++++-------- Command/Sync.hs | 4 +- ..._a2d1112b0a9338b7c093546a985a5f90._comment | 51 ++++++++++ ..._c6e30af134a73bc8b3faee83c7c3b378._comment | 11 +++ 4 files changed, 121 insertions(+), 41 deletions(-) create mode 100644 doc/bugs/__39__Beyond_symbolic__39___link_error_when_link_is_upstream_of_repo/comment_1_a2d1112b0a9338b7c093546a985a5f90._comment create mode 100644 doc/bugs/__39__Beyond_symbolic__39___link_error_when_link_is_upstream_of_repo/comment_2_c6e30af134a73bc8b3faee83c7c3b378._comment diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index de75fa9fef..88a3c61212 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -55,20 +55,20 @@ data AnnexedFileSeeker = AnnexedFileSeeker , usesLocationLog :: Bool } -withFilesInGitAnnex :: WarnUnmatchWhen -> AnnexedFileSeeker -> [WorkTreeItem] -> CommandSeek +withFilesInGitAnnex :: WarnUnmatchWhen -> AnnexedFileSeeker -> WorkTreeItems -> CommandSeek withFilesInGitAnnex ww a l = seekFilteredKeys a $ seekHelper fst3 ww LsFiles.inRepoDetails l -withFilesInGitAnnexNonRecursive :: WarnUnmatchWhen -> String -> AnnexedFileSeeker -> [WorkTreeItem] -> CommandSeek -withFilesInGitAnnexNonRecursive ww needforce a l = ifM (Annex.getState Annex.force) - ( withFilesInGitAnnex ww a l +withFilesInGitAnnexNonRecursive :: WarnUnmatchWhen -> String -> AnnexedFileSeeker -> WorkTreeItems -> CommandSeek +withFilesInGitAnnexNonRecursive ww needforce a (WorkTreeItems l) = ifM (Annex.getState Annex.force) + ( withFilesInGitAnnex ww a (WorkTreeItems l) , if null l then giveup needforce else seekFilteredKeys a (getfiles [] l) ) where getfiles c [] = return (reverse c) - getfiles c ((WorkTreeItem p):ps) = do + getfiles c (p:ps) = do os <- seekOptions ww (fs, cleanup) <- inRepo $ LsFiles.inRepoDetails os [toRawFilePath p] case fs of @@ -79,17 +79,20 @@ withFilesInGitAnnexNonRecursive ww needforce a l = ifM (Annex.getState Annex.for void $ liftIO $ cleanup getfiles c ps _ -> giveup needforce +withFilesInGitAnnexNonRecursive _ _ _ NoWorkTreeItems = noop -withFilesNotInGit :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withFilesNotInGit a l = go =<< seek +withFilesNotInGit :: (RawFilePath -> CommandSeek) -> WorkTreeItems -> CommandSeek +withFilesNotInGit a (WorkTreeItems l) = go =<< seek where seek = do force <- Annex.getState Annex.force g <- gitRepo liftIO $ Git.Command.leaveZombie - <$> LsFiles.notInRepo [] force (map (\(WorkTreeItem f) -> toRawFilePath f) l) g + <$> LsFiles.notInRepo [] force l' g go fs = seekFiltered a $ - return $ concat $ segmentPaths id (map (\(WorkTreeItem f) -> toRawFilePath f) l) fs + return $ concat $ segmentPaths id l' fs + l' = map toRawFilePath l +withFilesNotInGit _ NoWorkTreeItems = noop withPathContents :: ((FilePath, FilePath) -> CommandSeek) -> CmdParams -> CommandSeek withPathContents a params = do @@ -123,13 +126,13 @@ withPairs a params = sequence_ $ map a $ pairs [] params pairs c (x:y:xs) = pairs ((x,y):c) xs pairs _ _ = giveup "expected pairs" -withFilesToBeCommitted :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withFilesToBeCommitted :: (RawFilePath -> CommandSeek) -> WorkTreeItems -> CommandSeek withFilesToBeCommitted a l = seekFiltered a $ seekHelper id WarnUnmatchWorkTreeItems (const LsFiles.stagedNotDeleted) l {- unlocked pointer files that are staged, and whose content has not been - modified-} -withUnmodifiedUnlockedPointers :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withUnmodifiedUnlockedPointers :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> WorkTreeItems -> CommandSeek withUnmodifiedUnlockedPointers ww a l = seekFiltered a unlockedfiles where unlockedfiles = filterM isUnmodifiedUnlocked @@ -141,7 +144,7 @@ isUnmodifiedUnlocked f = catKeyFile f >>= \case Just k -> sameInodeCache f =<< Database.Keys.getInodeCaches k {- Finds files that may be modified. -} -withFilesMaybeModified :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withFilesMaybeModified :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> WorkTreeItems -> CommandSeek withFilesMaybeModified ww a params = seekFiltered a $ seekHelper id ww LsFiles.modified params @@ -168,8 +171,8 @@ withKeyOptions -> Bool -> AnnexedFileSeeker -> ((Key, ActionItem) -> CommandSeek) - -> ([WorkTreeItem] -> CommandSeek) - -> [WorkTreeItem] + -> (WorkTreeItems -> CommandSeek) + -> WorkTreeItems -> CommandSeek withKeyOptions ko auto seeker keyaction = withKeyOptions' ko auto mkkeyaction where @@ -192,18 +195,18 @@ withKeyOptions' :: Maybe KeyOptions -> Bool -> Annex ((Key, ActionItem) -> Annex ()) - -> ([WorkTreeItem] -> CommandSeek) - -> [WorkTreeItem] + -> (WorkTreeItems -> CommandSeek) + -> WorkTreeItems -> CommandSeek -withKeyOptions' ko auto mkkeyaction fallbackaction params = do +withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do bare <- fromRepo Git.repoIsLocalBare when (auto && bare) $ giveup "Cannot use --auto in a bare repository" - case (null params, ko) of + case (noworktreeitems, ko) of (True, Nothing) | bare -> noauto runallkeys - | otherwise -> fallbackaction params - (False, Nothing) -> fallbackaction params + | otherwise -> fallbackaction worktreeitems + (False, Nothing) -> fallbackaction worktreeitems (True, Just WantAllKeys) -> noauto runallkeys (True, Just WantUnusedKeys) -> noauto $ runkeyaction unusedKeys' (True, Just WantFailedTransfers) -> noauto runfailedtransfers @@ -216,6 +219,11 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do | auto = giveup "Cannot use --auto with --all or --branch or --unused or --key or --incomplete" | otherwise = a + noworktreeitems = case worktreeitems of + WorkTreeItems [] -> True + WorkTreeItems _ -> False + NoWorkTreeItems -> False + incompletekeys = staleKeysPrune gitAnnexTmpObjectDir True -- List all location log files on the git-annex branch, @@ -378,14 +386,13 @@ seekFilteredKeys seeker listfs = do Just _ -> mdprocess matcher mdreader ofeeder ocloser Nothing -> liftIO $ void ocloser -seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> [WorkTreeItem] -> Annex [a] -seekHelper c ww a l = do +seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> WorkTreeItems -> Annex [a] +seekHelper c ww a (WorkTreeItems l) = do os <- seekOptions ww inRepo $ \g -> - concat . concat <$> forM (segmentXargsOrdered l') + concat . concat <$> forM (segmentXargsOrdered l) (runSegmentPaths c (\fs -> Git.Command.leaveZombie <$> a os fs g) . map toRawFilePath) - where - l' = map (\(WorkTreeItem f) -> f) l +seekHelper _ _ _ NoWorkTreeItems = return [] data WarnUnmatchWhen = WarnUnmatchLsFiles | WarnUnmatchWorkTreeItems @@ -397,8 +404,13 @@ seekOptions WarnUnmatchLsFiles = ) seekOptions WarnUnmatchWorkTreeItems = return [] --- An item in the work tree, which may be a file or a directory. -newtype WorkTreeItem = WorkTreeItem FilePath +-- Items in the work tree, which may be files or directories. +data WorkTreeItems + = WorkTreeItems [FilePath] + -- ^ An empty list often means all files. + | NoWorkTreeItems + -- ^ Used when no work tree items should be operated on. + deriving (Show) -- When in an adjusted branch that hides some files, it may not exist -- in the current work tree, but in the original branch. This allows @@ -419,27 +431,32 @@ newtype AllowHidden = AllowHidden Bool -- -- Note that, unlike --error-unmatch, using this does not warn -- about command-line parameters that exist, but are not checked into git. -workTreeItems :: WarnUnmatchWhen -> CmdParams -> Annex [WorkTreeItem] +workTreeItems :: WarnUnmatchWhen -> CmdParams -> Annex WorkTreeItems workTreeItems = workTreeItems' (AllowHidden False) -workTreeItems' :: AllowHidden -> WarnUnmatchWhen -> CmdParams -> Annex [WorkTreeItem] -workTreeItems' (AllowHidden allowhidden) ww ps = do - case ww of - WarnUnmatchWorkTreeItems -> runcheck - WarnUnmatchLsFiles -> - whenM (annexSkipUnknown <$> Annex.getGitConfig) - runcheck - return (map WorkTreeItem ps) +workTreeItems' :: AllowHidden -> WarnUnmatchWhen -> CmdParams -> Annex WorkTreeItems +workTreeItems' (AllowHidden allowhidden) ww ps = case ww of + WarnUnmatchWorkTreeItems -> runcheck + WarnUnmatchLsFiles -> + ifM (annexSkipUnknown <$> Annex.getGitConfig) + ( runcheck + , return $ WorkTreeItems ps + ) where runcheck = do currbranch <- getCurrentBranch - forM_ ps $ \p -> do + ps' <- flip filterM ps $ \p -> do relf <- liftIO $ relPathCwdToFile p ifM (not <$> (exists p <||> hidden currbranch relf)) ( prob (p ++ " not found") - , whenM (viasymlink (upFrom relf)) $ - prob (p ++ " is beyond a symbolic link") + , ifM (viasymlink (upFrom relf)) + ( prob (p ++ " is beyond a symbolic link") + , return True + ) ) + if null ps' && not (null ps) + then return NoWorkTreeItems + else return (WorkTreeItems ps') exists p = isJust <$> liftIO (catchMaybeIO $ getSymbolicLinkStatus p) @@ -458,6 +475,7 @@ workTreeItems' (AllowHidden allowhidden) ww ps = do prob msg = do toplevelWarning False msg Annex.incError + return False notSymlink :: RawFilePath -> IO Bool notSymlink f = liftIO $ not . isSymbolicLink <$> R.getSymbolicLinkStatus f diff --git a/Command/Sync.hs b/Command/Sync.hs index 6d7c54a2ff..0bd13e4408 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -635,7 +635,7 @@ seekSyncContent _ [] _ = return False seekSyncContent o rs currbranch = do mvar <- liftIO newEmptyMVar bloom <- case keyOptions o of - Just WantAllKeys -> Just <$> genBloomFilter (seekworktree mvar []) + Just WantAllKeys -> Just <$> genBloomFilter (seekworktree mvar (WorkTreeItems [])) _ -> case currbranch of (Just origbranch, Just adj) | adjustmentHidesFiles adj -> do l <- workTreeItems' (AllowHidden True) ww (contentOfOption o) @@ -648,7 +648,7 @@ seekSyncContent o rs currbranch = do withKeyOptions' (keyOptions o) False (return (commandAction . gokey mvar bloom)) (const noop) - [] + (WorkTreeItems []) waitForAllRunningCommandActions liftIO $ not <$> isEmptyMVar mvar where diff --git a/doc/bugs/__39__Beyond_symbolic__39___link_error_when_link_is_upstream_of_repo/comment_1_a2d1112b0a9338b7c093546a985a5f90._comment b/doc/bugs/__39__Beyond_symbolic__39___link_error_when_link_is_upstream_of_repo/comment_1_a2d1112b0a9338b7c093546a985a5f90._comment new file mode 100644 index 0000000000..be4951bfb8 --- /dev/null +++ b/doc/bugs/__39__Beyond_symbolic__39___link_error_when_link_is_upstream_of_repo/comment_1_a2d1112b0a9338b7c093546a985a5f90._comment @@ -0,0 +1,51 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-08-06T16:01:46Z" + content=""" +Really, the shell is the only thing that knows cd has been used with a symlink, +the real cwd is what getCurrentDirectory returns, same as getcwd(3). + +(I think this is a bad design decision on the part of shells, it often +leads to various confusion and odd behavior. A good example of this is +that, if you cd bar/dir, which is a symlink to foo/dir, and both +bar/file and foo/file exist, what does rm ../file delete? Not what the +shell prompt's `bar>` suggests!) + +What this code needs to do is stop traversing the path and checking for +symlinks when it reaches the top of the working tree. But I'm not currently +seeing a good way to do that. It would need to examine the path for +symlinks and resolve them. Like canonicalizePath, but don't want to +canonicalize the part of the path inside the working tree, eg a symlink +"./foo" should not be canonicalized to "./bar". But how does it know where +inside working tree part begins when there are symlinks involved? Probably +I'm missing something simple which git does to deal with this. + +(Also, canonicalizePath or anything like it does add a certian amount of +overhead for every file passed to git-annex on the command line, just for +this edge case. The current code does 2 stats, but this would be stats +all the way up the path.) + +It also looks like the nearby code `hidden currbranch relf` +will not work in this same case, because relf is "../../alink/repo/f0". +In that case it will display "not found" and really fail to process +the file. + +In a way the really surprising thing is not that it rejects this file with +"is beyond a symbolic link" but that the file then goes on to be processed +anyway. Because this code only displays a warning, and is written with the +naive hope that it will manage to replicate git's porcelain behavior. +But then git ls-files goes on to list the file anyway, so it's processed +anyway. + +I think it should certianly, if it warns about a file, not pass it into git +ls-files. So the warning really becomes an error and that surprise is +avoided. + +That might be enough to consider this dealt with, without using +canonicalizePath. There would then be an inconsistency that `git add` etc +can be used with that path, while `git annex add` etc reject it as beyond +a symbolic link. But git-annex is not obliged to copy git's behavior in what +path situations it will accept and rejecting this particular one does not seem +like a hardship to the user. +"""]] diff --git a/doc/bugs/__39__Beyond_symbolic__39___link_error_when_link_is_upstream_of_repo/comment_2_c6e30af134a73bc8b3faee83c7c3b378._comment b/doc/bugs/__39__Beyond_symbolic__39___link_error_when_link_is_upstream_of_repo/comment_2_c6e30af134a73bc8b3faee83c7c3b378._comment new file mode 100644 index 0000000000..985a751088 --- /dev/null +++ b/doc/bugs/__39__Beyond_symbolic__39___link_error_when_link_is_upstream_of_repo/comment_2_c6e30af134a73bc8b3faee83c7c3b378._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-08-06T17:46:11Z" + content=""" +I've made it skip processing the file in this case. + +Remain unsure if I want to close it now that the behavior is at least not +weird, or try to more closely replicate git's behavior of what symlinks +it's ok to be behind and which not. +"""]]