Avoid repeated checking that files passed on the command line exist.

git annex add, git annex lock etc make multiple seek passes,
and each seek pass checked that files existed. That was unncessary
redundant work.

Fixed by adding a new WorkTreeItem type, make seek actions use it,
and check that the files exist when constructing it.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-10-16 14:10:03 -04:00
parent a461cf2ce6
commit 85ed38a574
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
25 changed files with 128 additions and 71 deletions

View file

@ -10,6 +10,7 @@ git-annex (6.20171004) UNRELEASED; urgency=medium
* add: Replace work tree file atomically. Before, there was a window
where interrupting an add could result in the file being
moved into the annex, with no symlink yet created.
* Avoid repeated checking that files passed on the command line exist.
-- Joey Hess <id@joeyh.name> Sat, 07 Oct 2017 14:11:00 -0400

View file

@ -32,22 +32,20 @@ import qualified Remote
import Annex.CatFile
import Annex.Content
withFilesInGit :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek
withFilesInGit a params = seekActions $ prepFiltered a $
seekHelper LsFiles.inRepo params
withFilesInGit :: (FilePath -> CommandStart) -> [WorkTreeItem] -> CommandSeek
withFilesInGit a l = seekActions $ prepFiltered a $
seekHelper LsFiles.inRepo l
withFilesInGitNonRecursive :: String -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek
withFilesInGitNonRecursive needforce a params = ifM (Annex.getState Annex.force)
( withFilesInGit a params
, if null params
withFilesInGitNonRecursive :: String -> (FilePath -> CommandStart) -> [WorkTreeItem] -> CommandSeek
withFilesInGitNonRecursive needforce a l = ifM (Annex.getState Annex.force)
( withFilesInGit a l
, if null l
then giveup needforce
else do
checkFileOrDirectoryExists params
seekActions $ prepFiltered a (getfiles [] params)
else seekActions $ prepFiltered a (getfiles [] l)
)
where
getfiles c [] = return (reverse c)
getfiles c (p:ps) = do
getfiles c ((WorkTreeItem p):ps) = do
(fs, cleanup) <- inRepo $ LsFiles.inRepo [p]
case fs of
[f] -> do
@ -58,24 +56,25 @@ withFilesInGitNonRecursive needforce a params = ifM (Annex.getState Annex.force)
getfiles c ps
_ -> giveup needforce
withFilesNotInGit :: Bool -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek
withFilesNotInGit skipdotfiles a params
withFilesNotInGit :: Bool -> (FilePath -> CommandStart) -> [WorkTreeItem] -> CommandSeek
withFilesNotInGit skipdotfiles a l
| skipdotfiles = do
{- dotfiles are not acted on unless explicitly listed -}
files <- filter (not . dotfile) <$>
seekunless (null ps && not (null params)) ps
seekunless (null ps && not (null l)) ps
dotfiles <- seekunless (null dotps) dotps
go (files++dotfiles)
| otherwise = go =<< seekunless False params
| otherwise = go =<< seekunless False l
where
(dotps, ps) = partition dotfile params
(dotps, ps) = partition (\(WorkTreeItem f) -> dotfile f) l
seekunless True _ = return []
seekunless _ l = do
seekunless _ l' = do
force <- Annex.getState Annex.force
g <- gitRepo
liftIO $ Git.Command.leaveZombie <$> LsFiles.notInRepo force l g
go l = seekActions $ prepFiltered a $
return $ concat $ segmentPaths params l
liftIO $ Git.Command.leaveZombie
<$> LsFiles.notInRepo force (map (\(WorkTreeItem f) -> f) l') g
go fs = seekActions $ prepFiltered a $
return $ concat $ segmentPaths (map (\(WorkTreeItem f) -> f) l) fs
withFilesInRefs :: (FilePath -> Key -> CommandStart) -> [Git.Ref] -> CommandSeek
withFilesInRefs a = mapM_ go
@ -121,14 +120,14 @@ withPairs a params = seekActions $ return $ map a $ pairs [] params
pairs c (x:y:xs) = pairs ((x,y):c) xs
pairs _ _ = giveup "expected pairs"
withFilesToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek
withFilesToBeCommitted a params = seekActions $ prepFiltered a $
seekHelper LsFiles.stagedNotDeleted params
withFilesToBeCommitted :: (FilePath -> CommandStart) -> [WorkTreeItem] -> CommandSeek
withFilesToBeCommitted a l = seekActions $ prepFiltered a $
seekHelper LsFiles.stagedNotDeleted l
withFilesOldUnlocked :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek
withFilesOldUnlocked :: (FilePath -> CommandStart) -> [WorkTreeItem] -> CommandSeek
withFilesOldUnlocked = withFilesOldUnlocked' LsFiles.typeChanged
withFilesOldUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek
withFilesOldUnlockedToBeCommitted :: (FilePath -> CommandStart) -> [WorkTreeItem] -> CommandSeek
withFilesOldUnlockedToBeCommitted = withFilesOldUnlocked' LsFiles.typeChangedStaged
{- Unlocked files before v6 have changed type from a symlink to a regular file.
@ -136,23 +135,23 @@ withFilesOldUnlockedToBeCommitted = withFilesOldUnlocked' LsFiles.typeChangedSta
- Furthermore, unlocked files used to be a git-annex symlink,
- not some other sort of symlink.
-}
withFilesOldUnlocked' :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> (FilePath -> CommandStart) -> CmdParams -> CommandSeek
withFilesOldUnlocked' typechanged a params = seekActions $
withFilesOldUnlocked' :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> (FilePath -> CommandStart) -> [WorkTreeItem] -> CommandSeek
withFilesOldUnlocked' typechanged a l = seekActions $
prepFiltered a unlockedfiles
where
unlockedfiles = filterM isOldUnlocked =<< seekHelper typechanged params
unlockedfiles = filterM isOldUnlocked =<< seekHelper typechanged l
isOldUnlocked :: FilePath -> Annex Bool
isOldUnlocked f = liftIO (notSymlink f) <&&>
(isJust <$> catKeyFile f <||> isJust <$> catKeyFileHEAD f)
{- Finds files that may be modified. -}
withFilesMaybeModified :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek
withFilesMaybeModified :: (FilePath -> CommandStart) -> [WorkTreeItem] -> CommandSeek
withFilesMaybeModified a params = seekActions $
prepFiltered a $ seekHelper LsFiles.modified params
withKeys :: (Key -> CommandStart) -> CmdParams -> CommandSeek
withKeys a params = seekActions $ return $ map (a . parse) params
withKeys a l = seekActions $ return $ map (a . parse) l
where
parse p = fromMaybe (giveup "bad key") $ file2key p
@ -172,8 +171,8 @@ withKeyOptions
:: Maybe KeyOptions
-> Bool
-> (Key -> ActionItem -> CommandStart)
-> (CmdParams -> CommandSeek)
-> CmdParams
-> ([WorkTreeItem] -> CommandSeek)
-> [WorkTreeItem]
-> CommandSeek
withKeyOptions ko auto keyaction = withKeyOptions' ko auto mkkeyaction
where
@ -187,8 +186,8 @@ withKeyOptions'
:: Maybe KeyOptions
-> Bool
-> Annex (Key -> ActionItem -> Annex ())
-> (CmdParams -> CommandSeek)
-> CmdParams
-> ([WorkTreeItem] -> CommandSeek)
-> [WorkTreeItem]
-> CommandSeek
withKeyOptions' ko auto mkkeyaction fallbackaction params = do
bare <- fromRepo Git.repoIsLocalBare
@ -243,17 +242,27 @@ prepFiltered a fs = do
seekActions :: Annex [CommandStart] -> Annex ()
seekActions gen = mapM_ commandAction =<< gen
seekHelper :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> [FilePath] -> Annex [FilePath]
seekHelper a params = do
checkFileOrDirectoryExists params
inRepo $ \g -> concat . concat <$> forM (segmentXargsOrdered params)
seekHelper :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> [WorkTreeItem] -> Annex [FilePath]
seekHelper a l = inRepo $ \g ->
concat . concat <$> forM (segmentXargsOrdered l')
(runSegmentPaths (\fs -> Git.Command.leaveZombie <$> a fs g))
where
l' = map (\(WorkTreeItem f) -> f) l
checkFileOrDirectoryExists :: [FilePath] -> Annex ()
checkFileOrDirectoryExists ps = forM_ ps $ \p ->
unlessM (isJust <$> liftIO (catchMaybeIO $ getSymbolicLinkStatus p)) $ do
toplevelWarning False (p ++ " not found")
Annex.incError
-- An item in the work tree, which may be a file or a directory.
newtype WorkTreeItem = WorkTreeItem FilePath
-- Many git commands seek work tree items matching some criteria,
-- and silently skip over anything that does not exist. But users expect
-- an error message when one of the files they provided as a command-line
-- parameter doesn't exist, so this checks that each exists.
workTreeItems :: CmdParams -> Annex [WorkTreeItem]
workTreeItems ps = do
forM_ ps $ \p ->
unlessM (isJust <$> liftIO (catchMaybeIO $ getSymbolicLinkStatus p)) $ do
toplevelWarning False (p ++ " not found")
Annex.incError
return (map WorkTreeItem ps)
notSymlink :: FilePath -> IO Bool
notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f

View file

@ -63,7 +63,8 @@ seek o = allowConcurrentOutput $ do
giveup "--update --batch is not supported"
| otherwise -> batchFiles gofile
NoBatch -> do
let go a = a gofile (addThese o)
l <- workTreeItems (addThese o)
let go a = a gofile l
unless (updateOnly o) $
go (withFilesNotInGit (not $ includeDotFiles o))
go withFilesMaybeModified

View file

@ -43,7 +43,7 @@ seek o = allowConcurrentOutput $ do
(Command.Move.keyOptions $ moveOptions o) (autoMode o)
(Command.Move.startKey (moveOptions o) False)
(withFilesInGit go)
(Command.Move.moveFiles $ moveOptions o)
=<< workTreeItems (Command.Move.moveFiles $ moveOptions o)
{- A copy is just a move that does not delete the source file.
- However, auto mode avoids unnecessary copies, and avoids getting or

View file

@ -58,7 +58,7 @@ seek o = allowConcurrentOutput $
NoBatch -> withKeyOptions (keyOptions o) (autoMode o)
(startKeys o)
(withFilesInGit go)
(dropFiles o)
=<< workTreeItems (dropFiles o)
where
go = whenAnnexed $ start o

View file

@ -50,7 +50,7 @@ parseFormatOption =
seek :: FindOptions -> CommandSeek
seek o = case batchOption o of
NoBatch -> withFilesInGit go (findThese o)
NoBatch -> withFilesInGit go =<< workTreeItems (findThese o)
Batch -> batchFiles go
where
go = whenAnnexed $ start o

View file

@ -34,7 +34,8 @@ seek ps = unlessM crippledFileSystem $ do
( return FixAll
, return FixSymlinks
)
flip withFilesInGit ps $ whenAnnexed $ start fixwhat
l <- workTreeItems ps
flip withFilesInGit l $ whenAnnexed $ start fixwhat
data FixWhat = FixSymlinks | FixAll

View file

@ -93,7 +93,7 @@ seek o = allowConcurrentOutput $ do
withKeyOptions (keyOptions o) False
(\k ai -> startKey from i k ai =<< getNumCopies)
(withFilesInGit $ whenAnnexed $ start from i)
(fsckFiles o)
=<< workTreeItems (fsckFiles o)
cleanupIncremental i
void $ tryIO $ recordActivity Fsck u

View file

@ -46,7 +46,7 @@ seek o = allowConcurrentOutput $ do
NoBatch -> withKeyOptions (keyOptions o) (autoMode o)
(startKeys from)
(withFilesInGit go)
(getFiles o)
=<< workTreeItems (getFiles o)
start :: GetOptions -> Maybe Remote -> FilePath -> Key -> CommandStart
start o from file key = start' expensivecheck from key afile (mkActionItem afile)

View file

@ -44,7 +44,8 @@ seek :: ListOptions -> CommandSeek
seek o = do
list <- getList o
printHeader list
withFilesInGit (whenAnnexed $ start list) (listThese o)
withFilesInGit (whenAnnexed $ start list)
=<< workTreeItems (listThese o)
getList :: ListOptions -> Annex [(UUID, RemoteName, TrustLevel)]
getList o

View file

@ -29,12 +29,14 @@ cmd = notDirect $ withGlobalOptions annexedMatchingOptions $
paramPaths (withParams seek)
seek :: CmdParams -> CommandSeek
seek ps = ifM versionSupportsUnlockedPointers
( withFilesInGit (whenAnnexed startNew) ps
, do
withFilesOldUnlocked startOld ps
withFilesOldUnlockedToBeCommitted startOld ps
)
seek ps = do
l <- workTreeItems ps
ifM versionSupportsUnlockedPointers
( withFilesInGit (whenAnnexed startNew) l
, do
withFilesOldUnlocked startOld l
withFilesOldUnlockedToBeCommitted startOld l
)
startNew :: FilePath -> Key -> CommandStart
startNew file key = ifM (isJust <$> isAnnexLink file)

View file

@ -91,7 +91,8 @@ seek o = do
zone <- liftIO getCurrentTimeZone
let outputter = mkOutputter m zone o
case (logFiles o, allOption o) of
(fs, False) -> withFilesInGit (whenAnnexed $ start o outputter) fs
(fs, False) -> withFilesInGit (whenAnnexed $ start o outputter)
=<< workTreeItems fs
([], True) -> commandAction (startAll o outputter)
(_, True) -> giveup "Cannot specify both files and --all"

View file

@ -81,7 +81,7 @@ seek o = case batchOption o of
withKeyOptions (keyOptions o) False
(startKeys c o)
(seeker $ whenAnnexed $ start c o)
(forFiles o)
=<< workTreeItems (forFiles o)
Batch -> withMessageState $ \s -> case outputType s of
JSONOutput _ -> batchInput parseJSONInput $
commandAction . startBatch

View file

@ -26,7 +26,7 @@ cmd = notDirect $ withGlobalOptions annexedMatchingOptions $
paramPaths (withParams seek)
seek :: CmdParams -> CommandSeek
seek = withFilesInGit $ whenAnnexed start
seek ps = withFilesInGit (whenAnnexed start) =<< workTreeItems ps
start :: FilePath -> Key -> CommandStart
start file key = do

View file

@ -45,7 +45,7 @@ seek o = allowConcurrentOutput $
withKeyOptions (keyOptions o) False
(startKey o (AssociatedFile Nothing))
(withFilesInGit $ whenAnnexed $ start o)
(mirrorFiles o)
=<< workTreeItems (mirrorFiles o)
start :: MirrorOptions -> FilePath -> Key -> CommandStart
start o file k = startKey o afile k (mkActionItem afile)

View file

@ -63,7 +63,7 @@ seek o = allowConcurrentOutput $ do
NoBatch -> withKeyOptions (keyOptions o) False
(startKey o True)
(withFilesInGit go)
(moveFiles o)
=<< workTreeItems (moveFiles o)
start :: MoveOptions -> Bool -> FilePath -> Key -> CommandStart
start o move f k = start' o move afile k (mkActionItem afile)

View file

@ -131,7 +131,7 @@ send ups fs = withTmpFile "send" $ \t h -> do
giveup "Sorry, multicast send cannot be done from a direct mode repository."
showStart "generating file list" ""
fs' <- seekHelper LsFiles.inRepo fs
fs' <- seekHelper LsFiles.inRepo =<< workTreeItems fs
matcher <- Limit.getMatcher
let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $
liftIO $ hPutStrLn h o

View file

@ -49,15 +49,16 @@ seek ps = lockPreCommitHook $ ifM isDirect
giveup "Cannot make a partial commit with unlocked annexed files. You should `git annex add` the files you want to commit, and then run git commit."
void $ liftIO cleanup
, do
l <- workTreeItems ps
-- fix symlinks to files being committed
flip withFilesToBeCommitted ps $ \f ->
flip withFilesToBeCommitted l $ \f ->
maybe stop (Command.Fix.start Command.Fix.FixSymlinks f)
=<< isAnnexLink f
-- inject unlocked files into the annex
-- (not needed when repo version uses
-- unlocked pointer files)
unlessM versionSupportsUnlockedPointers $
withFilesOldUnlockedToBeCommitted startInjectUnlocked ps
withFilesOldUnlockedToBeCommitted startInjectUnlocked l
)
runAnnexHook preCommitAnnexHook
-- committing changes to a view updates metadata

View file

@ -576,7 +576,10 @@ seekSyncContent o rs = do
mvar <- liftIO newEmptyMVar
bloom <- case keyOptions o of
Just WantAllKeys -> Just <$> genBloomFilter (seekworktree mvar [])
_ -> seekworktree mvar (contentOfOption o) (const noop) >> pure Nothing
_ -> do
l <- workTreeItems (contentOfOption o)
seekworktree mvar l (const noop)
pure Nothing
withKeyOptions' (keyOptions o) False
(return (seekkeys mvar bloom))
(const noop)

View file

@ -30,7 +30,8 @@ cmd = withGlobalOptions annexedMatchingOptions $
paramPaths (withParams seek)
seek :: CmdParams -> CommandSeek
seek = wrapUnannex . (withFilesInGit $ whenAnnexed start)
seek ps = wrapUnannex $
(withFilesInGit $ whenAnnexed start) =<< workTreeItems ps
wrapUnannex :: Annex a -> Annex a
wrapUnannex a = ifM (versionSupportsUnlockedPointers <||> isDirect)

View file

@ -40,9 +40,10 @@ check = do
seek :: CmdParams -> CommandSeek
seek ps = do
withFilesNotInGit False (whenAnnexed startCheckIncomplete) ps
l <- workTreeItems ps
withFilesNotInGit False (whenAnnexed startCheckIncomplete) l
Annex.changeState $ \s -> s { Annex.fast = True }
withFilesInGit (whenAnnexed Command.Unannex.start) ps
withFilesInGit (whenAnnexed Command.Unannex.start) l
finish
{- git annex symlinks that are not checked into git could be left by an

View file

@ -30,7 +30,7 @@ mkcmd n d = notDirect $ withGlobalOptions annexedMatchingOptions $
command n SectionCommon d paramPaths (withParams seek)
seek :: CmdParams -> CommandSeek
seek = withFilesInGit $ whenAnnexed start
seek ps = withFilesInGit (whenAnnexed start) =<< workTreeItems ps
{- Before v6, the unlock subcommand replaces the symlink with a copy of
- the file's content. In v6 and above, it converts the file from a symlink

View file

@ -44,7 +44,7 @@ seek o = do
withKeyOptions (keyOptions o) False
(startKeys m)
(withFilesInGit go)
(whereisFiles o)
=<< workTreeItems (whereisFiles o)
start :: M.Map UUID Remote -> FilePath -> Key -> CommandStart
start remotemap file key = startKeys remotemap key (mkActionItem afile)

View file

@ -0,0 +1,22 @@
[[!comment format=mdwn
username="joey"
subject="""comment 4"""
date="2017-10-16T16:58:46Z"
content="""
I was worried there could be further races in the seeking
done by withFilesOldUnlocked and withFilesMaybeModified if those
run while files are still being ingested by actions run earlier
in the `git annex add`. Seems this is not a problem though --
withFilesOldUnlocked looks for typeChanged files, but the files
that were just/are currently being added were not in git before,
so are not typeChanged.
withFilesMaybeModified looks for modified files, and again these
files were/are just being added for the first time, so it won't stumble
over them.
So, I don't think a synchronization point is needed. In fact,
all three seeks could actually be run more concurrently than they are not
without stepping on one-another's toes.
"""]]

View file

@ -0,0 +1,13 @@
[[!comment format=mdwn
username="joey"
subject="""comment 5"""
date="2017-10-16T17:06:43Z"
content="""
That leaves only the innefficiency of checkFileOrDirectoryExists being
run three times per parameter passed to `git annex add`.
There are some other commands that also run checkFileOrDirectoryExists
multiple times. `git annex lock` being one.
So, I factored that out into a separate pass, that's only done once.
"""]]