avoid creating content directory when locking content

If the content directory does not exist, then it does not make sense to
lock the content file, as it also does not exist, and so it's ok for the
lock operation to fail.

This avoids potential races where the content file exists but is then
deleted/renamed, while another process sees that it exists and goes to
lock it, resulting in a dangling lock file in an otherwise empty object
directory.

Also renamed modifyContent to modifyContentDir since it is not only
necessarily used for modifying content files, but also other files in
the content directory.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2022-05-16 12:34:56 -04:00
parent b6c7819803
commit 5a98f2d509
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 52 additions and 16 deletions

View file

@ -169,13 +169,13 @@ type ContentLocker = RawFilePath -> Maybe LockFile -> (Annex (Maybe LockHandle),
posixLocker :: (Maybe FileMode -> LockFile -> Annex (Maybe LockHandle)) -> LockFile -> Annex (Maybe LockHandle)
posixLocker takelock lockfile = do
mode <- annexFileMode
modifyContent lockfile $
modifyContentDirWhenExists lockfile $
takelock (Just mode) lockfile
#else
winLocker :: (LockFile -> IO (Maybe LockHandle)) -> ContentLocker
winLocker takelock _ (Just lockfile) =
let lck = do
modifyContent lockfile $
modifyContentDirWhenExists lockfile $
void $ liftIO $ tryIO $
writeFile (fromRawFilePath lockfile) ""
liftIO $ takelock lockfile
@ -407,7 +407,7 @@ moveAnnex key af src = ifM (checkSecureHashes' key)
where
storeobject dest = ifM (liftIO $ R.doesPathExist dest)
( alreadyhave
, adjustedBranchRefresh af $ modifyContent dest $ do
, adjustedBranchRefresh af $ modifyContentDir dest $ do
liftIO $ moveFile
(fromRawFilePath src)
(fromRawFilePath dest)
@ -452,7 +452,7 @@ linkToAnnex :: Key -> RawFilePath -> Maybe InodeCache -> Annex LinkAnnexResult
linkToAnnex key src srcic = ifM (checkSecureHashes' key)
( do
dest <- calcRepo (gitAnnexLocation key)
modifyContent dest $ linkAnnex To key src srcic dest Nothing
modifyContentDir dest $ linkAnnex To key src srcic dest Nothing
, return LinkAnnexFailed
)
@ -528,7 +528,7 @@ linkAnnex fromto key src (Just srcic) dest destmode =
unlinkAnnex :: Key -> Annex ()
unlinkAnnex key = do
obj <- calcRepo (gitAnnexLocation key)
modifyContent obj $ do
modifyContentDir obj $ do
secureErase obj
liftIO $ removeWhenExistsWith R.removeLink obj

View file

@ -112,7 +112,7 @@ inAnnexSafe key = inAnnex' (fromMaybe True) (Just False) go key
- remove the lock file to clean up after ourselves. -}
checklock (Just lockfile) contentfile =
ifM (liftIO $ doesFileExist (fromRawFilePath contentfile))
( modifyContent lockfile $ liftIO $
( modifyContentDirWhenExists lockfile $ liftIO $
lockShared lockfile >>= \case
Nothing -> return is_locked
Just lockhandle -> do

View file

@ -26,7 +26,8 @@ module Annex.Perms (
createContentDir,
freezeContentDir,
thawContentDir,
modifyContent,
modifyContentDir,
modifyContentDirWhenExists,
withShared,
hasFreezeHook,
hasThawHook,
@ -290,15 +291,24 @@ createContentDir dest = do
dir = parentDir dest
{- Creates the content directory for a file if it doesn't already exist,
- or thaws it if it does, then runs an action to modify the file, and
- finally, freezes the content directory. -}
modifyContent :: RawFilePath -> Annex a -> Annex a
modifyContent f a = do
- or thaws it if it does, then runs an action to modify a file in the
- directory, and finally, freezes the content directory. -}
modifyContentDir :: RawFilePath -> Annex a -> Annex a
modifyContentDir f a = do
createContentDir f -- also thaws it
v <- tryNonAsync a
freezeContentDir f
either throwM return v
{- Like modifyContentDir, but avoids creating the content directory if it
- ddoes not already exist. In that case, the action will probably fail. -}
modifyContentDirWhenExists :: RawFilePath -> Annex a -> Annex a
modifyContentDirWhenExists f a = do
thawContentDir f
v <- tryNonAsync a
freezeContentDir f
either throwM return v
hasFreezeHook :: Annex Bool
hasFreezeHook = isJust . annexFreezeContentCommand <$> Annex.getGitConfig

View file

@ -78,7 +78,7 @@ breakHardLink file key obj = do
error "unable to break hard link"
thawContent tmp'
Database.Keys.storeInodeCaches key [tmp']
modifyContent obj $ freezeContent obj
modifyContentDir obj $ freezeContent obj
next $ return True
makeHardLink :: RawFilePath -> Key -> CommandPerform

View file

@ -77,13 +77,13 @@ perform file key = do
breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (R.getFileStatus obj)) $ do
mfc <- withTSDelta (liftIO . genInodeCache file)
unlessM (sameInodeCache obj (maybeToList mfc)) $ do
modifyContent obj $ replaceGitAnnexDirFile (fromRawFilePath obj) $ \tmp -> do
modifyContentDir obj $ replaceGitAnnexDirFile (fromRawFilePath obj) $ \tmp -> do
unlessM (checkedCopyFile key obj (toRawFilePath tmp) Nothing) $
giveup "unable to lock file"
Database.Keys.storeInodeCaches key [obj]
-- Try to repopulate obj from an unmodified associated file.
repopulate obj = modifyContent obj $ do
repopulate obj = modifyContentDir obj $ do
g <- Annex.gitRepo
fs <- map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key

View file

@ -97,7 +97,7 @@ associatedFilesRelative key = do
removeAssociatedFiles :: Key -> Annex ()
removeAssociatedFiles key = do
mapping <- calcRepo $ gitAnnexMapping key
modifyContent mapping $
modifyContentDir mapping $
liftIO $ removeWhenExistsWith R.removeLink mapping
{- Checks if a file in the tree, associated with a key, has not been modified.
@ -124,7 +124,7 @@ recordedInodeCache key = withInodeCacheFile key $ \f ->
{- Removes an inode cache. -}
removeInodeCache :: Key -> Annex ()
removeInodeCache key = withInodeCacheFile key $ \f ->
modifyContent f $
modifyContentDir f $
liftIO $ removeWhenExistsWith R.removeLink f
withInodeCacheFile :: Key -> (RawFilePath -> Annex a) -> Annex a

View file

@ -0,0 +1,26 @@
[[!comment format=mdwn
username="joey"
subject="""comment 6"""
date="2022-05-16T15:24:43Z"
content="""
If fsck locks the content for removal, then moves it to the preferred
location, how is that any different from git-annex first dropping content
and then very quickly retrieving another copy and storing it in the other
location? The only difference is timing, but things like being suspended
and resumed can affect timing.
So, if there is a problem with fsck doing that, there would also be a more
general problem, that could occur in other circumstances, even if only
rarely.
One way to see the general problem happen would be to have two processes
trying to drop the same object. One process finds the object location, then
stalls. Meanwhile, the second process drops the object. Then the first
process resumes, and locks for removal. Per comment #5 this will result in
a dangling lock file in the object directory. I have not managed to get
this to happen yet though.
A fix for the general problem is to make it not create the
object directory when opening the object lock file. So I've made that
change.
"""]]