use InodeCache to avoid races in import from directory special remote
This does not avoid all possible races, but it does avoid all likely ones, and is demonstrably better than git's own handling of races where files get modified at the same time as it's updating the working tree. The main thing this won't detect is not the race where part of a file gets changed while it's being copied and then the file is restored to its original condition before the modification check; it's more likely that the limitations of checking inode, size, and mtime will miss certain modifications, involving eg mmapped files.
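For illustration, here is a minimal sketch of the shape of the check this commit adds: stat the file before copying, copy, then stat again and only accept the copy if the inode, size, and mtime are unchanged. It is a POSIX-only sketch using a plain stat triple in place of git-annex's InodeCache; StatKey and copyChecked are hypothetical names, not functions from this codebase.

    -- Hypothetical sketch, not git-annex code: detect a file being
    -- modified while it is copied by comparing inode/size/mtime
    -- before and after the copy.
    import qualified Data.ByteString.Lazy as L
    import Data.Time.Clock.POSIX (POSIXTime)
    import System.Posix.Files
        (FileStatus, getFileStatus, isRegularFile, fileID, fileSize, modificationTimeHiRes)
    import System.Posix.Types (FileID, FileOffset)

    -- Rough stand-in for an InodeCache: inode, size, and mtime.
    data StatKey = StatKey FileID FileOffset POSIXTime
        deriving (Eq, Show)

    statKey :: FileStatus -> Maybe StatKey
    statKey st
        | isRegularFile st = Just $ StatKey
            (fileID st) (fileSize st) (modificationTimeHiRes st)
        | otherwise = Nothing

    -- Copy src to dest, succeeding only when src's inode, size, and
    -- mtime match the expected value both before and after the copy.
    -- A mismatch before the copy means the wrong content is there;
    -- a mismatch after means it was probably modified mid-copy.
    copyChecked :: StatKey -> FilePath -> FilePath -> IO Bool
    copyChecked expected src dest = do
        pre <- statKey <$> getFileStatus src
        if pre /= Just expected
            then return False
            else do
                L.readFile src >>= L.writeFile dest
                post <- statKey <$> getFileStatus src
                return (post == Just expected)

In the actual change below, the post-copy check is additionally done via the same file descriptor the content was read from (except on Windows), which catches some cases where the file is swapped out and back during the copy.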
parent 51fc969b66
commit 3cd19fb4d0
3 changed files with 91 additions and 28 deletions
@@ -233,12 +233,14 @@ downloadImport remote importtreeconfig importablecontents = do
 		(k:_) -> return $ Just (loc, k)
 		[] -> checkDiskSpaceToGet tmpkey Nothing $
 			withTmp tmpkey $ \tmpfile ->
-				Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (ingestkey loc tmpfile) p >>= \case
-					Just k -> do
-						recordcidkey cidmap db cid k
-						logStatus k InfoPresent
-						logChange k (Remote.uuid remote) InfoPresent
-						return $ Just (loc, k)
+				Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (mkkey loc tmpfile) p >>= \case
+					Just k -> tryNonAsync (moveAnnex k tmpfile) >>= \case
+						Right True -> do
+							recordcidkey cidmap db cid k
+							logStatus k InfoPresent
+							logChange k (Remote.uuid remote) InfoPresent
+							return $ Just (loc, k)
+						_ -> return Nothing
 					Nothing -> return Nothing
 	  where
 		-- TODO progress bar
@@ -246,7 +248,7 @@ downloadImport remote importtreeconfig importablecontents = do
 		ia = Remote.importActions remote
 		tmpkey = importKey cid sz
 
-		ingestkey loc tmpfile = do
+		mkkey loc tmpfile = do
 			f <- fromRepo $ fromTopFilePath $ locworktreefilename loc
 			backend <- chooseBackend f
 			let ks = KeySource
@@ -254,12 +256,7 @@ downloadImport remote importtreeconfig importablecontents = do
 				, contentLocation = tmpfile
 				, inodeCache = Nothing
 				}
-			genKey ks backend >>= \case
-				Nothing -> return Nothing
-				Just (k, _) ->
-					tryNonAsync (moveAnnex k tmpfile) >>= \case
-						Right True -> return (Just k)
-						_ -> return Nothing
+			fmap fst <$> genKey ks backend
 
 		locworktreefilename loc = asTopFilePath $ case importtreeconfig of
 			ImportTree -> fromImportLocation loc
@@ -33,6 +33,7 @@ import Annex.Content
 import Annex.UUID
 import Utility.Metered
 import Utility.Tmp
+import Utility.InodeCache
 
 remote :: RemoteType
 remote = RemoteType
@@ -300,22 +301,90 @@ listImportableContentsM :: FilePath -> Annex (Maybe (ImportableContents (Content
 listImportableContentsM dir = catchMaybeIO $ liftIO $ do
 	l <- dirContentsRecursive dir
 	l' <- mapM go l
-	return $ ImportableContents l' []
+	return $ ImportableContents (catMaybes l') []
   where
 	go f = do
-		sz <- getFileSize f
-		loc <- mkImportLocation <$> relPathDirToFile dir f
-		-- TODO use inode, size etc
-		let cid = ContentIdentifier $ encodeBS f
-		return (loc, (cid, sz))
+		st <- getFileStatus f
+		mkContentIdentifier f st >>= \case
+			Nothing -> return Nothing
+			Just cid -> do
+				relf <- relPathDirToFile dir f
+				sz <- getFileSize' f st
+				return $ Just (mkImportLocation relf, (cid, sz))
+
+-- Make a ContentIdentifier that contains an InodeCache.
+--
+-- The InodeCache is generated without checking a sentinal file.
+-- So in a case when a remount etc causes all the inodes to change,
+-- files may appear to be modified when they are not, which will only
+-- result in extra work to re-import them.
+--
+-- If the file is not a regular file, this will return Nothing.
+mkContentIdentifier :: FilePath -> FileStatus -> IO (Maybe ContentIdentifier)
+mkContentIdentifier f st =
+	fmap (ContentIdentifier . encodeBS . showInodeCache)
+		<$> toInodeCache noTSDelta f st
 
 retrieveExportWithContentIdentifierM :: FilePath -> ExportLocation -> ContentIdentifier -> FilePath -> Annex (Maybe Key) -> MeterUpdate -> Annex (Maybe Key)
-retrieveExportWithContentIdentifierM dir loc cid dest mkkey p = catchDefaultIO Nothing $ do
-	-- TODO check the ContentIdentifier is valid (avoiding all races)
-	let f = dir </> fromExportLocation loc
-	h <- liftIO $ openBinaryFile f ReadMode
-	liftIO $ hGetContentsMetered h p >>= L.writeFile dest
-	mkkey
+retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
+	catchDefaultIO Nothing $ precheck $ docopy postcheck
+  where
+	f = dir </> fromExportLocation loc
+
+	docopy cont = do
+#ifndef mingw32_HOST_OS
+		-- Need a duplicate fd for the post check, since
+		-- hGetContentsMetered closes its handle.
+		fd <- liftIO $ openFd f ReadOnly Nothing defaultFileFlags
+		dupfd <- liftIO $ dup fd
+		h <- liftIO $ fdToHandle fd
+#else
+		h <- liftIO $ openBinaryFile f ReadMode
+#endif
+		liftIO $ hGetContentsMetered h p >>= L.writeFile dest
+		k <- mkkey
+#ifndef mingw32_HOST_OS
+		cont dupfd (return k)
+#else
+		cont (return k)
+#endif
+
+	-- Check before copy, to avoid expensive copy of wrong file
+	-- content.
+	precheck cont = comparecid cont
+		=<< liftIO . mkContentIdentifier f
+		=<< liftIO (getFileStatus f)
+
+	-- Check after copy, in case the file was changed while it was
+	-- being copied.
+	--
+	-- When possible (not on Windows), check the same handle
+	-- that the file was copied from.
+	-- Avoids some race cases where the file is modified while
+	-- it's copied but then gets restored to the original content
+	-- afterwards.
+	--
+	-- This does not guard against every possible race, but neither
+	-- can InodeCaches detect every possible modification to a file.
+	-- It's probably as good or better than git's handling of similar
+	-- situations with files being modified while it's updating the
+	-- working tree for a merge.
+#ifndef mingw32_HOST_OS
+	postcheck fd cont = do
+#else
+	postcheck cont = do
+#endif
+		currcid <- liftIO $ mkContentIdentifier f
+#ifndef mingw32_HOST_OS
+			=<< getFdStatus fd
+#else
+			=<< getFileStatus f
+#endif
+		comparecid cont currcid
+
+	comparecid cont currcid
+		| currcid == Just cid = cont
+		| otherwise = return Nothing
 
 storeExportWithContentIdentifierM :: FilePath -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex (Maybe ContentIdentifier)
 storeExportWithContentIdentifierM dir = error "TODO"
@@ -10,9 +10,6 @@ this.
 
 ## implementation notes
 
-* directory special remote import is prototype, does not notice modified
-  files, not race safe
-
 * Need to support annex-tracking-branch configuration, which documentation
   says makes git-annex sync and assistant do imports.
 