directory CoW on import

This commit is contained in:
Joey Hess 2021-04-14 16:10:09 -04:00
parent 4b048ca042
commit b86206b553
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 80 additions and 40 deletions

View file

@ -44,6 +44,25 @@ newtype CopyCoWTried = CopyCoWTried (MVar Bool)
newCopyCoWTried :: IO CopyCoWTried
newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
{- Copies a file is copy-on-write is supported. Otherwise, returns False. -}
tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> IO Bool
tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
-- If multiple threads reach this at the same time, they
-- will both try CoW, which is acceptable.
ifM (isEmptyMVar copycowtried)
( do
ok <- docopycow
void $ tryPutMVar copycowtried ok
return ok
, ifM (readMVar copycowtried)
( docopycow
, return False
)
)
where
docopycow = watchFileSize dest meterupdate $
copyCoW CopyTimeStamps src dest
{- Copys a file. Uses copy-on-write if it is supported. Otherwise,
- copies the file itself. If the destination already exists,
- an interruped copy will resume where it left off.
@ -62,32 +81,14 @@ newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
fileCopier :: CopyCoWTried -> FileCopier
#ifdef mingw32_HOST_OS
fileCopier _ src dest k meterupdate check verifyconfig = docopy
where
#else
fileCopier (CopyCoWTried copycowtried) src dest k meterupdate check verifyconfig =
-- If multiple threads reach this at the same time, they
-- will both try CoW, which is acceptable.
ifM (liftIO $ isEmptyMVar copycowtried)
( do
ok <- docopycow
void $ liftIO $ tryPutMVar copycowtried ok
if ok
then unVerified check
else docopy
, ifM (liftIO $ readMVar copycowtried)
( do
ok <- docopycow
if ok
then unVerified check
else docopy
, docopy
)
fileCopier copycowtried src dest k meterupdate check verifyconfig =
ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
( unVerified check
, docopy
)
where
docopycow = liftIO $ watchFileSize dest meterupdate $
copyCoW CopyTimeStamps src dest
#endif
where
dest' = toRawFilePath dest
docopy = do

View file

@ -88,7 +88,7 @@ gen r u rc gc rs = do
, checkPresentCheap = True
, exportActions = ExportActions
{ storeExport = storeExportM dir
, retrieveExport = retrieveExportM dir
, retrieveExport = retrieveExportM dir cow
, removeExport = removeExportM dir
, versionedExport = False
, checkPresentExport = checkPresentExportM dir
@ -100,7 +100,7 @@ gen r u rc gc rs = do
, importActions = ImportActions
{ listImportableContents = listImportableContentsM dir
, importKey = Just (importKeyM dir)
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM dir
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM dir cow
, storeExportWithContentIdentifier = storeExportWithContentIdentifierM dir
, removeExportWithContentIdentifier = removeExportWithContentIdentifierM dir
-- Not needed because removeExportWithContentIdentifier
@ -190,8 +190,7 @@ storeKeyM d chunkconfig cow k c m =
in byteStorer go k c m
NoChunks ->
let go _k src p = do
(ok, _verification) <- fileCopier cow src tmpf k p (return True) NoVerify
unless ok $ giveup "failed to copy file to remote"
fileCopierUnVerified cow src tmpf k p
liftIO $ finalizeStoreGeneric d tmpdir destdir
in fileStorer go k c m
_ ->
@ -205,6 +204,11 @@ storeKeyM d chunkconfig cow k c m =
kf = keyFile k
destdir = storeDir d k
fileCopierUnVerified :: CopyCoWTried -> FilePath -> FilePath -> Key -> MeterUpdate -> Annex ()
fileCopierUnVerified cow src dest k p = do
(ok, _verification) <- fileCopier cow src dest k p (return True) NoVerify
unless ok $ giveup "failed to copy file"
checkDiskSpaceDirectory :: RawFilePath -> Key -> Annex Bool
checkDiskSpaceDirectory d k = do
annexdir <- fromRepo gitAnnexObjectDir
@ -234,8 +238,7 @@ retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do
src <- liftIO $ fromRawFilePath <$> getLocation d k
(ok, _verification) <- fileCopier cow src dest k p (return True) NoVerify
unless ok $ giveup "failed to copy file from remote"
fileCopierUnVerified cow src dest k p
retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
@ -310,9 +313,8 @@ storeExportM d src _k loc p = liftIO $ do
dest = exportPath d loc
go tmp () = withMeteredFile src p (L.writeFile tmp)
retrieveExportM :: RawFilePath -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex ()
retrieveExportM d _k loc dest p =
liftIO $ withMeteredFile src p (L.writeFile dest)
retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex ()
retrieveExportM d cow k loc dest p = fileCopierUnVerified cow src dest k p
where
src = fromRawFilePath $ exportPath d loc
@ -407,14 +409,21 @@ importKeyM dir loc cid sz p = do
, inodeCache = Nothing
}
retrieveExportWithContentIdentifierM :: RawFilePath -> ExportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
precheck $ docopy postcheck
retrieveExportWithContentIdentifierM :: RawFilePath -> CopyCoWTried -> ExportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
retrieveExportWithContentIdentifierM dir cow loc cid dest mkkey p =
precheck docopy
where
f = exportPath dir loc
f' = fromRawFilePath f
docopy cont = do
docopy = ifM (liftIO $ tryCopyCoW cow f' dest p)
( do
k <- mkkey
postcheckcow (return k)
, docopynoncow
)
docopynoncow = do
#ifndef mingw32_HOST_OS
let open = do
-- Need a duplicate fd for the post check, since
@ -435,9 +444,9 @@ retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
liftIO $ hGetContentsMetered h p >>= L.writeFile dest
k <- mkkey
#ifndef mingw32_HOST_OS
cont dupfd (return k)
postchecknoncow dupfd (return k)
#else
cont (return k)
postchecknoncow (return k)
#endif
-- Check before copy, to avoid expensive copy of wrong file
@ -460,9 +469,9 @@ retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
-- situations with files being modified while it's updating the
-- working tree for a merge.
#ifndef mingw32_HOST_OS
postcheck fd cont = do
postchecknoncow fd cont = do
#else
postcheck cont = do
postchecknoncow cont = do
#endif
currcid <- liftIO $ mkContentIdentifier f
#ifndef mingw32_HOST_OS
@ -472,6 +481,16 @@ retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
#endif
guardSameContentIdentifiers cont cid currcid
-- When copy-on-write was done, cannot check the handle that was
-- copied from, but such a copy should run very fast, so
-- it's very unlikely that the file changed after precheck,
-- the modified version was copied CoW, and then the file was
-- restored to the original content before this check.
postcheckcow cont = do
currcid <- liftIO $ mkContentIdentifier f
=<< R.getFileStatus f
guardSameContentIdentifiers cont cid currcid
storeExportWithContentIdentifierM :: RawFilePath -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
storeExportWithContentIdentifierM dir src _k loc overwritablecids p = do
liftIO $ createDirectoryUnder dir (toRawFilePath destdir)

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 4"""
date="2021-04-14T19:33:13Z"
content="""
Implemented CoW for directory special remote, comprehensively.
(Except for when exporting to it, which I'll do for completeness before closing
this.)
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 5"""
date="2021-04-14T20:07:33Z"
content="""
Correction: Import from directory special remote copies the content by
default still. But with --no-content it does not. Might be you could have
used that, if you did not want to load the content up into your repo and
were ok leaving it on the remote.
"""]]