directory CoW on import

This commit is contained in:
Joey Hess 2021-04-14 16:10:09 -04:00
parent 4b048ca042
commit b86206b553
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 80 additions and 40 deletions

View file

@ -44,6 +44,25 @@ newtype CopyCoWTried = CopyCoWTried (MVar Bool)
newCopyCoWTried :: IO CopyCoWTried newCopyCoWTried :: IO CopyCoWTried
newCopyCoWTried = CopyCoWTried <$> newEmptyMVar newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
{- Attempts a copy-on-write copy of src to dest.
 -
 - Returns True when the copy-on-write copy succeeded. Returns False when
 - it failed, or when the CopyCoWTried MVar already records that an
 - earlier attempt failed (in which case no copy is attempted at all).
 -
 - The copy runs copyCoW CopyTimeStamps under watchFileSize, which is
 - given dest and meterupdate.
 -}
tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> IO Bool
tryCopyCoW (CopyCoWTried tried) src dest meterupdate = do
    -- If multiple threads reach this at the same time, they
    -- will both try CoW, which is acceptable.
    untried <- isEmptyMVar tried
    if untried
        then firstattempt
        else do
            worked <- readMVar tried
            if worked
                then attempt
                else return False
  where
    -- Records the outcome of the attempt; tryPutMVar leaves the MVar
    -- unchanged when another thread has filled it in the meantime.
    firstattempt = do
        ok <- attempt
        void $ tryPutMVar tried ok
        return ok
    attempt = watchFileSize dest meterupdate $
        copyCoW CopyTimeStamps src dest
{- Copies a file. Uses copy-on-write if it is supported. Otherwise, {- Copies a file. Uses copy-on-write if it is supported. Otherwise,
- copies the file itself. If the destination already exists, - copies the file itself. If the destination already exists,
- an interrupted copy will resume where it left off. - an interrupted copy will resume where it left off.
@ -62,32 +81,14 @@ newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
fileCopier :: CopyCoWTried -> FileCopier fileCopier :: CopyCoWTried -> FileCopier
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
fileCopier _ src dest k meterupdate check verifyconfig = docopy fileCopier _ src dest k meterupdate check verifyconfig = docopy
where
#else #else
fileCopier (CopyCoWTried copycowtried) src dest k meterupdate check verifyconfig = fileCopier copycowtried src dest k meterupdate check verifyconfig =
-- If multiple threads reach this at the same time, they ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
-- will both try CoW, which is acceptable. ( unVerified check
ifM (liftIO $ isEmptyMVar copycowtried) , docopy
( do
ok <- docopycow
void $ liftIO $ tryPutMVar copycowtried ok
if ok
then unVerified check
else docopy
, ifM (liftIO $ readMVar copycowtried)
( do
ok <- docopycow
if ok
then unVerified check
else docopy
, docopy
)
) )
where
docopycow = liftIO $ watchFileSize dest meterupdate $
copyCoW CopyTimeStamps src dest
#endif #endif
where
dest' = toRawFilePath dest dest' = toRawFilePath dest
docopy = do docopy = do

View file

@ -88,7 +88,7 @@ gen r u rc gc rs = do
, checkPresentCheap = True , checkPresentCheap = True
, exportActions = ExportActions , exportActions = ExportActions
{ storeExport = storeExportM dir { storeExport = storeExportM dir
, retrieveExport = retrieveExportM dir , retrieveExport = retrieveExportM dir cow
, removeExport = removeExportM dir , removeExport = removeExportM dir
, versionedExport = False , versionedExport = False
, checkPresentExport = checkPresentExportM dir , checkPresentExport = checkPresentExportM dir
@ -100,7 +100,7 @@ gen r u rc gc rs = do
, importActions = ImportActions , importActions = ImportActions
{ listImportableContents = listImportableContentsM dir { listImportableContents = listImportableContentsM dir
, importKey = Just (importKeyM dir) , importKey = Just (importKeyM dir)
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM dir , retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM dir cow
, storeExportWithContentIdentifier = storeExportWithContentIdentifierM dir , storeExportWithContentIdentifier = storeExportWithContentIdentifierM dir
, removeExportWithContentIdentifier = removeExportWithContentIdentifierM dir , removeExportWithContentIdentifier = removeExportWithContentIdentifierM dir
-- Not needed because removeExportWithContentIdentifier -- Not needed because removeExportWithContentIdentifier
@ -190,8 +190,7 @@ storeKeyM d chunkconfig cow k c m =
in byteStorer go k c m in byteStorer go k c m
NoChunks -> NoChunks ->
let go _k src p = do let go _k src p = do
(ok, _verification) <- fileCopier cow src tmpf k p (return True) NoVerify fileCopierUnVerified cow src tmpf k p
unless ok $ giveup "failed to copy file to remote"
liftIO $ finalizeStoreGeneric d tmpdir destdir liftIO $ finalizeStoreGeneric d tmpdir destdir
in fileStorer go k c m in fileStorer go k c m
_ -> _ ->
@ -205,6 +204,11 @@ storeKeyM d chunkconfig cow k c m =
kf = keyFile k kf = keyFile k
destdir = storeDir d k destdir = storeDir d k
{- Runs fileCopier with NoVerify and a check action that always returns
 - True, discarding the verification part of its result. Gives up with
 - an error message when fileCopier reports that the copy failed. -}
fileCopierUnVerified :: CopyCoWTried -> FilePath -> FilePath -> Key -> MeterUpdate -> Annex ()
fileCopierUnVerified cow src dest k p =
    fileCopier cow src dest k p (return True) NoVerify >>= \(copied, _verification) ->
        unless copied $ giveup "failed to copy file"
checkDiskSpaceDirectory :: RawFilePath -> Key -> Annex Bool checkDiskSpaceDirectory :: RawFilePath -> Key -> Annex Bool
checkDiskSpaceDirectory d k = do checkDiskSpaceDirectory d k = do
annexdir <- fromRepo gitAnnexObjectDir annexdir <- fromRepo gitAnnexObjectDir
@ -234,8 +238,7 @@ retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do
src <- liftIO $ fromRawFilePath <$> getLocation d k src <- liftIO $ fromRawFilePath <$> getLocation d k
(ok, _verification) <- fileCopier cow src dest k p (return True) NoVerify fileCopierUnVerified cow src dest k p
unless ok $ giveup "failed to copy file from remote"
retrieveKeyFileM d _ _ = byteRetriever $ \k sink -> retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k) sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
@ -310,9 +313,8 @@ storeExportM d src _k loc p = liftIO $ do
dest = exportPath d loc dest = exportPath d loc
go tmp () = withMeteredFile src p (L.writeFile tmp) go tmp () = withMeteredFile src p (L.writeFile tmp)
retrieveExportM :: RawFilePath -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex () retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex ()
retrieveExportM d _k loc dest p = retrieveExportM d cow k loc dest p = fileCopierUnVerified cow src dest k p
liftIO $ withMeteredFile src p (L.writeFile dest)
where where
src = fromRawFilePath $ exportPath d loc src = fromRawFilePath $ exportPath d loc
@ -407,14 +409,21 @@ importKeyM dir loc cid sz p = do
, inodeCache = Nothing , inodeCache = Nothing
} }
retrieveExportWithContentIdentifierM :: RawFilePath -> ExportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key retrieveExportWithContentIdentifierM :: RawFilePath -> CopyCoWTried -> ExportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
retrieveExportWithContentIdentifierM dir loc cid dest mkkey p = retrieveExportWithContentIdentifierM dir cow loc cid dest mkkey p =
precheck $ docopy postcheck precheck docopy
where where
f = exportPath dir loc f = exportPath dir loc
f' = fromRawFilePath f f' = fromRawFilePath f
docopy cont = do docopy = ifM (liftIO $ tryCopyCoW cow f' dest p)
( do
k <- mkkey
postcheckcow (return k)
, docopynoncow
)
docopynoncow = do
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
let open = do let open = do
-- Need a duplicate fd for the post check, since -- Need a duplicate fd for the post check, since
@ -435,9 +444,9 @@ retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
liftIO $ hGetContentsMetered h p >>= L.writeFile dest liftIO $ hGetContentsMetered h p >>= L.writeFile dest
k <- mkkey k <- mkkey
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
cont dupfd (return k) postchecknoncow dupfd (return k)
#else #else
cont (return k) postchecknoncow (return k)
#endif #endif
-- Check before copy, to avoid expensive copy of wrong file -- Check before copy, to avoid expensive copy of wrong file
@ -460,9 +469,9 @@ retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
-- situations with files being modified while it's updating the -- situations with files being modified while it's updating the
-- working tree for a merge. -- working tree for a merge.
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
postcheck fd cont = do postchecknoncow fd cont = do
#else #else
postcheck cont = do postchecknoncow cont = do
#endif #endif
currcid <- liftIO $ mkContentIdentifier f currcid <- liftIO $ mkContentIdentifier f
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
@ -472,6 +481,16 @@ retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
#endif #endif
guardSameContentIdentifiers cont cid currcid guardSameContentIdentifiers cont cid currcid
-- Check run after a successful copy-on-write copy: stats f again,
-- computes its current content identifier, and passes that to
-- guardSameContentIdentifiers along with the expected cid and cont.
--
-- When copy-on-write was done, cannot check the handle that was
-- copied from, but such a copy should run very fast, so
-- it's very unlikely that the file changed after precheck,
-- the modified version was copied CoW, and then the file was
-- restored to the original content before this check.
postcheckcow cont = do
currcid <- liftIO $ mkContentIdentifier f
=<< R.getFileStatus f
guardSameContentIdentifiers cont cid currcid
storeExportWithContentIdentifierM :: RawFilePath -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier storeExportWithContentIdentifierM :: RawFilePath -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
storeExportWithContentIdentifierM dir src _k loc overwritablecids p = do storeExportWithContentIdentifierM dir src _k loc overwritablecids p = do
liftIO $ createDirectoryUnder dir (toRawFilePath destdir) liftIO $ createDirectoryUnder dir (toRawFilePath destdir)

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 4"""
date="2021-04-14T19:33:13Z"
content="""
Implemented CoW for directory special remote, comprehensively.
(Except for when exporting to it, which I'll do for completeness before closing
this.)
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 5"""
date="2021-04-14T20:07:33Z"
content="""
Correction: Import from a directory special remote still copies the content
by default. But with --no-content it does not. You might have been able to
use that, if you did not want to load the content up into your repo and
were ok leaving it on the remote.
"""]]