refactor fileCopier

Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
Joey Hess 2021-08-16 15:56:24 -04:00
parent d889ae0c01
commit a644f729ce
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 47 additions and 49 deletions

View file

@ -10,23 +10,16 @@
module Annex.CopyFile where
import Annex.Common
import Types.Remote
import Utility.Metered
import Utility.CopyFile
import Utility.FileMode
import Utility.Touch
import Types.Backend
import Annex.Verify
import Control.Concurrent
import qualified Data.ByteString as S
import Data.Time.Clock.POSIX
-- Copies from src to dest, updating a meter. If the copy finishes
-- successfully, calls a final check action, which must also succeed, or
-- returns false.
type FileCopier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification)
-- To avoid the overhead of trying copy-on-write every time, it's tried
-- once and if it fails, is not tried again.
newtype CopyCoWTried = CopyCoWTried (MVar Bool)
@ -53,13 +46,17 @@ tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
docopycow = watchFileSize dest meterupdate $
copyCoW CopyTimeStamps src dest
{- Copys a file. Uses copy-on-write if it is supported. Otherwise,
- copies the file itself. If the destination already exists,
- an interruped copy will resume where it left off.
data CopyMethod = CopiedCoW | Copied
{- Copies from src to dest, updating a meter. Preserves mode and mtime.
- Uses copy-on-write if it is supported. If the the destination already
- exists, an interruped copy will resume where it left off.
-
- When copy-on-write is used, returns UnVerified, because the content of
- the file has not been verified to be correct. When the file has to be
- read to copy it, a hash is calulated at the same time.
- The IncrementalVerifier is updated with the content of the file as it's
- being copied. But it is not finalized at the end.
-
- When copy-on-write is used, the IncrementalVerifier is not fed
- the content of the file.
-
- Note that, when the destination file already exists, it's read both
- to start calculating the hash, and also to verify that its content is
@ -68,13 +65,13 @@ tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
- (eg when isStableKey is false), and doing this avoids getting a
- corrupted file in such cases.
-}
fileCopier :: CopyCoWTried -> FileCopier
fileCopier :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex CopyMethod
#ifdef mingw32_HOST_OS
fileCopier _ src dest k meterupdate check verifyconfig = docopy
fileCopier _ src dest meterupdate iv = docopy
#else
fileCopier copycowtried src dest k meterupdate check verifyconfig =
fileCopier copycowtried src dest meterupdate iv =
ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
( unVerified check
( return CopiedCoW
, docopy
)
#endif
@ -82,16 +79,14 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
dest' = toRawFilePath dest
docopy = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
-- The file might have had the write bit removed,
-- so make sure we can write to it.
void $ liftIO $ tryIO $ allowWrite dest'
liftIO $ withBinaryFile dest ReadWriteMode $ \hdest ->
withBinaryFile src ReadMode $ \hsrc -> do
sofar <- compareexisting iv hdest hsrc zeroBytesProcessed
docopy' iv hdest hsrc sofar
sofar <- compareexisting hdest hsrc zeroBytesProcessed
docopy' hdest hsrc sofar
-- Copy src mode and mtime.
mode <- liftIO $ fileMode <$> getFileStatus src
@ -99,19 +94,9 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
liftIO $ setFileMode dest mode
liftIO $ touch dest' mtime False
ifM check
( case iv of
Just x -> ifM (liftIO $ finalizeIncremental x)
( return (True, Verified)
, do
warning "verification of content failed"
return (False, UnVerified)
)
Nothing -> return (True, UnVerified)
, return (False, UnVerified)
)
return Copied
docopy' iv hdest hsrc sofar = do
docopy' hdest hsrc sofar = do
s <- S.hGet hsrc defaultChunkSize
if s == S.empty
then return ()
@ -120,12 +105,12 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
S.hPut hdest s
maybe noop (flip updateIncremental s) iv
meterupdate sofar'
docopy' iv hdest hsrc sofar'
docopy' hdest hsrc sofar'
-- Leaves hdest and hsrc seeked to wherever the two diverge,
-- so typically hdest will be seeked to end, and hsrc to the same
-- position.
compareexisting iv hdest hsrc sofar = do
compareexisting hdest hsrc sofar = do
s <- S.hGet hdest defaultChunkSize
if s == S.empty
then return sofar
@ -136,7 +121,7 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
maybe noop (flip updateIncremental s) iv
let sofar' = addBytesProcessed sofar (S.length s)
meterupdate sofar'
compareexisting iv hdest hsrc sofar'
compareexisting hdest hsrc sofar'
else do
seekbefore hdest s
seekbefore hsrc s'

View file

@ -191,7 +191,7 @@ storeKeyM d chunkconfig cow k c m =
in byteStorer go k c m
NoChunks ->
let go _k src p = do
fileCopierUnVerified cow src tmpf k p
void $ fileCopier cow src tmpf p Nothing
liftIO $ finalizeStoreGeneric d tmpdir destdir
in fileStorer go k c m
_ ->
@ -205,11 +205,6 @@ storeKeyM d chunkconfig cow k c m =
kf = keyFile k
destdir = storeDir d k
fileCopierUnVerified :: CopyCoWTried -> FilePath -> FilePath -> Key -> MeterUpdate -> Annex ()
fileCopierUnVerified cow src dest k p = do
(ok, _verification) <- fileCopier cow src dest k p (return True) NoVerify
unless ok $ giveup "failed to copy file"
checkDiskSpaceDirectory :: RawFilePath -> Key -> Annex Bool
checkDiskSpaceDirectory d k = do
annexdir <- fromRepo gitAnnexObjectDir
@ -239,7 +234,7 @@ retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do
src <- liftIO $ fromRawFilePath <$> getLocation d k
fileCopierUnVerified cow src dest k p
void $ fileCopier cow src dest p Nothing
retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
@ -305,17 +300,17 @@ checkPresentGeneric' d check = ifM check
)
storeExportM :: RawFilePath -> CopyCoWTried -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex ()
storeExportM d cow src k loc p = do
storeExportM d cow src _k loc p = do
liftIO $ createDirectoryUnder d (P.takeDirectory dest)
-- Write via temp file so that checkPresentGeneric will not
-- see it until it's fully stored.
viaTmp go (fromRawFilePath dest) ()
where
dest = exportPath d loc
go tmp () = fileCopierUnVerified cow src tmp k p
go tmp () = void $ fileCopier cow src tmp p Nothing
retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex ()
retrieveExportM d cow k loc dest p = fileCopierUnVerified cow src dest k p
retrieveExportM d cow _k loc dest p = void $ fileCopier cow src dest p Nothing
where
src = fromRawFilePath $ exportPath d loc
@ -493,11 +488,11 @@ retrieveExportWithContentIdentifierM dir cow loc cid dest mkkey p =
guardSameContentIdentifiers cont cid currcid
storeExportWithContentIdentifierM :: RawFilePath -> CopyCoWTried -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
storeExportWithContentIdentifierM dir cow src k loc overwritablecids p = do
storeExportWithContentIdentifierM dir cow src _k loc overwritablecids p = do
liftIO $ createDirectoryUnder dir (toRawFilePath destdir)
withTmpFileIn destdir template $ \tmpf tmph -> do
liftIO $ hClose tmph
fileCopierUnVerified cow src tmpf k p
void $ fileCopier cow src tmpf p Nothing
let tmpf' = toRawFilePath tmpf
resetAnnexFilePerm tmpf'
liftIO (getFileStatus tmpf) >>= liftIO . mkContentIdentifier tmpf' >>= \case

View file

@ -29,6 +29,7 @@ import qualified Annex
import Logs.Presence
import Annex.Transfer
import Annex.CopyFile
import Annex.Verify
import Annex.UUID
import qualified Annex.Content
import qualified Annex.BranchState
@ -839,6 +840,8 @@ wantHardLink = (annexHardLink <$> Annex.getGitConfig)
-- because they can be modified at any time.
<&&> (not <$> annexThin <$> Annex.getGitConfig)
type FileCopier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification)
-- If either the remote or local repository wants to use hard links,
-- the copier will do so (falling back to copying if a hard link cannot be
-- made).
@ -849,7 +852,6 @@ wantHardLink = (annexHardLink <$> Annex.getGitConfig)
-- copying it.
mkFileCopier :: Bool -> State -> Annex FileCopier
mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
let copier = fileCopier copycowtried
localwanthardlink <- wantHardLink
let linker = \src dest -> createLink src dest >> return True
if remotewanthardlink || localwanthardlink
@ -862,6 +864,22 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
, copier src dest k p check verifyconfig
)
else return copier
where
copier src dest k p check verifyconfig = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
fileCopier copycowtried src dest p iv >>= \case
Copied -> ifM check
( case iv of
Just x -> ifM (liftIO $ finalizeIncremental x)
( return (True, Verified)
, do
warning "verification of content failed"
return (False, UnVerified)
)
Nothing -> return (True, UnVerified)
, return (False, UnVerified)
)
CopiedCoW -> unVerified check
{- Normally the UUID of a local repository is checked at startup,
- but annex-checkuuid config can prevent that. To avoid getting