refactor fileCopier

Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
Joey Hess 2021-08-16 15:56:24 -04:00
parent d889ae0c01
commit a644f729ce
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 47 additions and 49 deletions

View file

@ -10,23 +10,16 @@
module Annex.CopyFile where module Annex.CopyFile where
import Annex.Common import Annex.Common
import Types.Remote
import Utility.Metered import Utility.Metered
import Utility.CopyFile import Utility.CopyFile
import Utility.FileMode import Utility.FileMode
import Utility.Touch import Utility.Touch
import Types.Backend import Types.Backend
import Annex.Verify
import Control.Concurrent import Control.Concurrent
import qualified Data.ByteString as S import qualified Data.ByteString as S
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
-- Copies from src to dest, updating a meter. If the copy finishes
-- successfully, calls a final check action, which must also succeed, or
-- returns false.
type FileCopier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification)
-- To avoid the overhead of trying copy-on-write every time, it's tried -- To avoid the overhead of trying copy-on-write every time, it's tried
-- once and if it fails, is not tried again. -- once and if it fails, is not tried again.
newtype CopyCoWTried = CopyCoWTried (MVar Bool) newtype CopyCoWTried = CopyCoWTried (MVar Bool)
@ -53,13 +46,17 @@ tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
docopycow = watchFileSize dest meterupdate $ docopycow = watchFileSize dest meterupdate $
copyCoW CopyTimeStamps src dest copyCoW CopyTimeStamps src dest
{- Copys a file. Uses copy-on-write if it is supported. Otherwise, data CopyMethod = CopiedCoW | Copied
- copies the file itself. If the destination already exists,
- an interruped copy will resume where it left off. {- Copies from src to dest, updating a meter. Preserves mode and mtime.
- Uses copy-on-write if it is supported. If the the destination already
- exists, an interruped copy will resume where it left off.
- -
- When copy-on-write is used, returns UnVerified, because the content of - The IncrementalVerifier is updated with the content of the file as it's
- the file has not been verified to be correct. When the file has to be - being copied. But it is not finalized at the end.
- read to copy it, a hash is calulated at the same time. -
- When copy-on-write is used, the IncrementalVerifier is not fed
- the content of the file.
- -
- Note that, when the destination file already exists, it's read both - Note that, when the destination file already exists, it's read both
- to start calculating the hash, and also to verify that its content is - to start calculating the hash, and also to verify that its content is
@ -68,13 +65,13 @@ tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
- (eg when isStableKey is false), and doing this avoids getting a - (eg when isStableKey is false), and doing this avoids getting a
- corrupted file in such cases. - corrupted file in such cases.
-} -}
fileCopier :: CopyCoWTried -> FileCopier fileCopier :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex CopyMethod
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
fileCopier _ src dest k meterupdate check verifyconfig = docopy fileCopier _ src dest meterupdate iv = docopy
#else #else
fileCopier copycowtried src dest k meterupdate check verifyconfig = fileCopier copycowtried src dest meterupdate iv =
ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate) ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
( unVerified check ( return CopiedCoW
, docopy , docopy
) )
#endif #endif
@ -82,16 +79,14 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
dest' = toRawFilePath dest dest' = toRawFilePath dest
docopy = do docopy = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
-- The file might have had the write bit removed, -- The file might have had the write bit removed,
-- so make sure we can write to it. -- so make sure we can write to it.
void $ liftIO $ tryIO $ allowWrite dest' void $ liftIO $ tryIO $ allowWrite dest'
liftIO $ withBinaryFile dest ReadWriteMode $ \hdest -> liftIO $ withBinaryFile dest ReadWriteMode $ \hdest ->
withBinaryFile src ReadMode $ \hsrc -> do withBinaryFile src ReadMode $ \hsrc -> do
sofar <- compareexisting iv hdest hsrc zeroBytesProcessed sofar <- compareexisting hdest hsrc zeroBytesProcessed
docopy' iv hdest hsrc sofar docopy' hdest hsrc sofar
-- Copy src mode and mtime. -- Copy src mode and mtime.
mode <- liftIO $ fileMode <$> getFileStatus src mode <- liftIO $ fileMode <$> getFileStatus src
@ -99,19 +94,9 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
liftIO $ setFileMode dest mode liftIO $ setFileMode dest mode
liftIO $ touch dest' mtime False liftIO $ touch dest' mtime False
ifM check return Copied
( case iv of
Just x -> ifM (liftIO $ finalizeIncremental x)
( return (True, Verified)
, do
warning "verification of content failed"
return (False, UnVerified)
)
Nothing -> return (True, UnVerified)
, return (False, UnVerified)
)
docopy' iv hdest hsrc sofar = do docopy' hdest hsrc sofar = do
s <- S.hGet hsrc defaultChunkSize s <- S.hGet hsrc defaultChunkSize
if s == S.empty if s == S.empty
then return () then return ()
@ -120,12 +105,12 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
S.hPut hdest s S.hPut hdest s
maybe noop (flip updateIncremental s) iv maybe noop (flip updateIncremental s) iv
meterupdate sofar' meterupdate sofar'
docopy' iv hdest hsrc sofar' docopy' hdest hsrc sofar'
-- Leaves hdest and hsrc seeked to wherever the two diverge, -- Leaves hdest and hsrc seeked to wherever the two diverge,
-- so typically hdest will be seeked to end, and hsrc to the same -- so typically hdest will be seeked to end, and hsrc to the same
-- position. -- position.
compareexisting iv hdest hsrc sofar = do compareexisting hdest hsrc sofar = do
s <- S.hGet hdest defaultChunkSize s <- S.hGet hdest defaultChunkSize
if s == S.empty if s == S.empty
then return sofar then return sofar
@ -136,7 +121,7 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
maybe noop (flip updateIncremental s) iv maybe noop (flip updateIncremental s) iv
let sofar' = addBytesProcessed sofar (S.length s) let sofar' = addBytesProcessed sofar (S.length s)
meterupdate sofar' meterupdate sofar'
compareexisting iv hdest hsrc sofar' compareexisting hdest hsrc sofar'
else do else do
seekbefore hdest s seekbefore hdest s
seekbefore hsrc s' seekbefore hsrc s'

View file

@ -191,7 +191,7 @@ storeKeyM d chunkconfig cow k c m =
in byteStorer go k c m in byteStorer go k c m
NoChunks -> NoChunks ->
let go _k src p = do let go _k src p = do
fileCopierUnVerified cow src tmpf k p void $ fileCopier cow src tmpf p Nothing
liftIO $ finalizeStoreGeneric d tmpdir destdir liftIO $ finalizeStoreGeneric d tmpdir destdir
in fileStorer go k c m in fileStorer go k c m
_ -> _ ->
@ -205,11 +205,6 @@ storeKeyM d chunkconfig cow k c m =
kf = keyFile k kf = keyFile k
destdir = storeDir d k destdir = storeDir d k
fileCopierUnVerified :: CopyCoWTried -> FilePath -> FilePath -> Key -> MeterUpdate -> Annex ()
fileCopierUnVerified cow src dest k p = do
(ok, _verification) <- fileCopier cow src dest k p (return True) NoVerify
unless ok $ giveup "failed to copy file"
checkDiskSpaceDirectory :: RawFilePath -> Key -> Annex Bool checkDiskSpaceDirectory :: RawFilePath -> Key -> Annex Bool
checkDiskSpaceDirectory d k = do checkDiskSpaceDirectory d k = do
annexdir <- fromRepo gitAnnexObjectDir annexdir <- fromRepo gitAnnexObjectDir
@ -239,7 +234,7 @@ retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do
src <- liftIO $ fromRawFilePath <$> getLocation d k src <- liftIO $ fromRawFilePath <$> getLocation d k
fileCopierUnVerified cow src dest k p void $ fileCopier cow src dest p Nothing
retrieveKeyFileM d _ _ = byteRetriever $ \k sink -> retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k) sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
@ -305,17 +300,17 @@ checkPresentGeneric' d check = ifM check
) )
storeExportM :: RawFilePath -> CopyCoWTried -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex () storeExportM :: RawFilePath -> CopyCoWTried -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex ()
storeExportM d cow src k loc p = do storeExportM d cow src _k loc p = do
liftIO $ createDirectoryUnder d (P.takeDirectory dest) liftIO $ createDirectoryUnder d (P.takeDirectory dest)
-- Write via temp file so that checkPresentGeneric will not -- Write via temp file so that checkPresentGeneric will not
-- see it until it's fully stored. -- see it until it's fully stored.
viaTmp go (fromRawFilePath dest) () viaTmp go (fromRawFilePath dest) ()
where where
dest = exportPath d loc dest = exportPath d loc
go tmp () = fileCopierUnVerified cow src tmp k p go tmp () = void $ fileCopier cow src tmp p Nothing
retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex () retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex ()
retrieveExportM d cow k loc dest p = fileCopierUnVerified cow src dest k p retrieveExportM d cow _k loc dest p = void $ fileCopier cow src dest p Nothing
where where
src = fromRawFilePath $ exportPath d loc src = fromRawFilePath $ exportPath d loc
@ -493,11 +488,11 @@ retrieveExportWithContentIdentifierM dir cow loc cid dest mkkey p =
guardSameContentIdentifiers cont cid currcid guardSameContentIdentifiers cont cid currcid
storeExportWithContentIdentifierM :: RawFilePath -> CopyCoWTried -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier storeExportWithContentIdentifierM :: RawFilePath -> CopyCoWTried -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
storeExportWithContentIdentifierM dir cow src k loc overwritablecids p = do storeExportWithContentIdentifierM dir cow src _k loc overwritablecids p = do
liftIO $ createDirectoryUnder dir (toRawFilePath destdir) liftIO $ createDirectoryUnder dir (toRawFilePath destdir)
withTmpFileIn destdir template $ \tmpf tmph -> do withTmpFileIn destdir template $ \tmpf tmph -> do
liftIO $ hClose tmph liftIO $ hClose tmph
fileCopierUnVerified cow src tmpf k p void $ fileCopier cow src tmpf p Nothing
let tmpf' = toRawFilePath tmpf let tmpf' = toRawFilePath tmpf
resetAnnexFilePerm tmpf' resetAnnexFilePerm tmpf'
liftIO (getFileStatus tmpf) >>= liftIO . mkContentIdentifier tmpf' >>= \case liftIO (getFileStatus tmpf) >>= liftIO . mkContentIdentifier tmpf' >>= \case

View file

@ -29,6 +29,7 @@ import qualified Annex
import Logs.Presence import Logs.Presence
import Annex.Transfer import Annex.Transfer
import Annex.CopyFile import Annex.CopyFile
import Annex.Verify
import Annex.UUID import Annex.UUID
import qualified Annex.Content import qualified Annex.Content
import qualified Annex.BranchState import qualified Annex.BranchState
@ -839,6 +840,8 @@ wantHardLink = (annexHardLink <$> Annex.getGitConfig)
-- because they can be modified at any time. -- because they can be modified at any time.
<&&> (not <$> annexThin <$> Annex.getGitConfig) <&&> (not <$> annexThin <$> Annex.getGitConfig)
type FileCopier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification)
-- If either the remote or local repository wants to use hard links, -- If either the remote or local repository wants to use hard links,
-- the copier will do so (falling back to copying if a hard link cannot be -- the copier will do so (falling back to copying if a hard link cannot be
-- made). -- made).
@ -849,7 +852,6 @@ wantHardLink = (annexHardLink <$> Annex.getGitConfig)
-- copying it. -- copying it.
mkFileCopier :: Bool -> State -> Annex FileCopier mkFileCopier :: Bool -> State -> Annex FileCopier
mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
let copier = fileCopier copycowtried
localwanthardlink <- wantHardLink localwanthardlink <- wantHardLink
let linker = \src dest -> createLink src dest >> return True let linker = \src dest -> createLink src dest >> return True
if remotewanthardlink || localwanthardlink if remotewanthardlink || localwanthardlink
@ -862,6 +864,22 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
, copier src dest k p check verifyconfig , copier src dest k p check verifyconfig
) )
else return copier else return copier
where
copier src dest k p check verifyconfig = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
fileCopier copycowtried src dest p iv >>= \case
Copied -> ifM check
( case iv of
Just x -> ifM (liftIO $ finalizeIncremental x)
( return (True, Verified)
, do
warning "verification of content failed"
return (False, UnVerified)
)
Nothing -> return (True, UnVerified)
, return (False, UnVerified)
)
CopiedCoW -> unVerified check
{- Normally the UUID of a local repository is checked at startup, {- Normally the UUID of a local repository is checked at startup,
- but annex-checkuuid config can prevent that. To avoid getting - but annex-checkuuid config can prevent that. To avoid getting