refactor fileCopier
Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
d889ae0c01
commit
a644f729ce
3 changed files with 47 additions and 49 deletions
|
@ -10,23 +10,16 @@
|
||||||
module Annex.CopyFile where
|
module Annex.CopyFile where
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
import Types.Remote
|
|
||||||
import Utility.Metered
|
import Utility.Metered
|
||||||
import Utility.CopyFile
|
import Utility.CopyFile
|
||||||
import Utility.FileMode
|
import Utility.FileMode
|
||||||
import Utility.Touch
|
import Utility.Touch
|
||||||
import Types.Backend
|
import Types.Backend
|
||||||
import Annex.Verify
|
|
||||||
|
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
import qualified Data.ByteString as S
|
import qualified Data.ByteString as S
|
||||||
import Data.Time.Clock.POSIX
|
import Data.Time.Clock.POSIX
|
||||||
|
|
||||||
-- Copies from src to dest, updating a meter. If the copy finishes
|
|
||||||
-- successfully, calls a final check action, which must also succeed, or
|
|
||||||
-- returns false.
|
|
||||||
type FileCopier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification)
|
|
||||||
|
|
||||||
-- To avoid the overhead of trying copy-on-write every time, it's tried
|
-- To avoid the overhead of trying copy-on-write every time, it's tried
|
||||||
-- once and if it fails, is not tried again.
|
-- once and if it fails, is not tried again.
|
||||||
newtype CopyCoWTried = CopyCoWTried (MVar Bool)
|
newtype CopyCoWTried = CopyCoWTried (MVar Bool)
|
||||||
|
@ -53,13 +46,17 @@ tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
|
||||||
docopycow = watchFileSize dest meterupdate $
|
docopycow = watchFileSize dest meterupdate $
|
||||||
copyCoW CopyTimeStamps src dest
|
copyCoW CopyTimeStamps src dest
|
||||||
|
|
||||||
{- Copys a file. Uses copy-on-write if it is supported. Otherwise,
|
data CopyMethod = CopiedCoW | Copied
|
||||||
- copies the file itself. If the destination already exists,
|
|
||||||
- an interruped copy will resume where it left off.
|
{- Copies from src to dest, updating a meter. Preserves mode and mtime.
|
||||||
|
- Uses copy-on-write if it is supported. If the the destination already
|
||||||
|
- exists, an interruped copy will resume where it left off.
|
||||||
-
|
-
|
||||||
- When copy-on-write is used, returns UnVerified, because the content of
|
- The IncrementalVerifier is updated with the content of the file as it's
|
||||||
- the file has not been verified to be correct. When the file has to be
|
- being copied. But it is not finalized at the end.
|
||||||
- read to copy it, a hash is calulated at the same time.
|
-
|
||||||
|
- When copy-on-write is used, the IncrementalVerifier is not fed
|
||||||
|
- the content of the file.
|
||||||
-
|
-
|
||||||
- Note that, when the destination file already exists, it's read both
|
- Note that, when the destination file already exists, it's read both
|
||||||
- to start calculating the hash, and also to verify that its content is
|
- to start calculating the hash, and also to verify that its content is
|
||||||
|
@ -68,13 +65,13 @@ tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
|
||||||
- (eg when isStableKey is false), and doing this avoids getting a
|
- (eg when isStableKey is false), and doing this avoids getting a
|
||||||
- corrupted file in such cases.
|
- corrupted file in such cases.
|
||||||
-}
|
-}
|
||||||
fileCopier :: CopyCoWTried -> FileCopier
|
fileCopier :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex CopyMethod
|
||||||
#ifdef mingw32_HOST_OS
|
#ifdef mingw32_HOST_OS
|
||||||
fileCopier _ src dest k meterupdate check verifyconfig = docopy
|
fileCopier _ src dest meterupdate iv = docopy
|
||||||
#else
|
#else
|
||||||
fileCopier copycowtried src dest k meterupdate check verifyconfig =
|
fileCopier copycowtried src dest meterupdate iv =
|
||||||
ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
|
ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
|
||||||
( unVerified check
|
( return CopiedCoW
|
||||||
, docopy
|
, docopy
|
||||||
)
|
)
|
||||||
#endif
|
#endif
|
||||||
|
@ -82,16 +79,14 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
|
||||||
dest' = toRawFilePath dest
|
dest' = toRawFilePath dest
|
||||||
|
|
||||||
docopy = do
|
docopy = do
|
||||||
iv <- startVerifyKeyContentIncrementally verifyconfig k
|
|
||||||
|
|
||||||
-- The file might have had the write bit removed,
|
-- The file might have had the write bit removed,
|
||||||
-- so make sure we can write to it.
|
-- so make sure we can write to it.
|
||||||
void $ liftIO $ tryIO $ allowWrite dest'
|
void $ liftIO $ tryIO $ allowWrite dest'
|
||||||
|
|
||||||
liftIO $ withBinaryFile dest ReadWriteMode $ \hdest ->
|
liftIO $ withBinaryFile dest ReadWriteMode $ \hdest ->
|
||||||
withBinaryFile src ReadMode $ \hsrc -> do
|
withBinaryFile src ReadMode $ \hsrc -> do
|
||||||
sofar <- compareexisting iv hdest hsrc zeroBytesProcessed
|
sofar <- compareexisting hdest hsrc zeroBytesProcessed
|
||||||
docopy' iv hdest hsrc sofar
|
docopy' hdest hsrc sofar
|
||||||
|
|
||||||
-- Copy src mode and mtime.
|
-- Copy src mode and mtime.
|
||||||
mode <- liftIO $ fileMode <$> getFileStatus src
|
mode <- liftIO $ fileMode <$> getFileStatus src
|
||||||
|
@ -99,19 +94,9 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
|
||||||
liftIO $ setFileMode dest mode
|
liftIO $ setFileMode dest mode
|
||||||
liftIO $ touch dest' mtime False
|
liftIO $ touch dest' mtime False
|
||||||
|
|
||||||
ifM check
|
return Copied
|
||||||
( case iv of
|
|
||||||
Just x -> ifM (liftIO $ finalizeIncremental x)
|
|
||||||
( return (True, Verified)
|
|
||||||
, do
|
|
||||||
warning "verification of content failed"
|
|
||||||
return (False, UnVerified)
|
|
||||||
)
|
|
||||||
Nothing -> return (True, UnVerified)
|
|
||||||
, return (False, UnVerified)
|
|
||||||
)
|
|
||||||
|
|
||||||
docopy' iv hdest hsrc sofar = do
|
docopy' hdest hsrc sofar = do
|
||||||
s <- S.hGet hsrc defaultChunkSize
|
s <- S.hGet hsrc defaultChunkSize
|
||||||
if s == S.empty
|
if s == S.empty
|
||||||
then return ()
|
then return ()
|
||||||
|
@ -120,12 +105,12 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
|
||||||
S.hPut hdest s
|
S.hPut hdest s
|
||||||
maybe noop (flip updateIncremental s) iv
|
maybe noop (flip updateIncremental s) iv
|
||||||
meterupdate sofar'
|
meterupdate sofar'
|
||||||
docopy' iv hdest hsrc sofar'
|
docopy' hdest hsrc sofar'
|
||||||
|
|
||||||
-- Leaves hdest and hsrc seeked to wherever the two diverge,
|
-- Leaves hdest and hsrc seeked to wherever the two diverge,
|
||||||
-- so typically hdest will be seeked to end, and hsrc to the same
|
-- so typically hdest will be seeked to end, and hsrc to the same
|
||||||
-- position.
|
-- position.
|
||||||
compareexisting iv hdest hsrc sofar = do
|
compareexisting hdest hsrc sofar = do
|
||||||
s <- S.hGet hdest defaultChunkSize
|
s <- S.hGet hdest defaultChunkSize
|
||||||
if s == S.empty
|
if s == S.empty
|
||||||
then return sofar
|
then return sofar
|
||||||
|
@ -136,7 +121,7 @@ fileCopier copycowtried src dest k meterupdate check verifyconfig =
|
||||||
maybe noop (flip updateIncremental s) iv
|
maybe noop (flip updateIncremental s) iv
|
||||||
let sofar' = addBytesProcessed sofar (S.length s)
|
let sofar' = addBytesProcessed sofar (S.length s)
|
||||||
meterupdate sofar'
|
meterupdate sofar'
|
||||||
compareexisting iv hdest hsrc sofar'
|
compareexisting hdest hsrc sofar'
|
||||||
else do
|
else do
|
||||||
seekbefore hdest s
|
seekbefore hdest s
|
||||||
seekbefore hsrc s'
|
seekbefore hsrc s'
|
||||||
|
|
|
@ -191,7 +191,7 @@ storeKeyM d chunkconfig cow k c m =
|
||||||
in byteStorer go k c m
|
in byteStorer go k c m
|
||||||
NoChunks ->
|
NoChunks ->
|
||||||
let go _k src p = do
|
let go _k src p = do
|
||||||
fileCopierUnVerified cow src tmpf k p
|
void $ fileCopier cow src tmpf p Nothing
|
||||||
liftIO $ finalizeStoreGeneric d tmpdir destdir
|
liftIO $ finalizeStoreGeneric d tmpdir destdir
|
||||||
in fileStorer go k c m
|
in fileStorer go k c m
|
||||||
_ ->
|
_ ->
|
||||||
|
@ -205,11 +205,6 @@ storeKeyM d chunkconfig cow k c m =
|
||||||
kf = keyFile k
|
kf = keyFile k
|
||||||
destdir = storeDir d k
|
destdir = storeDir d k
|
||||||
|
|
||||||
fileCopierUnVerified :: CopyCoWTried -> FilePath -> FilePath -> Key -> MeterUpdate -> Annex ()
|
|
||||||
fileCopierUnVerified cow src dest k p = do
|
|
||||||
(ok, _verification) <- fileCopier cow src dest k p (return True) NoVerify
|
|
||||||
unless ok $ giveup "failed to copy file"
|
|
||||||
|
|
||||||
checkDiskSpaceDirectory :: RawFilePath -> Key -> Annex Bool
|
checkDiskSpaceDirectory :: RawFilePath -> Key -> Annex Bool
|
||||||
checkDiskSpaceDirectory d k = do
|
checkDiskSpaceDirectory d k = do
|
||||||
annexdir <- fromRepo gitAnnexObjectDir
|
annexdir <- fromRepo gitAnnexObjectDir
|
||||||
|
@ -239,7 +234,7 @@ retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
|
||||||
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
|
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
|
||||||
retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do
|
retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do
|
||||||
src <- liftIO $ fromRawFilePath <$> getLocation d k
|
src <- liftIO $ fromRawFilePath <$> getLocation d k
|
||||||
fileCopierUnVerified cow src dest k p
|
void $ fileCopier cow src dest p Nothing
|
||||||
retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
|
retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
|
||||||
sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
|
sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
|
||||||
|
|
||||||
|
@ -305,17 +300,17 @@ checkPresentGeneric' d check = ifM check
|
||||||
)
|
)
|
||||||
|
|
||||||
storeExportM :: RawFilePath -> CopyCoWTried -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex ()
|
storeExportM :: RawFilePath -> CopyCoWTried -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex ()
|
||||||
storeExportM d cow src k loc p = do
|
storeExportM d cow src _k loc p = do
|
||||||
liftIO $ createDirectoryUnder d (P.takeDirectory dest)
|
liftIO $ createDirectoryUnder d (P.takeDirectory dest)
|
||||||
-- Write via temp file so that checkPresentGeneric will not
|
-- Write via temp file so that checkPresentGeneric will not
|
||||||
-- see it until it's fully stored.
|
-- see it until it's fully stored.
|
||||||
viaTmp go (fromRawFilePath dest) ()
|
viaTmp go (fromRawFilePath dest) ()
|
||||||
where
|
where
|
||||||
dest = exportPath d loc
|
dest = exportPath d loc
|
||||||
go tmp () = fileCopierUnVerified cow src tmp k p
|
go tmp () = void $ fileCopier cow src tmp p Nothing
|
||||||
|
|
||||||
retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex ()
|
retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex ()
|
||||||
retrieveExportM d cow k loc dest p = fileCopierUnVerified cow src dest k p
|
retrieveExportM d cow _k loc dest p = void $ fileCopier cow src dest p Nothing
|
||||||
where
|
where
|
||||||
src = fromRawFilePath $ exportPath d loc
|
src = fromRawFilePath $ exportPath d loc
|
||||||
|
|
||||||
|
@ -493,11 +488,11 @@ retrieveExportWithContentIdentifierM dir cow loc cid dest mkkey p =
|
||||||
guardSameContentIdentifiers cont cid currcid
|
guardSameContentIdentifiers cont cid currcid
|
||||||
|
|
||||||
storeExportWithContentIdentifierM :: RawFilePath -> CopyCoWTried -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
|
storeExportWithContentIdentifierM :: RawFilePath -> CopyCoWTried -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
|
||||||
storeExportWithContentIdentifierM dir cow src k loc overwritablecids p = do
|
storeExportWithContentIdentifierM dir cow src _k loc overwritablecids p = do
|
||||||
liftIO $ createDirectoryUnder dir (toRawFilePath destdir)
|
liftIO $ createDirectoryUnder dir (toRawFilePath destdir)
|
||||||
withTmpFileIn destdir template $ \tmpf tmph -> do
|
withTmpFileIn destdir template $ \tmpf tmph -> do
|
||||||
liftIO $ hClose tmph
|
liftIO $ hClose tmph
|
||||||
fileCopierUnVerified cow src tmpf k p
|
void $ fileCopier cow src tmpf p Nothing
|
||||||
let tmpf' = toRawFilePath tmpf
|
let tmpf' = toRawFilePath tmpf
|
||||||
resetAnnexFilePerm tmpf'
|
resetAnnexFilePerm tmpf'
|
||||||
liftIO (getFileStatus tmpf) >>= liftIO . mkContentIdentifier tmpf' >>= \case
|
liftIO (getFileStatus tmpf) >>= liftIO . mkContentIdentifier tmpf' >>= \case
|
||||||
|
|
|
@ -29,6 +29,7 @@ import qualified Annex
|
||||||
import Logs.Presence
|
import Logs.Presence
|
||||||
import Annex.Transfer
|
import Annex.Transfer
|
||||||
import Annex.CopyFile
|
import Annex.CopyFile
|
||||||
|
import Annex.Verify
|
||||||
import Annex.UUID
|
import Annex.UUID
|
||||||
import qualified Annex.Content
|
import qualified Annex.Content
|
||||||
import qualified Annex.BranchState
|
import qualified Annex.BranchState
|
||||||
|
@ -839,6 +840,8 @@ wantHardLink = (annexHardLink <$> Annex.getGitConfig)
|
||||||
-- because they can be modified at any time.
|
-- because they can be modified at any time.
|
||||||
<&&> (not <$> annexThin <$> Annex.getGitConfig)
|
<&&> (not <$> annexThin <$> Annex.getGitConfig)
|
||||||
|
|
||||||
|
type FileCopier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification)
|
||||||
|
|
||||||
-- If either the remote or local repository wants to use hard links,
|
-- If either the remote or local repository wants to use hard links,
|
||||||
-- the copier will do so (falling back to copying if a hard link cannot be
|
-- the copier will do so (falling back to copying if a hard link cannot be
|
||||||
-- made).
|
-- made).
|
||||||
|
@ -849,7 +852,6 @@ wantHardLink = (annexHardLink <$> Annex.getGitConfig)
|
||||||
-- copying it.
|
-- copying it.
|
||||||
mkFileCopier :: Bool -> State -> Annex FileCopier
|
mkFileCopier :: Bool -> State -> Annex FileCopier
|
||||||
mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
|
mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
|
||||||
let copier = fileCopier copycowtried
|
|
||||||
localwanthardlink <- wantHardLink
|
localwanthardlink <- wantHardLink
|
||||||
let linker = \src dest -> createLink src dest >> return True
|
let linker = \src dest -> createLink src dest >> return True
|
||||||
if remotewanthardlink || localwanthardlink
|
if remotewanthardlink || localwanthardlink
|
||||||
|
@ -862,6 +864,22 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
|
||||||
, copier src dest k p check verifyconfig
|
, copier src dest k p check verifyconfig
|
||||||
)
|
)
|
||||||
else return copier
|
else return copier
|
||||||
|
where
|
||||||
|
copier src dest k p check verifyconfig = do
|
||||||
|
iv <- startVerifyKeyContentIncrementally verifyconfig k
|
||||||
|
fileCopier copycowtried src dest p iv >>= \case
|
||||||
|
Copied -> ifM check
|
||||||
|
( case iv of
|
||||||
|
Just x -> ifM (liftIO $ finalizeIncremental x)
|
||||||
|
( return (True, Verified)
|
||||||
|
, do
|
||||||
|
warning "verification of content failed"
|
||||||
|
return (False, UnVerified)
|
||||||
|
)
|
||||||
|
Nothing -> return (True, UnVerified)
|
||||||
|
, return (False, UnVerified)
|
||||||
|
)
|
||||||
|
CopiedCoW -> unVerified check
|
||||||
|
|
||||||
{- Normally the UUID of a local repository is checked at startup,
|
{- Normally the UUID of a local repository is checked at startup,
|
||||||
- but annex-checkuuid config can prevent that. To avoid getting
|
- but annex-checkuuid config can prevent that. To avoid getting
|
||||||
|
|
Loading…
Reference in a new issue