split out Annex.CopyFile

Goal is to use it in Remote.Directory, but also it's nice to shrink Remote.Git.
This commit is contained in:
Joey Hess 2021-04-14 14:06:43 -04:00
parent 34e959f181
commit 441f65c2cf
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 169 additions and 152 deletions

162
Annex/CopyFile.hs Normal file
View file

@ -0,0 +1,162 @@
{- Copying files.
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.CopyFile where
import Annex.Common
import Types.Remote
import Utility.Metered
import Utility.CopyFile
import Utility.FileMode
import Utility.Touch
import Types.Backend
import Backend
import Annex.Verify
import Control.Concurrent
import qualified Data.ByteString as S
import Data.Time.Clock.POSIX
-- Copies from src to dest, updating a meter. If the copy finishes
-- successfully, calls a final check action, which must also succeed, or
-- returns false.
--
-- If either the remote or local repository wants to use hard links,
-- the copier will do so (falling back to copying if a hard link cannot be
-- made).
--
-- When a hard link is created, returns Verified; the repo being linked
-- from is implicitly trusted, so no expensive verification needs to be
-- done. Also returns Verified if the key's content is verified while
-- copying it.
type FileCopier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification)
-- To avoid the overhead of trying copy-on-write every time, it's tried
-- once and if it fails, is not tried again.
newtype CopyCoWTried = CopyCoWTried (MVar Bool)
newCopyCoWTried :: IO CopyCoWTried
newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
{- Copys a file. Uses copy-on-write if it is supported. Otherwise,
- copies the file itself. If the destination already exists,
- an interruped copy will resume where it left off.
-
- When copy-on-write is used, returns UnVerified, because the content of
- the file has not been verified to be correct. When the file has to be
- read to copy it, a hash is calulated at the same time.
-
- Note that, when the destination file already exists, it's read both
- to start calculating the hash, and also to verify that its content is
- the same as the start of the source file. It's possible that the
- destination file was created from some other source file,
- (eg when isStableKey is false), and doing this avoids getting a
- corrupted file in such cases.
-}
fileCopier :: CopyCoWTried -> FileCopier
#ifdef mingw32_HOST_OS
fileCopier _ src dest k meterupdate check verifyconfig = docopy
where
#else
fileCopier (CopyCoWTried copycowtried) src dest k meterupdate check verifyconfig =
-- If multiple threads reach this at the same time, they
-- will both try CoW, which is acceptable.
ifM (liftIO $ isEmptyMVar copycowtried)
( do
ok <- docopycow
void $ liftIO $ tryPutMVar copycowtried ok
if ok
then unVerified check
else docopy
, ifM (liftIO $ readMVar copycowtried)
( do
ok <- docopycow
if ok
then unVerified check
else docopy
, docopy
)
)
where
docopycow = liftIO $ watchFileSize dest meterupdate $
copyCoW CopyTimeStamps src dest
#endif
dest' = toRawFilePath dest
docopy = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
-- The file might have had the write bit removed,
-- so make sure we can write to it.
void $ liftIO $ tryIO $ allowWrite dest'
liftIO $ withBinaryFile dest ReadWriteMode $ \hdest ->
withBinaryFile src ReadMode $ \hsrc -> do
sofar <- compareexisting iv hdest hsrc zeroBytesProcessed
docopy' iv hdest hsrc sofar
-- Copy src mode and mtime.
mode <- liftIO $ fileMode <$> getFileStatus src
mtime <- liftIO $ utcTimeToPOSIXSeconds <$> getModificationTime src
liftIO $ setFileMode dest mode
liftIO $ touch dest' mtime False
ifM check
( case iv of
Just x -> ifM (liftIO $ finalizeIncremental x)
( return (True, Verified)
, return (False, UnVerified)
)
Nothing -> return (True, UnVerified)
, return (False, UnVerified)
)
docopy' iv hdest hsrc sofar = do
s <- S.hGet hsrc defaultChunkSize
if s == S.empty
then return ()
else do
let sofar' = addBytesProcessed sofar (S.length s)
S.hPut hdest s
maybe noop (flip updateIncremental s) iv
meterupdate sofar'
docopy' iv hdest hsrc sofar'
-- Leaves hdest and hsrc seeked to wherever the two diverge,
-- so typically hdest will be seeked to end, and hsrc to the same
-- position.
compareexisting iv hdest hsrc sofar = do
s <- S.hGet hdest defaultChunkSize
if s == S.empty
then return sofar
else do
s' <- getnoshort (S.length s) hsrc
if s == s'
then do
maybe noop (flip updateIncremental s) iv
let sofar' = addBytesProcessed sofar (S.length s)
meterupdate sofar'
compareexisting iv hdest hsrc sofar'
else do
seekbefore hdest s
seekbefore hsrc s'
return sofar
seekbefore h s = hSeek h RelativeSeek (fromIntegral (-1*S.length s))
-- Like hGet, but never returns less than the requested number of
-- bytes, unless it reaches EOF.
getnoshort n h = do
s <- S.hGet h n
if S.length s == n || S.empty == s
then return s
else do
s' <- getnoshort (n - S.length s) h
return (s <> s')

View file

@ -28,6 +28,7 @@ import qualified Git.Types as Git
import qualified Annex import qualified Annex
import Logs.Presence import Logs.Presence
import Annex.Transfer import Annex.Transfer
import Annex.CopyFile
import Annex.UUID import Annex.UUID
import qualified Annex.Content import qualified Annex.Content
import qualified Annex.BranchState import qualified Annex.BranchState
@ -44,12 +45,9 @@ import Types.CleanupActions
import qualified CmdLine.GitAnnexShell.Fields as Fields import qualified CmdLine.GitAnnexShell.Fields as Fields
import Logs.Location import Logs.Location
import Utility.Metered import Utility.Metered
import Utility.CopyFile
import Utility.Env import Utility.Env
import Utility.FileMode
import Utility.Batch import Utility.Batch
import Utility.SimpleProtocol import Utility.SimpleProtocol
import Utility.Touch
import Remote.Helper.Git import Remote.Helper.Git
import Remote.Helper.Messages import Remote.Helper.Messages
import Remote.Helper.ExportImport import Remote.Helper.ExportImport
@ -63,10 +61,7 @@ import Annex.Path
import Creds import Creds
import Types.NumCopies import Types.NumCopies
import Types.ProposedAccepted import Types.ProposedAccepted
import Types.Backend
import Backend
import Annex.Action import Annex.Action
import Annex.Verify
import Messages.Progress import Messages.Progress
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
@ -77,7 +72,6 @@ import Control.Concurrent
import Control.Concurrent.MSampleVar import Control.Concurrent.MSampleVar
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.ByteString as S import qualified Data.ByteString as S
import Data.Time.Clock.POSIX
import Network.URI import Network.URI
remote :: RemoteType remote :: RemoteType
@ -559,7 +553,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met
onLocalFast st $ Annex.Content.prepSendAnnex key >>= \case onLocalFast st $ Annex.Content.prepSendAnnex key >>= \case
Just (object, checksuccess) -> do Just (object, checksuccess) -> do
let verify = Annex.Content.RemoteVerify r let verify = Annex.Content.RemoteVerify r
copier <- mkCopier hardlink st copier <- mkFileCopier hardlink st
(ok, v) <- runTransfer (Transfer Download u (fromKey id key)) (ok, v) <- runTransfer (Transfer Download u (fromKey id key))
file Nothing stdRetry $ \p -> file Nothing stdRetry $ \p ->
metered (Just (combineMeterUpdate p meterupdate)) key $ \_ p' -> metered (Just (combineMeterUpdate p meterupdate)) key $ \_ p' ->
@ -703,7 +697,7 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate
( return True ( return True
, runTransfer (Transfer Download u (fromKey id key)) file Nothing stdRetry $ \p -> do , runTransfer (Transfer Download u (fromKey id key)) file Nothing stdRetry $ \p -> do
let verify = Annex.Content.RemoteVerify r let verify = Annex.Content.RemoteVerify r
copier <- mkCopier hardlink st copier <- mkFileCopier hardlink st
let rsp = RetrievalAllKeysSecure let rsp = RetrievalAllKeysSecure
res <- logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest -> res <- logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest ->
metered (Just (combineMeterUpdate meterupdate p)) key $ \_ p' -> metered (Just (combineMeterUpdate meterupdate p)) key $ \_ p' ->
@ -830,23 +824,9 @@ wantHardLink = (annexHardLink <$> Annex.getGitConfig)
-- because they can be modified at any time. -- because they can be modified at any time.
<&&> (not <$> annexThin <$> Annex.getGitConfig) <&&> (not <$> annexThin <$> Annex.getGitConfig)
-- Copies from src to dest, updating a meter. If the copy finishes mkFileCopier :: Bool -> State -> Annex FileCopier
-- successfully, calls a final check action, which must also succeed, or mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
-- returns false. let copier = fileCopier copycowtried
--
-- If either the remote or local repository wants to use hard links,
-- the copier will do so (falling back to copying if a hard link cannot be
-- made).
--
-- When a hard link is created, returns Verified; the repo being linked
-- from is implicitly trusted, so no expensive verification needs to be
-- done. Also returns Verified if the key's content is verified while
-- copying it.
type Copier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification)
mkCopier :: Bool -> State -> Annex Copier
mkCopier remotewanthardlink st = do
let copier = fileCopier st
localwanthardlink <- wantHardLink localwanthardlink <- wantHardLink
let linker = \src dest -> createLink src dest >> return True let linker = \src dest -> createLink src dest >> return True
if remotewanthardlink || localwanthardlink if remotewanthardlink || localwanthardlink
@ -914,129 +894,3 @@ mkState r u gc = do
) )
return (duc, getrepo) return (duc, getrepo)
-- To avoid the overhead of trying copy-on-write every time, it's tried
-- once and if it fails, is not tried again.
newtype CopyCoWTried = CopyCoWTried (MVar Bool)
newCopyCoWTried :: IO CopyCoWTried
newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
{- Copys a file. Uses copy-on-write if it is supported. Otherwise,
- copies the file itself. If the destination already exists,
- an interruped copy will resume where it left off.
-
- When copy-on-write is used, returns UnVerified, because the content of
- the file has not been verified to be correct. When the file has to be
- read to copy it, a hash is calulated at the same time.
-
- Note that, when the destination file already exists, it's read both
- to start calculating the hash, and also to verify that its content is
- the same as the start of the source file. It's possible that the
- destination file was created from some other source file,
- (eg when isStableKey is false), and doing this avoids getting a
- corrupted file in such cases.
-}
fileCopier :: State -> Copier
#ifdef mingw32_HOST_OS
fileCopier _st src dest k meterupdate check verifyconfig = docopy
where
#else
fileCopier st src dest k meterupdate check verifyconfig =
-- If multiple threads reach this at the same time, they
-- will both try CoW, which is acceptable.
ifM (liftIO $ isEmptyMVar copycowtried)
( do
ok <- docopycow
void $ liftIO $ tryPutMVar copycowtried ok
if ok
then unVerified check
else docopy
, ifM (liftIO $ readMVar copycowtried)
( do
ok <- docopycow
if ok
then unVerified check
else docopy
, docopy
)
)
where
copycowtried = case st of
State _ _ (CopyCoWTried v) _ _ -> v
docopycow = liftIO $ watchFileSize dest meterupdate $
copyCoW CopyTimeStamps src dest
#endif
dest' = toRawFilePath dest
docopy = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
-- The file might have had the write bit removed,
-- so make sure we can write to it.
void $ liftIO $ tryIO $ allowWrite dest'
liftIO $ withBinaryFile dest ReadWriteMode $ \hdest ->
withBinaryFile src ReadMode $ \hsrc -> do
sofar <- compareexisting iv hdest hsrc zeroBytesProcessed
docopy' iv hdest hsrc sofar
-- Copy src mode and mtime.
mode <- liftIO $ fileMode <$> getFileStatus src
mtime <- liftIO $ utcTimeToPOSIXSeconds <$> getModificationTime src
liftIO $ setFileMode dest mode
liftIO $ touch dest' mtime False
ifM check
( case iv of
Just x -> ifM (liftIO $ finalizeIncremental x)
( return (True, Verified)
, return (False, UnVerified)
)
Nothing -> return (True, UnVerified)
, return (False, UnVerified)
)
docopy' iv hdest hsrc sofar = do
s <- S.hGet hsrc defaultChunkSize
if s == S.empty
then return ()
else do
let sofar' = addBytesProcessed sofar (S.length s)
S.hPut hdest s
maybe noop (flip updateIncremental s) iv
meterupdate sofar'
docopy' iv hdest hsrc sofar'
-- Leaves hdest and hsrc seeked to wherever the two diverge,
-- so typically hdest will be seeked to end, and hsrc to the same
-- position.
compareexisting iv hdest hsrc sofar = do
s <- S.hGet hdest defaultChunkSize
if s == S.empty
then return sofar
else do
s' <- getnoshort (S.length s) hsrc
if s == s'
then do
maybe noop (flip updateIncremental s) iv
let sofar' = addBytesProcessed sofar (S.length s)
meterupdate sofar'
compareexisting iv hdest hsrc sofar'
else do
seekbefore hdest s
seekbefore hsrc s'
return sofar
seekbefore h s = hSeek h RelativeSeek (fromIntegral (-1*S.length s))
-- Like hGet, but never returns less than the requested number of
-- bytes, unless it reaches EOF.
getnoshort n h = do
s <- S.hGet h n
if S.length s == n || S.empty == s
then return s
else do
s' <- getnoshort (n - S.length s) h
return (s <> s')

View file

@ -628,6 +628,7 @@ Executable git-annex
Annex.Content.Presence Annex.Content.Presence
Annex.Content.LowLevel Annex.Content.LowLevel
Annex.Content.PointerFile Annex.Content.PointerFile
Annex.CopyFile
Annex.CurrentBranch Annex.CurrentBranch
Annex.Debug Annex.Debug
Annex.Debug.Utility Annex.Debug.Utility