split out Annex.CopyFile

Goal is to use it in Remote.Directory, but also it's nice to shrink Remote.Git.
This commit is contained in:
Joey Hess 2021-04-14 14:06:43 -04:00
parent 34e959f181
commit 441f65c2cf
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 169 additions and 152 deletions

162
Annex/CopyFile.hs Normal file
View file

@ -0,0 +1,162 @@
{- Copying files.
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.CopyFile where
import Annex.Common
import Types.Remote
import Utility.Metered
import Utility.CopyFile
import Utility.FileMode
import Utility.Touch
import Types.Backend
import Backend
import Annex.Verify
import Control.Concurrent
import qualified Data.ByteString as S
import Data.Time.Clock.POSIX
-- Copies from src to dest, updating a meter. If the copy finishes
-- successfully, calls a final check action, which must also succeed, or
-- returns false.
--
-- If either the remote or local repository wants to use hard links,
-- the copier will do so (falling back to copying if a hard link cannot be
-- made).
--
-- When a hard link is created, returns Verified; the repo being linked
-- from is implicitly trusted, so no expensive verification needs to be
-- done. Also returns Verified if the key's content is verified while
-- copying it.
type FileCopier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification)
-- To avoid the overhead of trying copy-on-write every time, it's tried
-- once and if it fails, is not tried again.
newtype CopyCoWTried = CopyCoWTried (MVar Bool)
newCopyCoWTried :: IO CopyCoWTried
newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
{- Copys a file. Uses copy-on-write if it is supported. Otherwise,
- copies the file itself. If the destination already exists,
- an interruped copy will resume where it left off.
-
- When copy-on-write is used, returns UnVerified, because the content of
- the file has not been verified to be correct. When the file has to be
- read to copy it, a hash is calulated at the same time.
-
- Note that, when the destination file already exists, it's read both
- to start calculating the hash, and also to verify that its content is
- the same as the start of the source file. It's possible that the
- destination file was created from some other source file,
- (eg when isStableKey is false), and doing this avoids getting a
- corrupted file in such cases.
-}
fileCopier :: CopyCoWTried -> FileCopier
#ifdef mingw32_HOST_OS
fileCopier _ src dest k meterupdate check verifyconfig = docopy
where
#else
fileCopier (CopyCoWTried copycowtried) src dest k meterupdate check verifyconfig =
-- If multiple threads reach this at the same time, they
-- will both try CoW, which is acceptable.
ifM (liftIO $ isEmptyMVar copycowtried)
( do
ok <- docopycow
void $ liftIO $ tryPutMVar copycowtried ok
if ok
then unVerified check
else docopy
, ifM (liftIO $ readMVar copycowtried)
( do
ok <- docopycow
if ok
then unVerified check
else docopy
, docopy
)
)
where
docopycow = liftIO $ watchFileSize dest meterupdate $
copyCoW CopyTimeStamps src dest
#endif
dest' = toRawFilePath dest
docopy = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
-- The file might have had the write bit removed,
-- so make sure we can write to it.
void $ liftIO $ tryIO $ allowWrite dest'
liftIO $ withBinaryFile dest ReadWriteMode $ \hdest ->
withBinaryFile src ReadMode $ \hsrc -> do
sofar <- compareexisting iv hdest hsrc zeroBytesProcessed
docopy' iv hdest hsrc sofar
-- Copy src mode and mtime.
mode <- liftIO $ fileMode <$> getFileStatus src
mtime <- liftIO $ utcTimeToPOSIXSeconds <$> getModificationTime src
liftIO $ setFileMode dest mode
liftIO $ touch dest' mtime False
ifM check
( case iv of
Just x -> ifM (liftIO $ finalizeIncremental x)
( return (True, Verified)
, return (False, UnVerified)
)
Nothing -> return (True, UnVerified)
, return (False, UnVerified)
)
docopy' iv hdest hsrc sofar = do
s <- S.hGet hsrc defaultChunkSize
if s == S.empty
then return ()
else do
let sofar' = addBytesProcessed sofar (S.length s)
S.hPut hdest s
maybe noop (flip updateIncremental s) iv
meterupdate sofar'
docopy' iv hdest hsrc sofar'
-- Leaves hdest and hsrc seeked to wherever the two diverge,
-- so typically hdest will be seeked to end, and hsrc to the same
-- position.
compareexisting iv hdest hsrc sofar = do
s <- S.hGet hdest defaultChunkSize
if s == S.empty
then return sofar
else do
s' <- getnoshort (S.length s) hsrc
if s == s'
then do
maybe noop (flip updateIncremental s) iv
let sofar' = addBytesProcessed sofar (S.length s)
meterupdate sofar'
compareexisting iv hdest hsrc sofar'
else do
seekbefore hdest s
seekbefore hsrc s'
return sofar
seekbefore h s = hSeek h RelativeSeek (fromIntegral (-1*S.length s))
-- Like hGet, but never returns less than the requested number of
-- bytes, unless it reaches EOF.
getnoshort n h = do
s <- S.hGet h n
if S.length s == n || S.empty == s
then return s
else do
s' <- getnoshort (n - S.length s) h
return (s <> s')

View file

@ -28,6 +28,7 @@ import qualified Git.Types as Git
import qualified Annex
import Logs.Presence
import Annex.Transfer
import Annex.CopyFile
import Annex.UUID
import qualified Annex.Content
import qualified Annex.BranchState
@ -44,12 +45,9 @@ import Types.CleanupActions
import qualified CmdLine.GitAnnexShell.Fields as Fields
import Logs.Location
import Utility.Metered
import Utility.CopyFile
import Utility.Env
import Utility.FileMode
import Utility.Batch
import Utility.SimpleProtocol
import Utility.Touch
import Remote.Helper.Git
import Remote.Helper.Messages
import Remote.Helper.ExportImport
@ -63,10 +61,7 @@ import Annex.Path
import Creds
import Types.NumCopies
import Types.ProposedAccepted
import Types.Backend
import Backend
import Annex.Action
import Annex.Verify
import Messages.Progress
#ifndef mingw32_HOST_OS
@ -77,7 +72,6 @@ import Control.Concurrent
import Control.Concurrent.MSampleVar
import qualified Data.Map as M
import qualified Data.ByteString as S
import Data.Time.Clock.POSIX
import Network.URI
remote :: RemoteType
@ -559,7 +553,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met
onLocalFast st $ Annex.Content.prepSendAnnex key >>= \case
Just (object, checksuccess) -> do
let verify = Annex.Content.RemoteVerify r
copier <- mkCopier hardlink st
copier <- mkFileCopier hardlink st
(ok, v) <- runTransfer (Transfer Download u (fromKey id key))
file Nothing stdRetry $ \p ->
metered (Just (combineMeterUpdate p meterupdate)) key $ \_ p' ->
@ -703,7 +697,7 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate
( return True
, runTransfer (Transfer Download u (fromKey id key)) file Nothing stdRetry $ \p -> do
let verify = Annex.Content.RemoteVerify r
copier <- mkCopier hardlink st
copier <- mkFileCopier hardlink st
let rsp = RetrievalAllKeysSecure
res <- logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest ->
metered (Just (combineMeterUpdate meterupdate p)) key $ \_ p' ->
@ -830,23 +824,9 @@ wantHardLink = (annexHardLink <$> Annex.getGitConfig)
-- because they can be modified at any time.
<&&> (not <$> annexThin <$> Annex.getGitConfig)
-- Copies from src to dest, updating a meter. If the copy finishes
-- successfully, calls a final check action, which must also succeed, or
-- returns false.
--
-- If either the remote or local repository wants to use hard links,
-- the copier will do so (falling back to copying if a hard link cannot be
-- made).
--
-- When a hard link is created, returns Verified; the repo being linked
-- from is implicitly trusted, so no expensive verification needs to be
-- done. Also returns Verified if the key's content is verified while
-- copying it.
type Copier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification)
mkCopier :: Bool -> State -> Annex Copier
mkCopier remotewanthardlink st = do
let copier = fileCopier st
mkFileCopier :: Bool -> State -> Annex FileCopier
mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
let copier = fileCopier copycowtried
localwanthardlink <- wantHardLink
let linker = \src dest -> createLink src dest >> return True
if remotewanthardlink || localwanthardlink
@ -914,129 +894,3 @@ mkState r u gc = do
)
return (duc, getrepo)
-- To avoid the overhead of trying copy-on-write every time, it's tried
-- once and if it fails, is not tried again.
newtype CopyCoWTried = CopyCoWTried (MVar Bool)
newCopyCoWTried :: IO CopyCoWTried
newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
{- Copys a file. Uses copy-on-write if it is supported. Otherwise,
- copies the file itself. If the destination already exists,
- an interruped copy will resume where it left off.
-
- When copy-on-write is used, returns UnVerified, because the content of
- the file has not been verified to be correct. When the file has to be
- read to copy it, a hash is calulated at the same time.
-
- Note that, when the destination file already exists, it's read both
- to start calculating the hash, and also to verify that its content is
- the same as the start of the source file. It's possible that the
- destination file was created from some other source file,
- (eg when isStableKey is false), and doing this avoids getting a
- corrupted file in such cases.
-}
fileCopier :: State -> Copier
#ifdef mingw32_HOST_OS
fileCopier _st src dest k meterupdate check verifyconfig = docopy
where
#else
fileCopier st src dest k meterupdate check verifyconfig =
-- If multiple threads reach this at the same time, they
-- will both try CoW, which is acceptable.
ifM (liftIO $ isEmptyMVar copycowtried)
( do
ok <- docopycow
void $ liftIO $ tryPutMVar copycowtried ok
if ok
then unVerified check
else docopy
, ifM (liftIO $ readMVar copycowtried)
( do
ok <- docopycow
if ok
then unVerified check
else docopy
, docopy
)
)
where
copycowtried = case st of
State _ _ (CopyCoWTried v) _ _ -> v
docopycow = liftIO $ watchFileSize dest meterupdate $
copyCoW CopyTimeStamps src dest
#endif
dest' = toRawFilePath dest
docopy = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
-- The file might have had the write bit removed,
-- so make sure we can write to it.
void $ liftIO $ tryIO $ allowWrite dest'
liftIO $ withBinaryFile dest ReadWriteMode $ \hdest ->
withBinaryFile src ReadMode $ \hsrc -> do
sofar <- compareexisting iv hdest hsrc zeroBytesProcessed
docopy' iv hdest hsrc sofar
-- Copy src mode and mtime.
mode <- liftIO $ fileMode <$> getFileStatus src
mtime <- liftIO $ utcTimeToPOSIXSeconds <$> getModificationTime src
liftIO $ setFileMode dest mode
liftIO $ touch dest' mtime False
ifM check
( case iv of
Just x -> ifM (liftIO $ finalizeIncremental x)
( return (True, Verified)
, return (False, UnVerified)
)
Nothing -> return (True, UnVerified)
, return (False, UnVerified)
)
docopy' iv hdest hsrc sofar = do
s <- S.hGet hsrc defaultChunkSize
if s == S.empty
then return ()
else do
let sofar' = addBytesProcessed sofar (S.length s)
S.hPut hdest s
maybe noop (flip updateIncremental s) iv
meterupdate sofar'
docopy' iv hdest hsrc sofar'
-- Leaves hdest and hsrc seeked to wherever the two diverge,
-- so typically hdest will be seeked to end, and hsrc to the same
-- position.
compareexisting iv hdest hsrc sofar = do
s <- S.hGet hdest defaultChunkSize
if s == S.empty
then return sofar
else do
s' <- getnoshort (S.length s) hsrc
if s == s'
then do
maybe noop (flip updateIncremental s) iv
let sofar' = addBytesProcessed sofar (S.length s)
meterupdate sofar'
compareexisting iv hdest hsrc sofar'
else do
seekbefore hdest s
seekbefore hsrc s'
return sofar
seekbefore h s = hSeek h RelativeSeek (fromIntegral (-1*S.length s))
-- Like hGet, but never returns less than the requested number of
-- bytes, unless it reaches EOF.
getnoshort n h = do
s <- S.hGet h n
if S.length s == n || S.empty == s
then return s
else do
s' <- getnoshort (n - S.length s) h
return (s <> s')

View file

@ -628,6 +628,7 @@ Executable git-annex
Annex.Content.Presence
Annex.Content.LowLevel
Annex.Content.PointerFile
Annex.CopyFile
Annex.CurrentBranch
Annex.Debug
Annex.Debug.Utility