2021-04-14 18:06:43 +00:00
|
|
|
{- Copying files.
|
|
|
|
-
|
2022-05-09 19:38:21 +00:00
|
|
|
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
2021-04-14 18:06:43 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
|
|
|
|
module Annex.CopyFile where
|
|
|
|
|
|
|
|
import Annex.Common
|
|
|
|
import Utility.Metered
|
|
|
|
import Utility.CopyFile
|
|
|
|
import Utility.FileMode
|
|
|
|
import Utility.Touch
|
2021-08-18 17:19:02 +00:00
|
|
|
import Utility.Hash (IncrementalVerifier(..))
|
2023-03-01 19:55:58 +00:00
|
|
|
import qualified Utility.RawFilePath as R
|
2021-04-14 18:06:43 +00:00
|
|
|
|
|
|
|
import Control.Concurrent
|
|
|
|
import qualified Data.ByteString as S
|
|
|
|
import Data.Time.Clock.POSIX
|
2023-03-01 19:55:58 +00:00
|
|
|
import System.IO (hSetFileSize, hTell)
import System.PosixCompat.Files (fileMode)
|
2021-04-14 18:06:43 +00:00
|
|
|
|
|
|
|
-- Trying copy-on-write for every copy would be wasteful, so the outcome
-- of the first attempt is remembered here. The MVar is empty until that
-- first attempt has been made.
newtype CopyCoWTried = CopyCoWTried (MVar Bool)

-- Makes a CopyCoWTried in the initial state, with its MVar still empty.
newCopyCoWTried :: IO CopyCoWTried
newCopyCoWTried = fmap CopyCoWTried newEmptyMVar
|
|
|
|
|
2021-09-27 20:03:01 +00:00
|
|
|
{- Copies a file if copy-on-write is supported. Otherwise, returns False.
|
|
|
|
-
|
2023-03-28 17:06:11 +00:00
|
|
|
- The destination file must not exist yet (or may exist but be empty),
|
|
|
|
- or it will fail to make a CoW copy, and will return false.
|
2021-09-27 20:03:01 +00:00
|
|
|
-}
|
2021-09-27 19:53:12 +00:00
|
|
|
tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> IO Bool
tryCopyCoW (CopyCoWTried probed) src dest meterupdate = do
    -- Several threads can get here before any of them has recorded a
    -- result; each of them then tries CoW itself, which is acceptable.
    unprobed <- isEmptyMVar probed
    if unprobed
        then probe
        else reuse
  where
    rawdest = toRawFilePath dest

    -- The CoW copy attempt itself, wrapped in watchFileSize on dest.
    cowcopy = watchFileSize dest meterupdate $ \_ ->
        copyCoW CopyTimeStamps src dest

    -- No result recorded yet: unless dest already has content, try CoW
    -- and record whether it worked.
    probe = do
        populated <- destpopulated
        if populated
            then return False
            else do
                worked <- cowcopy
                void $ tryPutMVar probed worked
                return worked

    -- A result was recorded earlier: only try CoW if it worked then.
    reuse = do
        cowworks <- readMVar probed
        if cowworks
            then do
                -- CoW is known to work, so delete dest if it
                -- exists, in order to do a fast CoW copy.
                void $ tryIO $ removeFile dest
                cowcopy
            else return False

    -- Whether dest already exists, which would prevent probing CoW.
    -- A dest that cannot be statted is treated as not existing.
    destpopulated =
        tryIO (R.getFileStatus rawdest)
            >>= either (const (return False)) nonempty

    -- An empty dest offers nothing to resume from when CoW does not
    -- work, so it is removed. It only counts as populated when that
    -- removal fails.
    nonempty st = do
        sz <- getFileSize' rawdest st
        if sz /= 0
            then return True
            else either (const True) (const False)
                <$> tryIO (removeFile dest)
|
2021-04-14 20:10:09 +00:00
|
|
|
|
2021-08-16 19:56:24 +00:00
|
|
|
-- How fileCopier ended up producing the destination file.
data CopyMethod
    = CopiedCoW -- ^ tryCopyCoW succeeded in making the copy
    | Copied -- ^ the content was copied by reading and writing it
|
|
|
|
|
|
|
|
{- Copies from src to dest, updating a meter. Preserves mode and mtime.
|
|
|
|
- Uses copy-on-write if it is supported. If the destination already
|
2023-03-14 02:39:16 +00:00
|
|
|
- exists, an interrupted copy will resume where it left off.
|
2021-08-16 19:56:24 +00:00
|
|
|
-
|
|
|
|
- The IncrementalVerifier is updated with the content of the file as it's
|
|
|
|
- being copied. But it is not finalized at the end.
|
2021-04-14 18:06:43 +00:00
|
|
|
-
|
2021-08-16 19:56:24 +00:00
|
|
|
- When copy-on-write is used, the IncrementalVerifier is not fed
|
2021-08-16 20:22:00 +00:00
|
|
|
- the content of the file, and verification using it will fail.
|
2021-04-14 18:06:43 +00:00
|
|
|
-
|
|
|
|
- Note that, when the destination file already exists, it's read both
|
|
|
|
- to start calculating the hash, and also to verify that its content is
|
|
|
|
- the same as the start of the source file. It's possible that the
|
|
|
|
- destination file was created from some other source file,
|
|
|
|
- (eg when isStableKey is false), and doing this avoids getting a
|
|
|
|
- corrupted file in such cases.
|
|
|
|
-}
|
2022-05-09 19:38:21 +00:00
|
|
|
fileCopier :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> IO CopyMethod
#ifdef mingw32_HOST_OS
-- Copy-on-write is not attempted in this build; always copy the content.
fileCopier _ src dest meterupdate iv = plaincopy
#else
fileCopier copycowtried src dest meterupdate iv = do
    cowed <- tryCopyCoW copycowtried src dest meterupdate
    if cowed
        then do
            -- The verifier was not fed any of the content, so it
            -- cannot be used to verify this copy.
            maybe noop unableIncrementalVerifier iv
            return CopiedCoW
        else plaincopy
#endif
  where
    rawdest = toRawFilePath dest

    -- Copies the content by reading and writing it, then makes the
    -- destination's mode and mtime match the source's.
    plaincopy = do
        -- The write bit may have been removed from the destination,
        -- so try to restore it. Failure is ignored here.
        void $ tryIO $ allowWrite rawdest

        withBinaryFile src ReadMode $ \hsrc ->
            fileContentCopier hsrc dest meterupdate iv

        -- Carry over the source's mode and mtime.
        srcmode <- fileMode <$> R.getFileStatus (toRawFilePath src)
        srcmtime <- utcTimeToPOSIXSeconds <$> getModificationTime src
        R.setFileMode rawdest srcmode
        touch rawdest srcmtime False

        return Copied
|
|
|
|
|
|
|
|
{- Copies content from a handle to a destination file. Does not
 - use copy-on-write, and does not copy file mode and mtime.
 -
 - When the destination file already has content, it is compared chunk
 - by chunk with the start of the source. Copying resumes at the first
 - chunk that differs, and the destination is truncated at that point,
 - so that content left over from a longer or unrelated file cannot
 - survive past the end of the copy.
 -
 - The MeterUpdate is called with the running count of bytes handled,
 - and the IncrementalVerifier, when there is one, is fed every chunk,
 - both those that were compared and those that were copied.
 -}
fileContentCopier :: Handle -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> IO ()
fileContentCopier hsrc dest meterupdate iv =
    withBinaryFile dest ReadWriteMode $ \hdest -> do
        sofar <- compareexisting hdest zeroBytesProcessed
        docopy hdest sofar
  where
    feedverifier chunk = maybe noop (flip updateIncrementalVerifier chunk) iv

    -- Writes the remainder of hsrc to hdest, starting at the current
    -- position of each handle.
    docopy hdest sofar = do
        s <- S.hGet hsrc defaultChunkSize
        if S.null s
            then return ()
            else do
                let sofar' = addBytesProcessed sofar (S.length s)
                S.hPut hdest s
                feedverifier s
                meterupdate sofar'
                docopy hdest sofar'

    -- Leaves hdest and hsrc seeked to wherever the two diverge,
    -- so typically hdest will be seeked to end, and hsrc to the same
    -- position.
    compareexisting hdest sofar = do
        s <- S.hGet hdest defaultChunkSize
        if S.null s
            then return sofar
            else do
                s' <- getnoshort (S.length s) hsrc
                if s == s'
                    then do
                        feedverifier s
                        let sofar' = addBytesProcessed sofar (S.length s)
                        meterupdate sofar'
                        compareexisting hdest sofar'
                    else do
                        seekbefore hdest s
                        seekbefore hsrc s'
                        -- From here on the destination does not
                        -- match the source (this includes the source
                        -- simply being shorter). Discard the rest of
                        -- it, otherwise a destination that is longer
                        -- than the source would keep its old tail
                        -- after the copy.
                        hTell hdest >>= hSetFileSize hdest
                        return sofar

    -- Rewinds a handle by the length of a chunk that was just read.
    seekbefore h s = hSeek h RelativeSeek (negate (fromIntegral (S.length s)))

    -- Like hGet, but never returns less than the requested number of
    -- bytes, unless it reaches EOF.
    getnoshort n h = do
        s <- S.hGet h n
        if S.length s == n || S.null s
            then return s
            else do
                s' <- getnoshort (n - S.length s) h
                return (s <> s')
|