2014-07-27 00:14:09 +00:00
|
|
|
{- Legacy chunksize support for directory special remote.
|
|
|
|
-
|
|
|
|
- Can be removed eventually.
|
|
|
|
-
|
2015-01-21 16:50:09 +00:00
|
|
|
- Copyright 2011-2012 Joey Hess <id@joeyh.name>
|
2014-07-27 00:14:09 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2014-07-27 00:14:09 +00:00
|
|
|
-}
|
|
|
|
|
2020-10-30 14:29:42 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
incremental verify for byteRetriever special remotes
Several special remotes verify content while it is being retrieved,
avoiding a separate checksum pass. They are: S3, bup, ddar, and
gcrypt (with a local repository).
Not done when using chunking, yet.
Complicated by Retriever needing to change to be polymorphic. Which in turn
meant RankNTypes is needed, and also needed some code changes. The
change in Remote.External does not change behavior at all but avoids
the type checking failing because of a "rigid, skolem type" which
"would escape its scope". So I refactored slightly to make the type
checker's job easier there.
Unfortunately, directory uses fileRetriever (except when chunked),
so it is not amoung the improved ones. Fixing that would need a way for
FileRetriever to return a Verification. But, since the file retrieved
may be encrypted or chunked, it would be extra work to always
incrementally checksum the file while retrieving it. Hm.
Some other special remotes use fileRetriever, and so don't get incremental
verification, but could be converted to byteRetriever later. One is
GitLFS, which uses downloadConduit, which writes to the file, so could
verify as it goes. Other special remotes like web could too, but don't
use Remote.Helper.Special and so will need to be addressed separately.
Sponsored-by: Dartmouth College's DANDI project
2021-08-11 17:43:30 +00:00
|
|
|
{-# LANGUAGE RankNTypes #-}
|
2020-10-30 14:29:42 +00:00
|
|
|
|
2014-07-27 00:14:09 +00:00
|
|
|
module Remote.Directory.LegacyChunked where
|
|
|
|
|
|
|
|
import qualified Data.ByteString.Lazy as L
|
|
|
|
import qualified Data.ByteString as S
|
2020-10-30 14:29:42 +00:00
|
|
|
import qualified System.FilePath.ByteString as P
|
2014-07-27 00:14:09 +00:00
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2014-07-27 00:14:09 +00:00
|
|
|
import Utility.FileMode
|
2014-08-03 19:35:23 +00:00
|
|
|
import Remote.Helper.Special
|
2014-07-27 00:14:09 +00:00
|
|
|
import qualified Remote.Helper.Chunked.Legacy as Legacy
|
2019-01-17 19:40:44 +00:00
|
|
|
import Annex.Tmp
|
2014-07-27 00:14:09 +00:00
|
|
|
import Utility.Metered
|
2020-10-30 14:29:42 +00:00
|
|
|
import Utility.Directory.Create
|
2020-11-24 16:38:12 +00:00
|
|
|
import qualified Utility.RawFilePath as R
|
2014-07-27 00:14:09 +00:00
|
|
|
|
|
|
|
{- Runs an action on the list of files storing a key's chunked content,
 - after verifying each of them with the supplied check.
 -
 - An empty top directory always fails. Otherwise each candidate
 - location is tried in turn: if its chunkcount file passes the check,
 - the chunk list is read from that file and every listed chunk must
 - also pass; if there is no chunkcount file, chunk files are probed
 - for directly, and the next location is tried when none are found.
 -}
withCheckedFiles :: (FilePath -> IO Bool) -> FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withCheckedFiles _ [] _locations _ _ = return False
withCheckedFiles check d locations k a = search (locations d k)
  where
	search [] = return False
	search (candidate:rest) = do
		let countfile = candidate ++ Legacy.chunkCount
		havecount <- check countfile
		if havecount
			then do
				chunks <- Legacy.listChunks candidate <$> readFile countfile
				allok <- allM check chunks
				if allok
					then a chunks
					else return False
			else do
				chunks <- Legacy.probeChunks candidate check
				if null chunks
					then search rest
					else a chunks
|
|
|
|
{- Specialization of withCheckedFiles where the check is simply that
 - each chunk file exists on disk. -}
withStoredFiles :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withStoredFiles d locations k a = withCheckedFiles doesFileExist d locations k a
|
|
|
|
|
|
|
|
{- Splits a ByteString into chunks and writes to dests, obeying configured
 - chunk size (not to be confused with the L.ByteString chunk size).
 -
 - Returns the list of dest files actually written. Calling this with no
 - dests at all is a programming error. -}
storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call"
storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b
	| not (L.null b) = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) []
	| otherwise = do
		-- Always write at least one file, even when the content
		-- is empty.
		L.writeFile firstdest b
		return [firstdest]
|
|
|
|
-- | Worker for storeLegacyChunked: writes the lazy ByteString's pieces
-- into successive dest files, each file holding up to chunksize bytes.
--
-- The accumulator c collects the dest files written so far, in reverse
-- order; it is reversed before being returned. Running out of dest
-- files before the input is exhausted is a programming error.
storeLegacyChunked' :: MeterUpdate -> ChunkSize -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath]
storeLegacyChunked' _ _ [] _ _ = error "ran out of dests"
storeLegacyChunked' _ _ _ [] c = return $ reverse c
storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do
	-- Fill one dest file; feed returns the pieces it did not consume,
	-- which spill over into the next dest file.
	bs' <- withFile d WriteMode $
		feed zeroBytesProcessed chunksize bs
	storeLegacyChunked' meterupdate chunksize dests bs' (d:c)
  where
	-- feed writes pieces to the open handle while they fit within sz,
	-- the number of bytes still allowed in this file, updating the
	-- progress meter after each write.
	feed _ _ [] _ = return []
	feed bytes sz (l:ls) h = do
		let len = S.length l
		let s = fromIntegral len
		-- The sz == chunksize case means nothing has been written
		-- to this file yet, so a piece larger than chunksize is
		-- still written to it, guaranteeing forward progress.
		if s <= sz || sz == chunksize
			then do
				S.hPut h l
				let bytes' = addBytesProcessed bytes len
				meterupdate bytes'
				feed bytes' (sz - s) ls h
			else return (l:ls)
|
|
|
|
|
2020-10-30 14:29:42 +00:00
|
|
|
{- Stores a key's content as legacy chunks: the storer writes chunk
 - files into tmpdir, and Legacy.storeChunks moves them into destdir
 - via the finalizer. -}
storeHelper :: FilePath -> (RawFilePath -> RawFilePath -> IO ()) -> Key -> ([FilePath] -> IO [FilePath]) -> FilePath -> FilePath -> IO ()
storeHelper repotop finalizer key storer tmpdir destdir = do
	-- Best-effort creation of tmpdir; any failure is ignored here.
	void $ liftIO $ tryIO $ createDirectoryUnder
		(toRawFilePath repotop)
		(toRawFilePath tmpdir)
	Legacy.storeChunks key tmpdir destdir storer record (legacyFinalizer finalizer)
  where
	-- Writes s to file f, temporarily adding the write bit in case
	-- an unwritable version of the file already exists, and removing
	-- it again afterwards. Permission failures are ignored.
	record f s = do
		let rawf = toRawFilePath f
		void $ tryIO $ allowWrite rawf
		writeFile f s
		void $ tryIO $ preventWrite rawf
|
2014-07-27 00:14:09 +00:00
|
|
|
|
2020-10-30 14:29:42 +00:00
|
|
|
{- Top-level legacy chunked store: splits the ByteString across however
 - many dest files are needed, honoring the configured chunksize. -}
store :: FilePath -> ChunkSize -> (RawFilePath -> RawFilePath -> IO ()) -> Key -> L.ByteString -> MeterUpdate -> FilePath -> FilePath -> IO ()
store repotop chunksize finalizer k b p = storeHelper repotop finalizer k chunkstorer
  where
	chunkstorer dests = storeLegacyChunked p chunksize dests b
|
|
|
|
|
|
|
|
{- Need to get a single ByteString containing every chunk.
 - Done very inefficiently, by writing to a temp file.
 - :/ This is legacy code..
 -}
retrieve :: (RawFilePath -> Key -> [RawFilePath]) -> RawFilePath -> Retriever
retrieve locations d basek p miv c = withOtherTmp $ \tmpdir -> do
	showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow."
	-- Temp file that accumulates the concatenation of all the chunks.
	let tmp = tmpdir P.</> keyFile basek <> ".directorylegacy.tmp"
	let tmp' = fromRawFilePath tmp
	let go = \k sink -> do
		-- Append every stored chunk file to the temp file.
		liftIO $ void $ withStoredFiles (fromRawFilePath d) (legacyLocations locations) k $ \fs -> do
			forM_ fs $
				S.appendFile tmp' <=< S.readFile
			return True
		b <- liftIO $ L.readFile tmp'
		-- NOTE(review): the temp file is unlinked before the lazily
		-- read ByteString is fully consumed by the sink; this relies
		-- on the already-opened file remaining readable after unlink
		-- (POSIX semantics) -- confirm on other platforms.
		liftIO $ removeWhenExistsWith R.removeLink tmp
		sink b
	byteRetriever go basek p miv c
|
2014-07-27 00:14:09 +00:00
|
|
|
|
2020-10-30 14:29:42 +00:00
|
|
|
{- A key is present when all of its chunk files exist. -}
checkKey :: RawFilePath -> (RawFilePath -> Key -> [RawFilePath]) -> Key -> Annex Bool
checkKey d locations k = liftIO $
	withStoredFiles (fromRawFilePath d) (legacyLocations locations) k present
  where
	-- withStoredFiles has already checked that the files exist.
	present _ = return True
|
|
|
|
|
|
|
|
{- Adapts a RawFilePath-based finalizer to the FilePath interface that
 - the legacy chunk code expects. -}
legacyFinalizer :: (RawFilePath -> RawFilePath -> IO ()) -> (FilePath -> FilePath -> IO ())
legacyFinalizer f a b = f (toRawFilePath a) (toRawFilePath b)
|
|
|
|
|
|
|
|
{- Adapts a RawFilePath-based location generator to the FilePath
 - interface that the legacy chunk code expects. -}
legacyLocations :: (RawFilePath -> Key -> [RawFilePath]) -> (FilePath -> Key -> [FilePath])
legacyLocations locations f k =
	map fromRawFilePath (locations (toRawFilePath f) k)
|