disk free checking for unsized keys

Improve disk free space checking when transferring unsized keys to
local git remotes. Since the size of the object file is known, can
check that instead.

Getting unsized keys from local git remotes does not check the actual
object size. It would be harder to handle that direction because the size
check is run locally, before anything involving the remote is done. So it
doesn't know the size of the file on the remote.

Also, transferring unsized keys to other remotes, including ssh remotes and
p2p remotes don't do disk size checking for unsized keys. This would need a
change in protocol.

(It does seem like it would be possible to implement the same thing for
directory special remotes though.)

In some sense, it might be better to not ever do disk free checking for
unsized keys, than to do it only sometimes. A user might notice this
direction working and consider it a bug that the other direction does not.
On the other hand, disk reserve checking is not implemented for most
special remotes at all, and yet it is implemented for a few, which is also
inconsistent, but best effort. And so doing this best effort seems to make
some sense. Fundamentally, if the user wants the size to always be checked,
they should not use unsized keys.

Sponsored-by: Brock Spratlen on Patreon
This commit is contained in:
Joey Hess 2024-01-16 14:29:10 -04:00
parent 94dcbdf0ac
commit f6cf2dec4c
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
22 changed files with 62 additions and 52 deletions

View file

@ -279,8 +279,9 @@ lockContentUsing contentlocker key fallback a = withContentLockFile key $ \mlock
{- Runs an action, passing it the temp file to get,
- and if the action succeeds, verifies the file matches
- the key and moves the file into the annex as a key's content. -}
getViaTmp :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool
getViaTmp rsp v key af action = checkDiskSpaceToGet key False $
getViaTmp :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> Maybe FileSize -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool
getViaTmp rsp v key af sz action =
checkDiskSpaceToGet key sz False $
getViaTmpFromDisk rsp v key af action
{- Like getViaTmp, but does not check that there is enough disk space
@ -349,14 +350,14 @@ verificationOfContentFailed tmpfile = do
-
- Wen there's enough free space, runs the download action.
-}
checkDiskSpaceToGet :: Key -> a -> Annex a -> Annex a
checkDiskSpaceToGet key unabletoget getkey = do
checkDiskSpaceToGet :: Key -> Maybe FileSize -> a -> Annex a -> Annex a
checkDiskSpaceToGet key sz unabletoget getkey = do
tmp <- fromRepo (gitAnnexTmpObjectLocation key)
e <- liftIO $ doesFileExist (fromRawFilePath tmp)
alreadythere <- liftIO $ if e
then getFileSize tmp
else return 0
ifM (checkDiskSpace Nothing key alreadythere True)
ifM (checkDiskSpace sz Nothing key alreadythere True)
( do
-- The tmp file may not have been left writable
when e $ thawContent tmp
@ -542,17 +543,18 @@ unlinkAnnex key = do
secureErase obj
liftIO $ removeWhenExistsWith R.removeLink obj
{- Runs an action to transfer an object's content.
{- Runs an action to transfer an object's content. The action is also
- passed the size of the object.
-
- In some cases, it's possible for the file to change as it's being sent.
- If this happens, runs the rollback action and throws an exception.
- The rollback action should remove the data that was transferred.
-}
sendAnnex :: Key -> Annex () -> (FilePath -> Annex a) -> Annex a
sendAnnex :: Key -> Annex () -> (FilePath -> FileSize -> Annex a) -> Annex a
sendAnnex key rollback sendobject = go =<< prepSendAnnex' key
where
go (Just (f, check)) = do
r <- sendobject f
go (Just (f, sz, check)) = do
r <- sendobject f sz
check >>= \case
Nothing -> return r
Just err -> do
@ -571,9 +573,13 @@ sendAnnex key rollback sendobject = go =<< prepSendAnnex' key
- Annex monad of the remote that is receiving the object, rather than
- the sender. So it cannot rely on Annex state.
-}
prepSendAnnex :: Key -> Annex (Maybe (FilePath, Annex Bool))
prepSendAnnex :: Key -> Annex (Maybe (FilePath, FileSize, Annex Bool))
prepSendAnnex key = withObjectLoc key $ \f -> do
let retval c = return $ Just (fromRawFilePath f, sameInodeCache f c)
let retval c cs = return $ Just
(fromRawFilePath f
, inodeCacheFileSize c
, sameInodeCache f cs
)
cache <- Database.Keys.getInodeCaches key
if null cache
-- Since no inode cache is in the database, this
@ -581,7 +587,7 @@ prepSendAnnex key = withObjectLoc key $ \f -> do
-- change while the transfer is in progress, so
-- generate an inode cache for the starting
-- content.
then maybe (return Nothing) (retval . (:[]))
then maybe (return Nothing) (\fc -> retval fc [fc])
=<< withTSDelta (liftIO . genInodeCache f)
-- Verify that the object is not modified. Usually this
-- only has to check the inode cache, but if the cache
@ -589,19 +595,19 @@ prepSendAnnex key = withObjectLoc key $ \f -> do
-- content.
else withTSDelta (liftIO . genInodeCache f) >>= \case
Just fc -> ifM (isUnmodified' key f fc cache)
( retval (fc:cache)
( retval fc (fc:cache)
, return Nothing
)
Nothing -> return Nothing
prepSendAnnex' :: Key -> Annex (Maybe (FilePath, Annex (Maybe String)))
prepSendAnnex' :: Key -> Annex (Maybe (FilePath, FileSize, Annex (Maybe String)))
prepSendAnnex' key = prepSendAnnex key >>= \case
Just (f, checksuccess) ->
Just (f, sz, checksuccess) ->
let checksuccess' = ifM checksuccess
( return Nothing
, return (Just "content changed while it was being sent")
)
in return (Just (f, checksuccess'))
in return (Just (f, sz, checksuccess'))
Nothing -> return Nothing
cleanObjectLoc :: Key -> Annex () -> Annex ()

View file

@ -1,6 +1,6 @@
{- git-annex low-level content functions
-
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -102,13 +102,13 @@ preserveGitMode _ _ = return True
- to be downloaded from the free space. This way, we avoid overcommitting
- when doing concurrent downloads.
-}
checkDiskSpace :: Maybe RawFilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace destdir key = checkDiskSpace' (fromMaybe 1 (fromKey keySize key)) destdir key
checkDiskSpace :: Maybe FileSize -> Maybe RawFilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace msz destdir key = checkDiskSpace' sz destdir key
where
sz = fromMaybe 1 (fromKey keySize key <|> msz)
{- Allows specifying the size of the key, if it's known, which is useful
- as not all keys know their size. -}
checkDiskSpace' :: Integer -> Maybe RawFilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace' need destdir key alreadythere samefilesystem = ifM (Annex.getRead Annex.force)
checkDiskSpace' :: FileSize -> Maybe RawFilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace' sz destdir key alreadythere samefilesystem = ifM (Annex.getRead Annex.force)
( return True
, do
-- We can't get inprogress and free at the same
@ -123,7 +123,7 @@ checkDiskSpace' need destdir key alreadythere samefilesystem = ifM (Annex.getRea
dir >>= liftIO . getDiskFree . fromRawFilePath >>= \case
Just have -> do
reserve <- annexDiskReserve <$> Annex.getGitConfig
let delta = need + reserve - have - alreadythere + inprogress
let delta = sz + reserve - have - alreadythere + inprogress
let ok = delta <= 0
unless ok $
warning $ UnquotedString $

View file

@ -841,7 +841,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
when ok $
logStatus k InfoPresent
return (Just (k, ok))
checkDiskSpaceToGet k Nothing $
checkDiskSpaceToGet k Nothing Nothing $
notifyTransfer Download af $
download' (Remote.uuid remote) k af Nothing stdRetry $ \p' ->
withTmp k $ downloader p'
@ -860,7 +860,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
recordcidkey cidmap cid k
return sha
Nothing -> error "internal"
checkDiskSpaceToGet tmpkey Nothing $
checkDiskSpaceToGet tmpkey Nothing Nothing $
withTmp tmpkey $ \tmpfile ->
tryNonAsync (downloader tmpfile) >>= \case
Right sha -> return $ Just (loc, Left sha)
@ -896,7 +896,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
warning (UnquotedString (show e))
return Nothing
let bwlimit = remoteAnnexBwLimit (Remote.gitconfig remote)
checkDiskSpaceToGet tmpkey Nothing $
checkDiskSpaceToGet tmpkey Nothing Nothing $
notifyTransfer Download af $
download' (Remote.uuid remote) tmpkey af Nothing stdRetry $ \p ->
withTmp tmpkey $ \tmpfile ->

View file

@ -80,7 +80,7 @@ download r key f d witness =
Just StallDetectionDisabled -> go Nothing
Just sd -> runTransferrer sd r key f d Download witness
where
go sd = getViaTmp (Remote.retrievalSecurityPolicy r) vc key f $ \dest ->
go sd = getViaTmp (Remote.retrievalSecurityPolicy r) vc key f Nothing $ \dest ->
download' (Remote.uuid r) key f sd d (go' dest) witness
go' dest p = verifiedAction $
Remote.retrieveKeyFile r key f (fromRawFilePath dest) p vc

View file

@ -10,6 +10,8 @@ git-annex (10.20231228) UNRELEASED; urgency=medium
run with eg --test-git-config annex.shared-sop-command=sqop
* assistant: When generating a gpg secret key, avoid hardcoding the
key algorithm and size.
* Improve disk free space checking when transferring unsized keys to
local git remotes.
-- Joey Hess <id@joeyh.name> Fri, 29 Dec 2023 11:52:06 -0400

View file

@ -439,7 +439,7 @@ downloadWith canadd addunlockedmatcher downloader dummykey u url file =
- the returned location. -}
downloadWith' :: (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> RawFilePath -> Annex (Maybe (RawFilePath, Backend))
downloadWith' downloader dummykey u url file =
checkDiskSpaceToGet dummykey Nothing $ do
checkDiskSpaceToGet dummykey Nothing Nothing $ do
backend <- chooseBackend file
tmp <- fromRepo $ gitAnnexTmpObjectLocation dummykey
let t = (Transfer.Transfer Transfer.Download u (fromKey id dummykey))

View file

@ -304,7 +304,7 @@ performExport r db ek af contentsha loc allfilledvar = do
alwaysUpload (uuid r) ek af Nothing stdRetry $ \pm -> do
let rollback = void $
performUnexport r db [ek] loc
sendAnnex ek rollback $ \f ->
sendAnnex ek rollback $ \f _sz ->
Remote.action $
storer f ek loc pm
, do

View file

@ -195,7 +195,7 @@ performRemote key afile backend numcopies remote =
let cleanup = liftIO $ catchIO (R.removeLink tmp) (const noop)
cleanup
cleanup `after` a tmp
getfile tmp = ifM (checkDiskSpace (Just (P.takeDirectory tmp)) key 0 True)
getfile tmp = ifM (checkDiskSpace Nothing (Just (P.takeDirectory tmp)) key 0 True)
( ifM (getcheap tmp)
( return (Just (Right UnVerified))
, ifM (Annex.getRead Annex.fast)

View file

@ -28,7 +28,7 @@ start :: (SeekInput, Key) -> CommandStart
start (_, key) = fieldTransfer Download key $ \_p -> do
-- This matches the retrievalSecurityPolicy of Remote.Git
let rsp = RetrievalAllKeysSecure
ifM (getViaTmp rsp DefaultVerify key (AssociatedFile Nothing) go)
ifM (getViaTmp rsp DefaultVerify key (AssociatedFile Nothing) Nothing go)
( do
logStatus key InfoPresent
_ <- quiesce True

View file

@ -128,7 +128,7 @@ perform src key = do
, giveup "failed"
)
where
move = checkDiskSpaceToGet key False $
move = checkDiskSpaceToGet key Nothing False $
moveAnnex key (AssociatedFile Nothing) src
cleanup :: Key -> CommandCleanup

View file

@ -32,7 +32,8 @@ start (_, key) = do
<$> getField "RsyncOptions"
ifM (inAnnex key)
( fieldTransfer Upload key $ \_p ->
sendAnnex key rollback $ liftIO . rsyncServerSend (map Param opts)
sendAnnex key rollback $ \f _sz ->
liftIO $ rsyncServerSend (map Param opts) f
, do
warning "requested key is not present"
liftIO exitFailure

View file

@ -36,7 +36,7 @@ perform file key = do
-- the file might be on a different filesystem, so moveFile is used
-- rather than simply calling moveAnnex; disk space is also
-- checked this way.
ok <- getViaTmp RetrievalAllKeysSecure DefaultVerify key (AssociatedFile Nothing) $ \dest -> unVerified $
ok <- getViaTmp RetrievalAllKeysSecure DefaultVerify key (AssociatedFile Nothing) Nothing $ \dest -> unVerified $
if dest /= file
then liftIO $ catchBoolIO $ do
moveFile file dest

View file

@ -296,7 +296,7 @@ test runannex mkr mkk =
Just b -> case Types.Backend.verifyKeyContent b of
Nothing -> return True
Just verifier -> verifier k (serializeKey' k)
get r k = logStatusAfter k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) $ \dest ->
get r k = logStatusAfter k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) Nothing $ \dest ->
tryNonAsync (Remote.retrieveKeyFile r k (AssociatedFile Nothing) (fromRawFilePath dest) nullMeterUpdate (RemoteVerify r)) >>= \case
Right v -> return (True, v)
Left _ -> return (False, UnVerified)
@ -370,13 +370,13 @@ testUnavailable runannex mkr mkk =
, check (`notElem` [Right True, Right False]) "checkPresent" $ \r k ->
Remote.checkPresent r k
, check (== Right False) "retrieveKeyFile" $ \r k ->
logStatusAfter k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) $ \dest ->
logStatusAfter k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) Nothing $ \dest ->
tryNonAsync (Remote.retrieveKeyFile r k (AssociatedFile Nothing) (fromRawFilePath dest) nullMeterUpdate (RemoteVerify r)) >>= \case
Right v -> return (True, v)
Left _ -> return (False, UnVerified)
, check (== Right False) "retrieveKeyFileCheap" $ \r k -> case Remote.retrieveKeyFileCheap r of
Nothing -> return False
Just a -> logStatusAfter k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) $ \dest ->
Just a -> logStatusAfter k $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) k (AssociatedFile Nothing) Nothing $ \dest ->
unVerified $ isRight
<$> tryNonAsync (a k (AssociatedFile Nothing) (fromRawFilePath dest))
]

View file

@ -63,7 +63,7 @@ toPerform key file remote = go Upload file $
fromPerform :: Key -> AssociatedFile -> Remote -> CommandPerform
fromPerform key file remote = go Upload file $
download' (uuid remote) key file Nothing stdRetry $ \p ->
logStatusAfter key $ getViaTmp (retrievalSecurityPolicy remote) vc key file $ \t ->
logStatusAfter key $ getViaTmp (retrievalSecurityPolicy remote) vc key file Nothing $ \t ->
tryNonAsync (Remote.retrieveKeyFile remote key file (fromRawFilePath t) p vc) >>= \case
Right v -> return (True, v)
Left e -> do

View file

@ -50,7 +50,7 @@ start = do
return True
| otherwise = notifyTransfer direction file $
download' (Remote.uuid remote) key file Nothing stdRetry $ \p ->
logStatusAfter key $ getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key file $ \t -> do
logStatusAfter key $ getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key file Nothing $ \t -> do
r <- tryNonAsync (Remote.retrieveKeyFile remote key file (fromRawFilePath t) p (RemoteVerify remote)) >>= \case
Left e -> do
warning (UnquotedString (show e))

View file

@ -55,7 +55,7 @@ start = do
-- so caller is responsible for doing notification
-- and for retrying, and updating location log,
-- and stall canceling.
let go p = getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key file $ \t -> do
let go p = getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key file Nothing $ \t -> do
Remote.verifiedAction (Remote.retrieveKeyFile remote key file (fromRawFilePath t) p (RemoteVerify remote))
in download' (Remote.uuid remote) key file Nothing noRetry go
noNotification
@ -72,7 +72,7 @@ start = do
runner (AssistantDownloadRequest _ key (TransferAssociatedFile file)) remote =
notifyTransfer Download file $
download' (Remote.uuid remote) key file Nothing stdRetry $ \p ->
logStatusAfter key $ getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key file $ \t -> do
logStatusAfter key $ getViaTmp (Remote.retrievalSecurityPolicy remote) (RemoteVerify remote) key file Nothing $ \t -> do
r <- tryNonAsync (Remote.retrieveKeyFile remote key file (fromRawFilePath t) p (RemoteVerify remote)) >>= \case
Left e -> do
warning (UnquotedString (show e))

View file

@ -64,7 +64,7 @@ runLocal runst runner a = case a of
let fallback = runner (sender mempty (return Invalid))
v <- tryNonAsync $ prepSendAnnex k
case v of
Right (Just (f, checkchanged)) -> proceed $ do
Right (Just (f, _sz, checkchanged)) -> proceed $ do
-- alwaysUpload to allow multiple uploads of the same key.
let runtransfer ti = transfer alwaysUpload k af Nothing $ \p ->
sinkfile f o checkchanged sender p ti
@ -79,7 +79,7 @@ runLocal runst runner a = case a of
iv <- startVerifyKeyContentIncrementally DefaultVerify k
let runtransfer ti =
Right <$> transfer download' k af Nothing (\p ->
logStatusAfter k $ getViaTmp rsp DefaultVerify k af $ \tmp ->
logStatusAfter k $ getViaTmp rsp DefaultVerify k af Nothing $ \tmp ->
storefile (fromRawFilePath tmp) o l getb iv validitycheck p ti)
let fallback = return $ Left $
ProtoFailureMessage "transfer already in progress, or unable to take transfer lock"

View file

@ -228,7 +228,7 @@ checkDiskSpaceDirectory d k = do
(\a b -> deviceID a == deviceID b)
<$> R.getSymbolicLinkStatus d
<*> R.getSymbolicLinkStatus annexdir
checkDiskSpace (Just d) k 0 samefilesystem
checkDiskSpace Nothing (Just d) k 0 samefilesystem
{- Passed a temp directory that contains the files that should be placed
- in the dest directory, moves it into place. Anything already existing

View file

@ -487,7 +487,7 @@ copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
let bwlimit = remoteAnnexBwLimit (gitconfig r)
-- run copy from perspective of remote
onLocalFast st $ Annex.Content.prepSendAnnex' key >>= \case
Just (object, check) -> do
Just (object, _sz, check) -> do
let checksuccess = check >>= \case
Just err -> giveup err
Nothing -> return True
@ -545,7 +545,7 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate
| otherwise = giveup "copying to non-ssh repo not supported"
where
copylocal Nothing = giveup "content not available"
copylocal (Just (object, check)) = do
copylocal (Just (object, sz, check)) = do
-- The check action is going to be run in
-- the remote's Annex, but it needs access to the local
-- Annex monad's state.
@ -563,7 +563,7 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate
let checksuccess = liftIO checkio >>= \case
Just err -> giveup err
Nothing -> return True
logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest ->
logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file (Just sz) $ \dest ->
metered (Just (combineMeterUpdate meterupdate p)) key bwlimit $ \_ p' ->
copier object (fromRawFilePath dest) key p' checksuccess verify
)

View file

@ -16,6 +16,7 @@ import Types.Remote
import Annex.Content
import Messages.Progress
import Utility.Metered
import Utility.Tuple
import Types.NumCopies
import Annex.Verify
@ -33,7 +34,7 @@ type WithConn a c = (ClosableConnection c -> Annex (ClosableConnection c, a)) ->
store :: RemoteGitConfig -> ProtoRunner Bool -> Key -> AssociatedFile -> MeterUpdate -> Annex ()
store gc runner k af p = do
let sizer = KeySizer k (fmap (toRawFilePath . fst) <$> prepSendAnnex k)
let sizer = KeySizer k (fmap (toRawFilePath . fst3) <$> prepSendAnnex k)
let bwlimit = remoteAnnexBwLimit gc
metered (Just p) sizer bwlimit $ \_ p' ->
runner (P2P.put k af p') >>= \case

View file

@ -222,7 +222,7 @@ specialRemote' cfg c storer retriever remover checkpresent baser = encr
isencrypted = isEncrypted c
-- chunk, then encrypt, then feed to the storer
storeKeyGen k p enc = sendAnnex k rollback $ \src ->
storeKeyGen k p enc = sendAnnex k rollback $ \src _sz ->
displayprogress p k (Just src) $ \p' ->
storeChunks (uuid baser) chunkconfig enck k src p'
enc encr storer checkpresent

View file

@ -139,7 +139,7 @@ tahoeSetup _ mu _ c _ = do
missingfurl = giveup "Set TAHOE_FURL to the introducer furl to use."
store :: RemoteStateHandle -> TahoeHandle -> Key -> AssociatedFile -> MeterUpdate -> Annex ()
store rs hdl k _f _p = sendAnnex k noop $ \src ->
store rs hdl k _f _p = sendAnnex k noop $ \src _sz ->
parsePut <$> liftIO (readTahoe hdl "put" [File src]) >>= maybe
(giveup "tahoe failed to store content")
(\cap -> storeCapability rs k cap)