2018-08-22 18:41:09 +00:00
|
|
|
{- git-annex low-level content functions
|
|
|
|
-
|
disk free checking for unsized keys
Improve disk free space checking when transferring unsized keys to
local git remotes. Since the size of the object file is known, can
check that instead.
Getting unsized keys from local git remotes does not check the actual
object size. It would be harder to handle that direction because the size
check is run locally, before anything involving the remote is done. So it
doesn't know the size of the file on the remote.
Also, transferring unsized keys to other remotes, including ssh remotes and
p2p remotes don't do disk size checking for unsized keys. This would need a
change in protocol.
(It does seem like it would be possible to implement the same thing for
directory special remotes though.)
In some sense, it might be better to not ever do disk free checking for
unsized keys, than to do it only sometimes. A user might notice this
direction working and consider it a bug that the other direction does not.
On the other hand, disk reserve checking is not implemented for most
special remotes at all, and yet it is implemented for a few, which is also
inconsistent, but best effort. And so doing this best effort seems to make
some sense. Fundamentally, if the user wants the size to always be checked,
they should not use unsized keys.
Sponsored-by: Brock Spratlen on Patreon
2024-01-16 18:29:10 +00:00
|
|
|
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
|
2018-08-22 18:41:09 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2018-08-22 18:41:09 +00:00
|
|
|
-}
|
|
|
|
|
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
|
|
|
|
module Annex.Content.LowLevel where
|
|
|
|
|
|
|
|
import Annex.Common
|
|
|
|
import Logs.Transfer
|
|
|
|
import qualified Annex
|
|
|
|
import Utility.DiskFree
|
|
|
|
import Utility.FileMode
|
|
|
|
import Utility.DataUnits
|
|
|
|
import Utility.CopyFile
|
2020-10-29 18:20:57 +00:00
|
|
|
import qualified Utility.RawFilePath as R
|
2018-08-22 18:41:09 +00:00
|
|
|
|
2020-11-05 22:45:37 +00:00
|
|
|
import qualified System.FilePath.ByteString as P
|
2023-03-01 19:55:58 +00:00
|
|
|
import System.PosixCompat.Files (linkCount)
|
2020-11-05 22:45:37 +00:00
|
|
|
|
2018-08-22 18:41:09 +00:00
|
|
|
{- Runs the configured secure erase command on the file, when one is
 - set. Without such a command configured, this does nothing.
 -
 - The command's exit status is ignored, and the file may still exist
 - afterwards, so the caller remains responsible for deleting it. -}
secureErase :: RawFilePath -> Annex ()
secureErase file = do
    cfg <- Annex.getGitConfig
    case annexSecureEraseCommand cfg of
        Nothing -> noop
        Just basecmd -> runerase basecmd
  where
    -- The command is handed to sh -c, and its result is discarded.
    runerase basecmd = void $ liftIO $
        boolSystem "sh" [Param "-c", Param (expand basecmd)]
    -- Substitutes the shell-escaped file name for %file in the command.
    expand = massReplace [("%file", shellEscape (fromRawFilePath file))]
|
2018-08-22 18:41:09 +00:00
|
|
|
|
2018-09-05 21:26:12 +00:00
|
|
|
{- How linkOrCopy populated the destination file: Linked when a hard
 - link to the source was made, Copied when the content was copied. -}
data LinkedOrCopied = Linked | Copied
|
|
|
|
|
2018-08-22 18:41:09 +00:00
|
|
|
{- Populates dest, which must not already exist, with either a hard link
 - to src or a copy of it.
 -
 - A hard link is only attempted when annex.thin is enabled and src does
 - not already have other hard links.
 -
 - Before copying, the disk reserve is checked against the size of src.
 - The result is Nothing when there is not enough space, or when the
 - dest file already exists.
 -
 - The FileMode, if provided, only influences the execute bits of dest:
 - they are set when the mode is executable and cleared otherwise.
 - The rest of the mode is not carried over, because git does not
 - support file modes beyond execute.
 -}
linkOrCopy :: Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> Annex (Maybe LinkedOrCopied)
linkOrCopy key src dest destmode =
    linkOrCopy' thinenabled key src dest destmode
  where
    -- Hard linking is gated on the annex.thin configuration.
    thinenabled = annexThin <$> Annex.getGitConfig
|
|
|
|
|
2020-10-29 18:20:57 +00:00
|
|
|
{- Like linkOrCopy, but the caller supplies the action that decides
 - whether hard linking may be attempted at all.
 -
 - Any IO exception that escapes results in Nothing. -}
linkOrCopy' :: Annex Bool -> Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> Annex (Maybe LinkedOrCopied)
linkOrCopy' canhardlink key src dest destmode = catchDefaultIO Nothing $ do
    trylink <- canhardlink
    srcstat <- liftIO $ R.getFileStatus src
    -- A src that already has more than one link is copied rather than
    -- linked again.
    if trylink && linkCount srcstat <= 1
        -- When making the hard link fails, fall back to copying.
        then link `catchIO` const (copy srcstat)
        else copy srcstat
  where
    link = liftIO $ do
        R.createLink src dest
        _ <- preserveGitMode dest destmode
        return (Just Linked)
    copy st = do
        copied <- checkedCopyFile' key src dest destmode st
        return $ if copied
            then Just Copied
            else Nothing
|
2018-08-22 18:41:09 +00:00
|
|
|
|
|
|
|
{- Copies src to dest, after checking that there is enough disk space
 - for it.
 -
 - Returns False when the copy was not made, including when an IO
 - exception occurred (for example, when src cannot be statted). -}
checkedCopyFile :: Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> Annex Bool
checkedCopyFile key src dest destmode = catchBoolIO $ do
    srcstat <- liftIO $ R.getFileStatus src
    checkedCopyFile' key src dest destmode srcstat
|
2018-08-22 18:41:09 +00:00
|
|
|
|
2020-11-05 22:45:37 +00:00
|
|
|
{- Like checkedCopyFile, but uses an already obtained FileStatus of src
 - to determine how much disk space the copy needs.
 -
 - The free space of the directory containing dest is what gets checked.
 - Returns False when there is not enough space, when the copy or the
 - mode adjustment fails, or when an IO exception occurs. -}
checkedCopyFile' :: Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> FileStatus -> Annex Bool
checkedCopyFile' key src dest destmode s = catchBoolIO $ do
    sz <- liftIO $ getFileSize' src s
    enoughspace <- checkDiskSpace' sz (Just destdir) key 0 True
    if enoughspace
        then liftIO docopy
        else return False
  where
    destdir = P.takeDirectory dest
    -- The mode is only adjusted after the copy succeeded.
    docopy = copyFileExternal CopyAllMetaData (fromRawFilePath src) (fromRawFilePath dest)
        <&&> preserveGitMode dest destmode
|
|
|
|
|
2020-11-05 22:45:37 +00:00
|
|
|
{- Makes the execute bits of the file agree with the provided mode:
 - they are added when the mode is executable, and removed when it
 - is not. When no mode is provided, the file is left as it is.
 -
 - Returns False when changing the mode failed with an IO exception. -}
preserveGitMode :: RawFilePath -> Maybe FileMode -> IO Bool
preserveGitMode _ Nothing = return True
preserveGitMode f (Just mode) = catchBoolIO $ do
    modifyFileMode f adjust
    return True
  where
    adjust
        | isExecutable mode = addModes executeModes
        | otherwise = removeModes executeModes
|
|
|
|
|
|
|
|
{- Checks that enough disk space is available to store a given key in
 - a destination directory (or in the annex, when none is given).
 - A warning is printed when there is not.
 -
 - The size that is checked is the one recorded in the key. For a key
 - without a size, the provided size is used instead, and when that is
 - also missing, a size of 1 is assumed.
 -
 - When the destination is on the same filesystem as the annex, the
 - data that other running downloads still have to fetch is counted
 - against the free space too. This avoids overcommitting when doing
 - concurrent downloads.
 -}
checkDiskSpace :: Maybe FileSize -> Maybe RawFilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace msz destdir key alreadythere samefilesystem =
    checkDiskSpace' needed destdir key alreadythere samefilesystem
  where
    needed = case fromKey keySize key of
        Just keysz -> keysz
        Nothing -> fromMaybe 1 msz
|
2018-08-22 18:41:09 +00:00
|
|
|
|
disk free checking for unsized keys
Improve disk free space checking when transferring unsized keys to
local git remotes. Since the size of the object file is known, can
check that instead.
Getting unsized keys from local git remotes does not check the actual
object size. It would be harder to handle that direction because the size
check is run locally, before anything involving the remote is done. So it
doesn't know the size of the file on the remote.
Also, transferring unsized keys to other remotes, including ssh remotes and
p2p remotes don't do disk size checking for unsized keys. This would need a
change in protocol.
(It does seem like it would be possible to implement the same thing for
directory special remotes though.)
In some sense, it might be better to not ever do disk free checking for
unsized keys, than to do it only sometimes. A user might notice this
direction working and consider it a bug that the other direction does not.
On the other hand, disk reserve checking is not implemented for most
special remotes at all, and yet it is implemented for a few, which is also
inconsistent, but best effort. And so doing this best effort seems to make
some sense. Fundamentally, if the user wants the size to always be checked,
they should not use unsized keys.
Sponsored-by: Brock Spratlen on Patreon
2024-01-16 18:29:10 +00:00
|
|
|
{- Checks that a given number of bytes, on top of the configured disk
 - reserve, fits in the free space of the destination directory (or of
 - the annex directory, when none is given).
 -
 - The amount given as already there is credited against what is needed.
 - When the same filesystem flag is set, the size of other downloads in
 - progress (those not for this key) is added to what is needed.
 -
 - Returns True without checking anything when force is enabled, and
 - also when the free disk space cannot be determined. Otherwise, a
 - warning is displayed when the space does not suffice. -}
checkDiskSpace' :: FileSize -> Maybe RawFilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace' sz destdir key alreadythere samefilesystem = do
    forced <- Annex.getRead Annex.force
    if forced
        then return True
        else check
  where
    check = do
        -- The in progress size and the free space cannot be read at
        -- the same moment, and both can be changing, so there is a
        -- small race here. To err on the side of caution, the in
        -- progress size is read first; if that takes a while, any
        -- decrease in the free disk space will still be seen.
        inprogress <- if samefilesystem
            then sizeOfDownloadsInProgress (/= key)
            else pure 0
        checkdir <- maybe (fromRepo gitAnnexDir) return destdir
        mfree <- liftIO $ getDiskFree $ fromRawFilePath checkdir
        case mfree of
            Nothing -> return True
            Just have -> do
                reserve <- annexDiskReserve <$> Annex.getGitConfig
                -- A positive delta is the number of bytes lacking.
                let delta = sz + reserve - have - alreadythere + inprogress
                let ok = delta <= 0
                unless ok $
                    warning $ UnquotedString $ needMoreDiskSpace delta
                return ok
|
2018-08-22 18:41:09 +00:00
|
|
|
|
|
|
|
{- Builds the message explaining that the given number of bytes of
 - additional free space is needed, along with a hint about how the
 - check can be overridden or tuned. -}
needMoreDiskSpace :: Integer -> String
needMoreDiskSpace n = concat
    [ "not enough free space, need "
    , roughSize storageUnits True n
    , " more"
    , " (use --force to override this check or adjust annex.diskreserve)"
    ]
|