2010-11-06 21:06:19 +00:00
{- git - annex command
-
2024-03-01 18:41:10 +00:00
- Copyright 2010 - 2023 Joey Hess < id @ joeyh . name >
2010-11-06 21:06:19 +00:00
-
2019-03-13 19:48:14 +00:00
- Licensed under the GNU AGPL version 3 or higher .
2010-11-06 21:06:19 +00:00
- }
2013-05-10 21:29:59 +00:00
{- # LANGUAGE CPP # -}
2020-11-03 14:11:04 +00:00
{- # LANGUAGE OverloadedStrings # -}
2013-05-10 21:29:59 +00:00
2010-11-06 21:06:19 +00:00
module Command.Fsck where
import Command
2012-01-20 17:23:11 +00:00
import qualified Annex
2011-07-05 22:31:46 +00:00
import qualified Remote
import qualified Types.Backend
2011-10-29 21:49:37 +00:00
import qualified Backend
2011-10-04 04:40:47 +00:00
import Annex.Content
2024-03-01 18:41:10 +00:00
import Annex.Verify
2023-03-21 22:22:41 +00:00
# ifndef mingw32_HOST_OS
import Annex.Version
2022-05-16 19:19:48 +00:00
import Annex.Content.Presence
2023-03-21 22:22:41 +00:00
# endif
2021-07-29 18:06:13 +00:00
import Annex.Content.Presence.LowLevel
2012-06-06 00:25:32 +00:00
import Annex.Perms
fully support core.symlinks=false in all relevant symlink handling code
Refactored annex link code into nice clean new library.
Audited and dealt with calls to createSymbolicLink.
Remaining calls are all safe, because:
Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file
only when core.symlinks=true
Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link
test if symlinks can be made
Command/Fix.hs: liftIO $ createSymbolicLink link file
command only works in indirect mode
Command/FromKey.hs: liftIO $ createSymbolicLink link file
command only works in indirect mode
Command/Indirect.hs: liftIO $ createSymbolicLink l f
refuses to run if core.symlinks=false
Init.hs: createSymbolicLink f f2
test if symlinks can be made
Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True
fast key linking; catches failure to make symlink and falls back to copy
Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True
ditto
Upgrade/V1.hs: liftIO $ createSymbolicLink link f
v1 repos could not be on a filesystem w/o symlinks
Audited and dealt with calls to readSymbolicLink.
Remaining calls are all safe, because:
Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file
only when core.symlinks=true
Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file))
code that fixes real symlinks when inotify sees them
It's ok to not fix pseudo-symlinks.
Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file)
ditto
Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do
command only works in indirect mode
Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file
v1 repos could not be on a filesystem w/o symlinks
Audited and dealt with calls to isSymbolicLink.
(Typically used with getSymbolicLinkStatus, but that is just used because
getFileStatus is not as robust; it also works on pseudolinks.)
Remaining calls are all safe, because:
Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms
only handles staging of symlinks that were somehow not staged
(might need to be updated to support pseudolinks, but this is
only a belt-and-suspenders check anyway, and I've never seen the code run)
Command/Add.hs: if isSymbolicLink s || not (isRegularFile s)
avoids adding symlinks to the annex, so not relevant
Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $
only allowed on systems that support symlinks
Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do
ditto
Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f
used to find unlocked files, only relevant in indirect mode
Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s
Utility/FSEvents.hs: | Files.isSymbolicLink s ->
Utility/INotify.hs: | Files.isSymbolicLink s ->
Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f
Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change
all above are lower-level, not relevant
Audited and dealt with calls to isSymLink.
Remaining calls are all safe, because:
Annex/Direct.hs: | isSymLink (getmode item) =
This is looking at git diff-tree objects, not files on disk
Command/Unused.hs: | isSymLink (LsTree.mode l) = do
This is looking at git ls-tree, not file on disk
Utility/FileMode.hs:isSymLink :: FileMode -> Bool
Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode
low-level
Done!!
2013-02-17 19:05:55 +00:00
import Annex.Link
2011-10-15 20:21:08 +00:00
import Logs.Location
import Logs.Trust
2015-04-05 16:50:02 +00:00
import Logs.Activity
2018-10-30 03:13:36 +00:00
import Utility.TimeStamp
2018-02-08 18:08:41 +00:00
import Logs.PreferredContent
2015-04-30 18:02:56 +00:00
import Annex.NumCopies
2011-10-15 21:47:03 +00:00
import Annex.UUID
2016-02-14 21:27:50 +00:00
import Annex.ReplaceFile
2011-07-06 00:36:43 +00:00
import Utility.DataUnits
2012-09-25 23:37:34 +00:00
import Utility.HumanTime
2015-04-18 18:13:07 +00:00
import Utility.CopyFile
2013-06-18 01:30:52 +00:00
import Git.FilePath
2014-02-11 19:29:56 +00:00
import Utility.PID
2021-07-29 18:06:13 +00:00
import Utility.InodeCache
2024-05-06 20:25:55 +00:00
import Utility.Metered
2021-07-29 18:06:13 +00:00
import Annex.InodeSentinal
2015-12-11 20:05:56 +00:00
import qualified Database.Keys
2015-02-16 19:08:29 +00:00
import qualified Database.Fsck as FsckDb
2015-07-25 21:37:09 +00:00
import Types.CleanupActions
2017-02-27 17:50:00 +00:00
import Types.Key
2019-12-06 18:44:42 +00:00
import qualified Utility.RawFilePath as R
2010-11-06 21:06:19 +00:00
2012-09-25 18:16:34 +00:00
import Data.Time.Clock.POSIX
import System.Posix.Types ( EpochTime )
2018-02-08 18:08:41 +00:00
import qualified Data.Set as S
import qualified Data.Map as M
2020-05-13 21:05:56 +00:00
import Data.Either
2020-11-03 14:11:04 +00:00
import qualified System.FilePath.ByteString as P
2023-03-01 19:55:58 +00:00
import System.PosixCompat.Files ( fileMode , isSymbolicLink , modificationTime )
2012-09-25 18:16:34 +00:00
2015-07-08 16:33:27 +00:00
{- The fsck command definition: it lives in the maintenance section,
 - takes paths as parameters, and has the jobs, json and annexed file
 - matching options enabled. -}
cmd :: Command
cmd = withAnnexOptions enabledopts fsckcommand
  where
    enabledopts = [jobsOption, jsonOptions, annexedMatchingOptions]
    fsckcommand = command " fsck " SectionMaintenance
        " find and fix problems "
        paramPaths
        (seek <$$> optParser)
2015-07-08 20:58:54 +00:00
{- Options of the fsck command, as produced by optParser. -}
data FsckOptions = FsckOptions
    { fsckFiles :: CmdParams
    -- ^ the command's parameters
    , fsckFromOption :: Maybe (DeferredParse Remote)
    -- ^ when set, this remote is checked rather than the local repository
    , incrementalOpt :: Maybe IncrementalOpt
    -- ^ the incremental mode, when one was requested
    , keyOptions :: Maybe KeyOptions
    -- ^ key options, when any were given
    }
2015-07-09 16:26:25 +00:00
{- The incremental fsck modes that can be selected on the command line. -}
data IncrementalOpt
    = StartIncrementalO
    -- ^ start an incremental fsck
    | MoreIncrementalO
    -- ^ continue an incremental fsck
    | ScheduleIncrementalO Duration
    -- ^ schedule incremental fscking, using the given duration
2015-07-08 20:58:54 +00:00
{- Parses the fsck command line: the command's parameters, followed by
 - an optional remote to check, an optional incremental mode, and
 - optional key options. -}
optParser :: CmdParamsDesc -> Parser FsckOptions
optParser desc = FsckOptions
    <$> cmdParams desc
    <*> optional fromremote
    <*> optional incremental
    <*> optional parseKeyOptions
  where
    fromremote = mkParseRemoteOption <$> strOption
        (mconcat
            [ long " from "
            , short 'f'
            , metavar paramRemote
            , help " check remote "
            , completeRemotes
            ])

    -- The incremental modes are tried as alternatives, in this order.
    incremental = startincremental <|> moreincremental <|> scheduleincremental

    startincremental = flag' StartIncrementalO $ mconcat
        [ long " incremental "
        , short 'S'
        , help " start an incremental fsck "
        ]

    moreincremental = flag' MoreIncrementalO $ mconcat
        [ long " more "
        , short 'm'
        , help " continue an incremental fsck "
        ]

    scheduleincremental = ScheduleIncrementalO
        <$> option (eitherReader parseDuration)
            (mconcat
                [ long " incremental-schedule "
                , metavar paramTime
                , help " schedule incremental fscking "
                ])
2015-07-08 20:58:54 +00:00
{- Seeks the things to fsck and runs the fsck actions on them.
 -
 - The repository being checked is the remote given by the from option
 - when there is one, and otherwise the local repository. After seeking,
 - the incremental state is cleaned up and the fsck activity is recorded
 - for that repository; a failure to record it is ignored. -}
seek :: FsckOptions -> CommandSeek
seek o = startConcurrency commandStages $ do
    from <- case fsckFromOption o of
        Nothing -> pure Nothing
        Just deferred -> (Just <$$> getParsed) deferred
    u <- case from of
        Nothing -> getUUID
        Just r -> pure (Remote.uuid r)
    checkDeadRepo u
    i <- prepIncremental u (incrementalOpt o)
    let seeker = AnnexedFileSeeker
            { startAction = const (start from i)
            , checkContentPresent = Nothing
            , usesLocationLog = True
            }
    -- Used when operating on keys rather than on files in the work tree.
    let fsckkey kai = do
            numcopies <- getNumCopies
            commandAction (startKey from i kai numcopies)
    items <- workTreeItems ww (fsckFiles o)
    withKeyOptions (keyOptions o) False seeker fsckkey
        (withFilesInGitAnnex ww seeker)
        items
    cleanupIncremental i
    void $ tryIO $ recordActivity Fsck u
  where
    ww = WarnUnmatchLsFiles " fsck "
2010-11-15 22:22:50 +00:00
2015-11-10 18:44:58 +00:00
{- Displays an early warning when the repository with the given UUID
 - has a trust level of DeadTrusted. -}
checkDeadRepo :: UUID -> Annex ()
checkDeadRepo u = do
    trustlevel <- lookupTrust u
    when (DeadTrusted == trustlevel) $
        earlyWarning " Warning: Fscking a repository that is currently marked as dead. "
2020-09-14 20:49:33 +00:00
{- Starts a fsck of an annexed file.
 -
 - Stops without doing anything when no backend is found for the key.
 - Otherwise the file is checked locally, or, when a remote is given,
 - checked on that remote. -}
start :: Maybe Remote -> Incremental -> SeekInput -> RawFilePath -> Key -> CommandStart
start from inc si file key = do
    mbackend <- Backend.getBackend (fromRawFilePath file) key
    case mbackend of
        Nothing -> stop
        Just backend -> do
            (numcopies, _mincopies) <- getFileNumMinCopies file
            fsckwith $ maybe
                (perform key file backend numcopies)
                (performRemote key afile numcopies)
                from
  where
    afile = AssociatedFile (Just file)
    fsckwith a = runFsck inc si (mkActionItem (key, afile)) key a
2010-11-15 22:22:50 +00:00
2019-12-04 17:15:34 +00:00
{- Runs all the local checks for an annexed file and its key.
 - The result is True only when every check succeeded. -}
perform :: Key -> RawFilePath -> Backend -> NumCopies -> Annex Bool
perform key file backend numcopies =
    check . checks =<< getKeyFileStatus key file
  where
    afile = AssociatedFile (Just file)
    ai = mkActionItem (key, afile)
    -- order matters
    checks keystatus =
        [ fixLink key file
        , fixObjectLocation key
        , verifyLocationLog key keystatus ai
        , verifyRequiredContent key ai
        , verifyAssociatedFiles key keystatus file
        , verifyWorkTree key file
        , checkKeySize key keystatus ai
        , checkBackend key keystatus afile
        , checkKeyUpgrade backend key ai afile
        , checkKeyNumCopies key afile numcopies
        ]
2011-10-29 21:49:37 +00:00
2012-01-19 19:24:05 +00:00
{- To fsck a remote, the content is retrieved to a tmp file,
 - and checked locally.
 -
 - When the remote cannot be asked whether it has the key, the error is
 - shown as a note and the result is False. When the download from the
 - remote fails, a warning is displayed, the checks that do not need a
 - local copy are still run, and the result is False. -}
performRemote :: Key -> AssociatedFile -> NumCopies -> Remote -> Annex Bool
performRemote key afile numcopies remote = do
    haskey <- Remote.hasKey remote key
    case haskey of
        Left err -> do
            showNote (UnquotedString err)
            return False
        Right False -> runchecks False Nothing
        Right True -> withtmp $ \tmpfile -> do
            fetched <- getfile tmpfile
            case fetched of
                Nothing -> runchecks True Nothing
                Just (Right verification) ->
                    runchecks True (Just (tmpfile, verification))
                Just (Left _) -> do
                    warning $ actionItemDesc ai
                        <> " : failed to download file from remote "
                    void $ runchecks True Nothing
                    return False
  where
    ai = mkActionItem (key, afile)

    -- lv is the local copy of the content, paired with how well the
    -- download verified it, when a local copy was obtained.
    runchecks present lv =
        let localcopy = fst <$> lv
        in check
            [ verifyLocationLogRemote key ai remote present
            , verifyRequiredContent key ai
            , withLocalCopy localcopy $ checkKeySizeRemote key remote ai
            , case snd <$> lv of
                -- Nothing more to check when the download was Verified.
                Just Verified -> return True
                _ -> withLocalCopy localcopy $
                    checkBackendRemote key remote ai
            , checkKeyNumCopies key afile numcopies
            ]

    withtmp a = do
        -- Put it in the gitAnnexTmpObjectDir since that's on a
        -- filesystem where object temp files are normally
        -- stored. The pid prevents multiple fsck processes
        -- contending over the same file. (Multiple threads cannot,
        -- because OnlyActionOn is used.)
        pid <- liftIO getPID
        tmpdir <- fromRepo gitAnnexTmpObjectDir
        createAnnexDirectory tmpdir
        let tmp = tmpdir P.</> " fsck " <> toRawFilePath (show pid) <> " . " <> keyFile key
            cleanup = liftIO $ catchIO (R.removeLink tmp) (const noop)
        -- Remove any stale tmp file first, and the new one afterwards.
        cleanup
        cleanup `after` a tmp

    -- Yields Nothing when the disk space check fails, and also when the
    -- cheap retrieval fails while fast mode is enabled. A cheap retrieval
    -- that succeeds is UnVerified.
    getfile tmp = do
        enoughspace <- checkDiskSpace Nothing (Just (P.takeDirectory tmp)) key 0 True
        if not enoughspace
            then return Nothing
            else do
                gotcheap <- getcheap tmp
                if gotcheap
                    then return (Just (Right UnVerified))
                    else do
                        fast <- Annex.getRead Annex.fast
                        if fast
                            then return Nothing
                            else Just <$> tryNonAsync (getfile' tmp)

    getfile' tmp = Remote.retrieveKeyFile remote key
        (AssociatedFile Nothing)
        (fromRawFilePath tmp)
        nullMeterUpdate
        (RemoteVerify remote)

    getcheap tmp = case Remote.retrieveKeyFileCheap remote of
        Nothing -> return False
        Just retrieve -> isRight
            <$> tryNonAsync (retrieve key afile (fromRawFilePath tmp))
2012-01-19 19:24:05 +00:00
2020-09-14 20:49:33 +00:00
{- Starts a fsck of a key that has no associated file.
 -
 - Stops without doing anything when the backend variety of the key
 - cannot be looked up. Otherwise the key is checked locally, or, when a
 - remote is given, checked on that remote. -}
startKey :: Maybe Remote -> Incremental -> (SeekInput, Key, ActionItem) -> NumCopies -> CommandStart
startKey from inc (si, key, ai) numcopies = do
    known <- Backend.maybeLookupBackendVariety (fromKey keyVariety key)
    case known of
        Nothing -> stop
        Just _ -> runFsck inc si ai key $ maybe
            (performKey key numcopies)
            (performRemote key (AssociatedFile Nothing) numcopies)
            from
2011-10-29 21:49:37 +00:00
2024-03-09 17:50:30 +00:00
{- Runs the local checks that apply to a key without an associated file.
 - The result is True only when every check succeeded. -}
performKey :: Key -> NumCopies -> Annex Bool
performKey key numcopies =
    check . checks =<< getKeyStatus key
  where
    ai = mkActionItem key
    nofile = AssociatedFile Nothing
    checks keystatus =
        [ verifyLocationLog key keystatus ai
        , checkKeySize key keystatus ai
        , checkBackend key keystatus nofile
        , checkKeyNumCopies key nofile numcopies
        ]
2011-10-29 21:49:37 +00:00
2012-09-25 19:06:33 +00:00
{- Runs every check in order, and succeeds only when all of them did.
 - A failed check does not prevent the later checks from running. -}
check :: [Annex Bool] -> Annex Bool
check cs = do
    results <- sequence cs
    return (and results)
2012-03-10 18:46:21 +00:00
2019-01-14 19:19:20 +00:00
{- Checks that the annex link of a file points correctly to the annexed
 - content, and replaces the link when it does not.
 -
 - Nothing is done when the file has no annex link target. Always
 - results in True. -}
fixLink :: Key -> RawFilePath -> Annex Bool
fixLink key file = do
    want <- calcRepo (gitAnnexLink file key)
    mhave <- getAnnexLinkTarget file
    case mhave of
        Just have
            | want /= fromInternalGitPath have -> relink want
        _ -> noop
    return True
  where
    relink target = do
        showNote " fixing link "
        createWorkTreeDirectory (parentDir file)
        liftIO $ R.removeLink file
        addAnnexLink target file
2012-03-10 18:46:21 +00:00
2022-05-16 19:19:48 +00:00
{- A repository that supports symlinks and is not bare may have in the past
 - been bare, or not supported symlinks. If so, the object may be located
 - in a directory other than the one where annex symlinks point to. Moves
 - the object in that case.
 -
 - Also if a repository has been converted to bare, or moved to a crippled
 - filesystem not supporting symlinks, the object file will be moved
 - to the other location.
 -
 - Always results in True; a non-async exception thrown while moving the
 - object is caught and ignored.
 -}
fixObjectLocation :: Key -> Annex Bool
#ifdef mingw32_HOST_OS
fixObjectLocation _key = do
    -- Windows does not allow locked files to be renamed, but annex
    -- links are also not used on Windows.
    return True
#else
fixObjectLocation key = do
    current <- calcRepo (gitAnnexLocation key)
    ideal <- calcRepo (gitAnnexLocation' (const (pure True)) key)
    needsmove <- if current == ideal
        then return False
        else liftIO (R.doesPathExist current)
    if needsmove
        then relocate current ideal `catchNonAsync` const (return True)
        else return True
  where
    relocate src dest = do
        let srcdir = parentDir src
            destdir = parentDir dest
        showNote " normalizing object location "
        -- When the content file is moved, it will
        -- appear to other processes as if it has been removed.
        -- That should never happen to a process that has used
        -- lockContentShared, so avoid it by locking the content
        -- for removal, although it's not really being removed.
        lockContentForRemoval key (return True) $ \_lck -> do
            -- Thaw the content directory to allow renaming it.
            thawContentDir src
            createAnnexDirectory (parentDir destdir)
            liftIO $ renameDirectory
                (fromRawFilePath srcdir)
                (fromRawFilePath destdir)
            -- Since the directory was moved, lockContentForRemoval
            -- will not be able to remove the lock file it
            -- made. So, remove the lock file here.
            version <- getVersion
            mlockfile <- contentLockFile key version
            liftIO $ mapM_ (removeWhenExistsWith R.removeLink) mlockfile
            freezeContentDir dest
            cleanObjectDirs src
            return True
#endif
2011-03-02 18:30:36 +00:00
{- Checks that the location log reflects the current status of the key,
 - in this repository only.
 -
 - Along the way, the permissions of the object file and of its
 - directory are fixed up, and a warning is displayed when
 - annex.securehashesonly is set but the present content uses a key that
 - is not cryptographically secure. -}
verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool
verifyLocationLog key keystatus ai = do
    obj <- calcRepo (gitAnnexLocation key)
    present <- if isKeyUnlockedThin keystatus
        then liftIO (doesFileExist (fromRawFilePath obj))
        else inAnnex key
    u <- getUUID

    {- Since we're checking that a key's object file is present, throw
     - in a permission fixup here too. -}
    when present $
        fixperms obj
    whenM (liftIO $ R.doesPathExist $ parentDir obj) $
        freezeContentDir obj

    {- Warn when annex.securehashesonly is set and content using an
     - insecure hash is present. This should only be able to happen
     - if the repository already contained the content before the
     - config was set, or of course if a hash was broken. -}
    when present $
        warninsecure obj

    verifyLocationLog' key ai present u (logChange NoLiveUpdate key u)
  where
    fixperms obj = do
        -- An IO error while changing the permissions is ignored.
        void $ tryIO $ case keystatus of
            KeyUnlockedThin -> thawContent obj
            KeyLockedThin -> thawContent obj
            _ -> freezeContent obj
        writeperm <- checkContentWritePerm obj
        case writeperm of
            Nothing -> warning $ " ** Unable to set correct write mode for " <> QuotedPath obj <> " ; perhaps you don't own that file, or perhaps it has an xattr or ACL set "
            _ -> return ()

    warninsecure obj = do
        secure <- Backend.isCryptographicallySecureKey key
        unless secure $
            whenM (annexSecureHashesOnly <$> Annex.getGitConfig) $
                warning $ " ** Despite annex.securehashesonly being set, " <> QuotedPath obj <> " has content present in the annex using an insecure " <> UnquotedString (decodeBS (formatKeyVariety (fromKey keyVariety key))) <> " key "
2012-01-19 19:24:05 +00:00
2017-03-10 18:12:39 +00:00
{- Checks that the location log reflects whether the key is present on
 - a remote, updating the logged status for that remote as needed. -}
verifyLocationLogRemote :: Key -> ActionItem -> Remote -> Bool -> Annex Bool
verifyLocationLogRemote key ai remote present =
    verifyLocationLog' key ai present remoteuuid setstatus
  where
    remoteuuid = Remote.uuid remote
    setstatus = Remote.logStatus NoLiveUpdate remote key
2011-03-02 18:30:36 +00:00
2017-03-10 18:12:39 +00:00
{- Compares whether a key's content is present in the repository with
 - the given UUID against what the location log says, and corrects the
 - log using the provided update action when they disagree.
 -
 - The result is False only when the log said the content was present,
 - but it is missing. -}
verifyLocationLog' :: Key -> ActionItem -> Bool -> UUID -> (LogStatus -> Annex ()) -> Annex Bool
verifyLocationLog' key ai present u updatestatus = do
    uuids <- loggedLocations key
    case (present, u `elem` uuids) of
        (True, True) -> return True
        (True, False) -> do
            fix InfoPresent
            -- There is no data loss, so do not fail.
            return True
        (False, True) -> do
            fix InfoMissing
            -- The second line of the message starts with a newline
            -- escape, so the warning is displayed on two lines.
            warning $
                " ** Based on the location log, " <>
                actionItemDesc ai <>
                " \n ** was expected to be present, " <>
                " but its content is missing. "
            return False
        (False, False) -> do
            -- When the location log for the key is not present,
            -- create it, so that the key will be known.
            when (null uuids) $
                whenM (not <$> isKnownKey key) $
                    updatestatus InfoMissing
            return True
  where
    fix s = do
        showNote " fixing location log "
        updatestatus s
2011-07-05 22:31:46 +00:00
2018-02-08 18:08:41 +00:00
{- Verifies that all repos that are required to contain the content do,
 - checking against the location log.
 -
 - The result is False, after a warning listing the repositories, when
 - the content is missing from a repository that requires it. Only
 - action items with an associated file can be checked; anything else
 - results in True. -}
verifyRequiredContent :: Key -> ActionItem -> Annex Bool
verifyRequiredContent key ai@(ActionItemAssociatedFile afile _) = case afile of
    -- Can't be checked if there's no associated file.
    AssociatedFile Nothing -> return True
    AssociatedFile (Just _) -> do
        requiredlocs <- S.fromList . M.keys <$> requiredContentMap
        if S.null requiredlocs
            then return True
            else go requiredlocs
  where
    go requiredlocs = do
        presentlocs <- S.fromList <$> loggedLocations key
        -- Only repositories that are not logged as containing the
        -- content need to be asked whether they require it.
        missinglocs <- filterM
            (\u -> isRequiredContent NoLiveUpdate (Just u) S.empty (Just key) afile False)
            (S.toList $ S.difference requiredlocs presentlocs)
        if null missinglocs
            then return True
            else do
                missingrequired <- Remote.prettyPrintUUIDs " missingrequired " missinglocs
                -- The newline escape puts the list of repositories on
                -- its own line of the warning.
                warning $
                    " ** Required content " <>
                    actionItemDesc ai <>
                    " is missing from these repositories: \n " <>
                    UnquotedString missingrequired
                return False
verifyRequiredContent _ _ = return True
2016-02-14 20:52:43 +00:00
{- Verifies the associated file records.
 -
 - When the key status is unlocked thin, and the file is not among the
 - associated files recorded for the key, it is added to them.
 - Always results in True. -}
verifyAssociatedFiles :: Key -> KeyStatus -> RawFilePath -> Annex Bool
verifyAssociatedFiles key keystatus file = do
    when (isKeyUnlockedThin keystatus) $ do
        f <- inRepo (toTopFilePath file)
        recorded <- map getTopFilePath <$> Database.Keys.getAssociatedFiles key
        when (getTopFilePath f `notElem` recorded) $
            Database.Keys.addAssociatedFile key f
    return True
2013-01-19 18:11:23 +00:00
2019-12-04 17:15:34 +00:00
{- Makes sure that a pointer file is replaced with its content,
 - when the content is available.
 -
 - Only acts when the file is a pointer file for this same key.
 - Always results in True. -}
verifyWorkTree :: Key -> RawFilePath -> Annex Bool
verifyWorkTree key file = do
    pointee <- liftIO (isPointerFile file)
    when (pointee == Just key) $
        whenM (inAnnex key) populate
    return True
  where
    populate = do
        showNote " fixing worktree content "
        replaceWorkTreeFile (fromRawFilePath file) $ \tmp -> do
            -- The mode of the existing file, if its status can be read.
            mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file
            thin <- annexThin <$> Annex.getGitConfig
            if thin
                then void $ linkFromAnnex' key tmp mode
                else do
                    obj <- calcRepo (gitAnnexLocation key)
                    void $ checkedCopyFile key obj tmp mode
                    thawContent tmp
            Database.Keys.storeInodeCaches key [tmp]
2013-06-24 20:26:00 +00:00
2011-07-05 22:31:46 +00:00
{- Compares the size of a key's object file with the size recorded in
 - the key, if any.
 -
 - The check is skipped (and True returned) for KeyUnlockedThin, and
 - when the object file does not exist.
 -}
checkKeySize :: Key -> KeyStatus -> ActionItem -> Annex Bool
checkKeySize key keystatus ai = case keystatus of
    KeyUnlockedThin -> return True
    _ -> do
        obj <- calcRepo $ gitAnnexLocation key
        present <- liftIO $ R.doesPathExist obj
        if present
            then checkKeySizeOr badContent key obj ai
            else return True
2012-01-19 19:24:05 +00:00
2020-11-03 14:11:04 +00:00
{- Runs the action on the local copy, when there is one.
 - Without a local copy, the result is True. -}
withLocalCopy :: Maybe RawFilePath -> (RawFilePath -> Annex Bool) -> Annex Bool
withLocalCopy mlocalcopy f = maybe (return True) f mlocalcopy
2020-11-03 14:11:04 +00:00
{- Size check of a local copy of a remote's content; a mismatch is
 - handled by badContentRemote. -}
checkKeySizeRemote :: Key -> Remote -> ActionItem -> RawFilePath -> Annex Bool
checkKeySizeRemote key remote ai localcopy = checkKeySizeOr onbad key localcopy ai
  where
    onbad = badContentRemote remote localcopy
2011-07-05 22:31:46 +00:00
2020-11-03 14:11:04 +00:00
{- Checks that the file has the size recorded in the key.
 -
 - A key with no recorded size always passes. On a mismatch, the bad
 - action is run, its message is included in a warning, and the
 - result is False.
 -}
checkKeySizeOr :: (Key -> Annex String) -> Key -> RawFilePath -> ActionItem -> Annex Bool
checkKeySizeOr bad key file ai = maybe (return True) verify (fromKey keySize key)
  where
    verify expected = do
        actual <- liftIO $ getFileSize file
        if expected == actual
            then return True
            else do
                complain expected actual
                return False
    complain expected actual = do
        msg <- bad key
        warning $ actionItemDesc ai
            <> ": Bad file size ("
            <> UnquotedString (compareSizes storageUnits True expected actual)
            <> "); "
            <> UnquotedString msg
2011-07-05 22:31:46 +00:00
2018-05-23 18:07:51 +00:00
{- Check for keys that are upgradable.
 -
 - Only warns, suggesting the migrate command to run; nothing is
 - migrated here, because migration can cause more disk space to be
 - used, and makes worktree changes that need to be committed.
 -
 - The result is always True.
 -}
checkKeyUpgrade :: Backend -> Key -> ActionItem -> AssociatedFile -> Annex Bool
checkKeyUpgrade backend key ai (AssociatedFile mfile) = do
    -- Without a filename, no migration is suggested: while possible
    -- to do, there is no actual benefit from doing that in this
    -- situation.
    case (mfile, Types.Backend.canUpgradeKey backend) of
        (Just file, Just upgradable) | upgradable key ->
            warning $ actionItemDesc ai
                <> ": Can be upgraded to an improved key format. "
                <> "You can do so by running: git annex migrate --backend="
                <> UnquotedByteString (formatKeyVariety (fromKey keyVariety key))
                <> " "
                <> QuotedPath file
        _ -> return ()
    return True
2015-12-11 20:05:56 +00:00
{- Runs the backend specific check on a key's content object.
 -
 - When annex.thin is set, an unlocked file may be a hard link to the
 - object. Thus when the user modifies the file, the object will be
 - modified and not pass the check, and we don't want to find an error
 - in this case. So the check is skipped when the key is unlocked thin
 - and the object is not unmodified.
 -
 - The check is also skipped when the object does not exist.
 -}
checkBackend :: Key -> KeyStatus -> AssociatedFile -> Annex Bool
checkBackend key keystatus afile = do
    content <- calcRepo (gitAnnexLocation key)
    ifM (liftIO $ R.doesPathExist content)
        ( ifM (modifiedthin content)
            ( return True
            , verify content
            )
        , return True
        )
  where
    ai = mkActionItem (key, afile)

    modifiedthin content =
        pure (isKeyUnlockedThin keystatus) <&&> (not <$> isUnmodified key content)

    -- The inode cache is generated before the content is verified,
    -- and is only checked once the content is known to be good.
    verify content = do
        mic <- withTSDelta (liftIO . genInodeCache content)
        ok <- checkBackendOr badContent key content ai
        when ok $
            checkInodeCache key content mic ai
        return ok
2024-03-09 17:50:30 +00:00
{- Content check of a local copy of a remote's content; bad content is
 - handled by badContentRemote. -}
checkBackendRemote :: Key -> Remote -> ActionItem -> RawFilePath -> Annex Bool
checkBackendRemote key remote ai localcopy = checkBackendOr onbad key localcopy ai
  where
    onbad = badContentRemote remote localcopy
2012-01-19 19:24:05 +00:00
2024-03-09 17:50:30 +00:00
{- Verifies the content of the file for the key, unless the fast
 - setting is enabled, in which case the result is True with no check.
 -
 - When verification fails, the bad action is run, its message is
 - included in a warning, and the result is False.
 -}
checkBackendOr :: (Key -> Annex String) -> Key -> RawFilePath -> ActionItem -> Annex Bool
checkBackendOr bad key file ai = do
    fast <- Annex.getRead Annex.fast
    if fast
        then return True
        else do
            ok <- verifyKeyContent' key file
            unless ok complain
            return ok
  where
    complain = do
        msg <- bad key
        warning $ actionItemDesc ai
            <> ": Bad file content; "
            <> UnquotedString msg
2011-10-31 16:33:41 +00:00
2021-07-29 18:06:13 +00:00
{- Check, if there are InodeCaches recorded for a key, that one of them
 - matches the object file. There are situations where the InodeCache
 - of the object file does not get recorded, including a v8 upgrade.
 - There may also be situations where the wrong InodeCache is recorded,
 - if inodes are not stable.
 -
 - This must be called after the content of the object file has been
 - verified to be correct. The InodeCache is generated again to detect if
 - the object file was changed while the content was being verified;
 - the recorded caches are only updated when the two compare equal.
 -}
checkInodeCache :: Key -> RawFilePath -> Maybe InodeCache -> ActionItem -> Annex ()
checkInodeCache key content mic ai = maybe noop check mic
  where
    check ic = do
        ics <- Database.Keys.getInodeCaches key
        unless (null ics) $
            unlessM (isUnmodifiedCheapLowLevel ic ics) $
                withTSDelta (liftIO . genInodeCache content)
                    >>= maybe noop (update ic)

    update ic ic' = whenM (compareInodeCaches ic ic') $ do
        warning $ actionItemDesc ai
            <> ": Stale or missing inode cache; updating."
        Database.Keys.addInodeCaches key [ic]
2015-06-09 18:08:57 +00:00
{- Checks that enough copies of a key are logged in locations that are
 - neither untrusted nor dead.
 -
 - When there are too few, a dead key is skipped with a note (True);
 - otherwise a warning is displayed and the result is False.
 -}
checkKeyNumCopies :: Key -> AssociatedFile -> NumCopies -> Annex Bool
checkKeyNumCopies key afile numcopies = do
    locs <- loggedLocations key
    (untrustedlocations, otherlocations) <- trustPartition UnTrusted locs
    (deadlocations, safelocations) <- trustPartition DeadTrusted otherlocations
    let present = numCopiesCount safelocations
    if present >= fromNumCopies numcopies
        then return True
        else ifM (checkDead key)
            ( do
                showLongNote "This key is dead, skipping."
                return True
            , do
                untrusted <- Remote.prettyPrintUUIDs "untrusted" untrustedlocations
                dead <- Remote.prettyPrintUUIDs "dead" deadlocations
                warning $ missingNote desc present numcopies untrusted dead
                when (present == 0 && not hasafile) $
                    showLongNote "(Avoid this check by running: git annex dead --key )"
                return False
            )
  where
    -- The warning names the associated file when there is one, and
    -- the serialized key otherwise.
    (desc, hasafile) = case afile of
        AssociatedFile Nothing -> (serializeKey' key, False)
        AssociatedFile (Just af) -> (af, True)
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.
Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.
When json is being output, no quoting is done, since json gets its own
quoting.
This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.
Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 18:47:32 +00:00
{- Builds the warning shown when too few copies of a file exist.
 -
 - The parameters are the file (or key) description, the number of
 - copies present, the number needed, and the pretty-printed lists of
 - untrusted and of dead locations (each empty when there are none).
 -
 - The dead locations are only mentioned when no copies are present.
 -}
missingNote :: RawFilePath -> Int -> NumCopies -> String -> String -> StringContainingQuotedPath
missingNote file present needed untrusted dead
    | present == 0 && null untrusted =
        "** No known copies exist of " <> QuotedPath file <> deadnote
    | present == 0 =
        "Only these untrusted locations may have copies of " <> QuotedPath file
            <> "\n" <> UnquotedString untrusted
            <> "Back it up to trusted locations with git-annex copy." <> deadnote
    | null untrusted = shortfall
    | otherwise =
        shortfall
            <> "\nThe following untrusted locations may also have copies: "
            <> "\n" <> UnquotedString untrusted
  where
    deadnote = UnquotedString (honorDead dead)
    shortfall =
        "Only " <> UnquotedString (show present)
            <> " of " <> UnquotedString (show (fromNumCopies needed))
            <> " trustworthy copies exist of " <> QuotedPath file
            <> "\nBack it up with git-annex copy."
2016-02-19 19:12:11 +00:00
{- Formats a note listing dead repositories; empty when there are none. -}
honorDead :: String -> String
honorDead [] = ""
honorDead dead = "\nThese dead repositories used to have copies\n" ++ dead
2012-01-19 19:24:05 +00:00
{- Bad content is moved aside; the result describes where it went. -}
badContent :: Key -> Annex String
badContent key = describe <$> moveBad key
  where
    describe dest = "moved to " ++ fromRawFilePath dest
2012-01-19 19:24:05 +00:00
2015-04-18 18:13:07 +00:00
{- Bad content is dropped from the remote. We have downloaded a copy
 - from the remote to a temp file already (in some cases, it's just a
 - symlink to a file in the remote). To avoid any further data loss,
 - that temp file is moved to the bad content directory unless
 - the local annex has a copy of the content.
 -
 - The result is a message describing what was done. Every variant of
 - the message separates its text from the remote's name with a space.
 -}
badContentRemote :: Remote -> RawFilePath -> Key -> Annex String
badContentRemote remote localcopy key = do
    bad <- fromRepo gitAnnexBadDir
    let destbad = bad P.</> keyFile key
    let destbad' = fromRawFilePath destbad
    -- The local copy is only preserved when the key is not in the annex
    -- and nothing is already at the destination.
    movedbad <- ifM (inAnnex key <||> liftIO (doesFileExist destbad'))
        ( return False
        , do
            createAnnexDirectory (parentDir destbad)
            -- A failure to preserve the copy counts as not moved.
            liftIO $ catchDefaultIO False $
                -- A symlink is copied rather than moved.
                ifM (isSymbolicLink <$> R.getSymbolicLinkStatus localcopy)
                    ( copyFileExternal CopyTimeStamps (fromRawFilePath localcopy) destbad'
                    , do
                        moveFile localcopy destbad
                        return True
                    )
        )
    dropped <- tryNonAsync (Remote.removeKey remote Nothing key)
    -- The location log is only updated when the drop succeeded.
    when (isRight dropped) $
        Remote.logStatus NoLiveUpdate remote key InfoMissing
    return $ case (movedbad, dropped) of
        (True, Right ()) -> "moved from " ++ remotename ++
            " to " ++ fromRawFilePath destbad
        (False, Right ()) -> "dropped from " ++ remotename
        (_, Left e) -> "failed to drop from " ++ remotename ++ ": " ++ show e
  where
    remotename = Remote.name remote
2012-09-25 17:22:12 +00:00
2020-09-14 20:49:33 +00:00
{- Runs a fsck action on a key, unless needFsck says the key does not
 - need to be fscked. When the action succeeds, that is recorded with
 - recordFsckTime. -}
runFsck :: Incremental -> SeekInput -> ActionItem -> Key -> Annex Bool -> CommandStart
runFsck inc si ai key a = stopUnless (needFsck inc key) $
    starting "fsck" (OnlyActionOn key ai) si perform
  where
    perform = do
        ok <- a
        when ok $
            recordFsckTime inc key
        next $ return ok
2012-09-25 19:06:33 +00:00
{- Check if a key needs to be fscked, with support for incremental fscks.
 -
 - Only a continued incremental fsck skips keys: those already in its
 - fsck database.
 -}
needFsck :: Incremental -> Key -> Annex Bool
needFsck inc key = case inc of
    ScheduleIncremental _ _ i -> needFsck i key
    ContIncremental h -> liftIO $ not <$> FsckDb.inDb h key
    _ -> return True
2012-09-25 19:06:33 +00:00
2015-02-16 19:08:29 +00:00
{- Adds the key to the fsck database, when withFsckDb provides one. -}
recordFsckTime :: Incremental -> Key -> Annex ()
recordFsckTime inc key = withFsckDb inc record
  where
    record h = liftIO $ FsckDb.addDb h key
2012-09-25 18:16:34 +00:00
2013-05-19 18:46:48 +00:00
{- Records the start time of an incremental fsck.
 -
 - To guard against time stamp damage (for example, if an annex directory
 - is copied without -a), the fsckstate file contains a time that should
 - be identical to its modification time.
 - (This is not possible to do on Windows, and so the timestamp in
 - the file will only be equal or greater than the modification time.)
 -}
recordStartTime :: UUID -> Annex ()
recordStartTime u = do
    f <- fromRepo (gitAnnexFsckState u)
    createAnnexDirectory $ parentDir f
    -- Any previous state file is removed before a new one is written.
    liftIO $ removeWhenExistsWith R.removeLink f
    liftIO $ withFile (fromRawFilePath f) WriteMode $ \h -> do
#ifndef mingw32_HOST_OS
        -- The modification time of the just-opened file is what gets
        -- written into it.
        t <- modificationTime <$> R.getFileStatus f
#else
        -- On Windows the current time is written instead.
        t <- getPOSIXTime
#endif
        hPutStr h $ showTime $ realToFrac t
    setAnnexFilePerm f
  where
    -- Fixes the type that the timestamp is converted to and shown as.
    showTime :: POSIXTime -> String
    showTime = show
2012-09-25 18:16:34 +00:00
2015-02-17 21:08:11 +00:00
{- Removes the fsck state file, if it exists. -}
resetStartTime :: UUID -> Annex ()
resetStartTime u = do
    f <- fromRepo (gitAnnexFsckState u)
    liftIO $ removeWhenExistsWith R.removeLink f
2012-09-25 23:37:34 +00:00
2012-09-25 18:16:34 +00:00
{- Gets the incremental fsck start time.
 -
 - The result is the modification time of the fsck state file, but only
 - when it agrees with the time stored inside the file. Any exception
 - while reading the file or its status yields Nothing.
 -}
getStartTime :: UUID -> Annex (Maybe EpochTime)
getStartTime u = do
    f <- fromRepo (gitAnnexFsckState u)
    liftIO $ catchDefaultIO Nothing $ do
        timestamp <- modificationTime <$> R.getFileStatus f
        let fromstatus = Just (realToFrac timestamp)
        fromfile <- parsePOSIXTime <$> readFile (fromRawFilePath f)
        return $ if matchingtimestamp fromfile fromstatus
            then Just timestamp
            else Nothing
  where
    -- Exact match is required, except on Windows, where the time in
    -- the file may be equal to or greater than the modification time.
    matchingtimestamp fromfile fromstatus =
#ifndef mingw32_HOST_OS
        fromfile == fromstatus
#else
        fromfile >= fromstatus
#endif
2015-04-01 21:53:16 +00:00
2015-07-25 21:37:09 +00:00
{- The mode of a fsck run with respect to incremental fscking. -}
data Incremental
    -- No incremental state is used.
    = NonIncremental
    -- A scheduled incremental fsck; wraps the Incremental that does the
    -- actual work, along with the schedule's duration and the UUID
    -- whose fsck state is used.
    | ScheduleIncremental Duration UUID Incremental
    -- A new incremental pass, with its open fsck database handle.
    | StartIncremental FsckDb.FsckHandle
    -- A continued incremental pass, with its open fsck database handle.
    | ContIncremental FsckDb.FsckHandle
2015-04-01 21:53:16 +00:00
2015-07-09 16:26:25 +00:00
{- Sets up the Incremental for the selected incremental option.
 -
 - Starting a new pass records the start time, and gives up if
 - FsckDb.newPass fails. A scheduled fsck starts a new pass when there
 - is no valid start time, and otherwise continues the existing pass.
 -}
prepIncremental :: UUID -> Maybe IncrementalOpt -> Annex Incremental
prepIncremental u mopt = case mopt of
    Nothing -> pure NonIncremental
    Just StartIncrementalO -> do
        recordStartTime u
        ifM (FsckDb.newPass u)
            ( StartIncremental <$> openFsckDb u
            , giveup "Cannot start a new --incremental fsck pass; another fsck process is already running."
            )
    Just MoreIncrementalO -> ContIncremental <$> openFsckDb u
    Just (ScheduleIncrementalO delta) -> do
        started <- getStartTime u
        let opt = maybe StartIncrementalO (const MoreIncrementalO) started
        ScheduleIncremental delta u <$> prepIncremental u (Just opt)
2015-07-25 21:37:09 +00:00
{- Cleans up after a fsck run.
 -
 - For a scheduled incremental fsck, the start time is reset once at
 - least the schedule's duration has passed since it was recorded.
 - Other modes need no cleanup here.
 -}
cleanupIncremental :: Incremental -> Annex ()
cleanupIncremental inc = case inc of
    ScheduleIncremental delta u i -> do
        getStartTime u >>= maybe noop (expire delta u)
        cleanupIncremental i
    _ -> return ()
  where
    expire delta u started = do
        now <- liftIO getPOSIXTime
        when (now - realToFrac started >= durationToPOSIXTime delta) $
            resetStartTime u
2015-07-31 20:00:13 +00:00
{- Opens the fsck database, registering a cleanup action that closes
 - it again. -}
openFsckDb :: UUID -> Annex FsckDb.FsckHandle
openFsckDb u = do
    h <- FsckDb.openDb u
    closelater h
    return h
  where
    closelater = Annex.addCleanupAction FsckCleanup . FsckDb.closeDb
{- Runs an action on the fsck database handle of an incremental fsck.
 - Does nothing for a non-incremental fsck. -}
withFsckDb :: Incremental -> (FsckDb.FsckHandle -> Annex ()) -> Annex ()
withFsckDb inc a = case inc of
    NonIncremental -> noop
    StartIncremental h -> a h
    ContIncremental h -> a h
    ScheduleIncremental _ _ i -> withFsckDb i a
2015-12-11 20:05:56 +00:00