git-annex/Command/Fsck.hs

764 lines
26 KiB
Haskell
Raw Normal View History

2010-11-06 21:06:19 +00:00
{- git-annex command
-
- Copyright 2010-2023 Joey Hess <id@joeyh.name>
2010-11-06 21:06:19 +00:00
-
- Licensed under the GNU AGPL version 3 or higher.
2010-11-06 21:06:19 +00:00
-}
2013-05-10 21:29:59 +00:00
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
2013-05-10 21:29:59 +00:00
2010-11-06 21:06:19 +00:00
module Command.Fsck where
import Command
import qualified Annex
import qualified Remote
import qualified Types.Backend
import qualified Backend
2011-10-04 04:40:47 +00:00
import Annex.Content
import Annex.Verify
2023-03-21 22:22:41 +00:00
#ifndef mingw32_HOST_OS
import Annex.Version
import Annex.Content.Presence
2023-03-21 22:22:41 +00:00
#endif
import Annex.Content.Presence.LowLevel
import Annex.Perms
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) 
Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
import Annex.Link
2011-10-15 20:21:08 +00:00
import Logs.Location
import Logs.Trust
import Logs.Activity
2018-10-30 03:13:36 +00:00
import Utility.TimeStamp
import Logs.PreferredContent
2015-04-30 18:02:56 +00:00
import Annex.NumCopies
import Annex.UUID
import Annex.ReplaceFile
2011-07-06 00:36:43 +00:00
import Utility.DataUnits
import Utility.HumanTime
import Utility.CopyFile
2013-06-18 01:30:52 +00:00
import Git.FilePath
2014-02-11 19:29:56 +00:00
import Utility.PID
import Utility.InodeCache
import Utility.Metered
import Annex.InodeSentinal
import qualified Database.Keys
import qualified Database.Fsck as FsckDb
import Types.CleanupActions
import Types.Key
import qualified Utility.RawFilePath as R
2010-11-06 21:06:19 +00:00
2012-09-25 18:16:34 +00:00
import Data.Time.Clock.POSIX
import System.Posix.Types (EpochTime)
import qualified Data.Set as S
import qualified Data.Map as M
import Data.Either
import qualified System.FilePath.ByteString as P
import System.PosixCompat.Files (fileMode, isSymbolicLink, modificationTime)
2012-09-25 18:16:34 +00:00
{- The "fsck" command, listed in the maintenance section.
 -
 - Besides the options parsed by optParser, it accepts the jobs, json,
 - and annexed file matching options. -}
cmd :: Command
cmd = withAnnexOptions extraoptions fsckcommand
  where
    extraoptions = [jobsOption, jsonOptions, annexedMatchingOptions]
    fsckcommand = command "fsck" SectionMaintenance
        "find and fix problems"
        paramPaths (seek <$$> optParser)
{- Options accepted by the fsck command, as produced by optParser. -}
data FsckOptions = FsckOptions
    { fsckFiles :: CmdParams
    -- ^ the non-option command line parameters
    , fsckFromOption :: Maybe (DeferredParse Remote)
    -- ^ remote to check (--from), when one was specified
    , incrementalOpt :: Maybe IncrementalOpt
    -- ^ incremental mode, when one was requested
    , keyOptions :: Maybe KeyOptions
    -- ^ key selection options, when any were given
    }
{- The incremental fsck mode requested on the command line.
 - See optParser for the flags that select each constructor. -}
data IncrementalOpt
    = StartIncrementalO -- --incremental: start an incremental fsck
    | MoreIncrementalO -- --more: continue an incremental fsck
    | ScheduleIncrementalO Duration -- --incremental-schedule, with the given duration
{- Command line parser for fsck.
 -
 - Fields are parsed in the order they appear in FsckOptions: the
 - positional parameters, an optional --from remote, an optional
 - incremental mode, and optional key options. -}
optParser :: CmdParamsDesc -> Parser FsckOptions
optParser desc = FsckOptions
    <$> cmdParams desc
    <*> optional (mkParseRemoteOption <$> strOption
        ( long "from" <> short 'f' <> metavar paramRemote
        <> help "check remote"
        <> completeRemotes
        ))
    <*> optional parseincremental
    <*> optional parseKeyOptions
  where
    -- The three incremental modes are alternatives; the first one that
    -- parses is used.
    parseincremental =
        flag' StartIncrementalO
            ( long "incremental" <> short 'S'
            <> help "start an incremental fsck"
            )
        <|> flag' MoreIncrementalO
            ( long "more" <> short 'm'
            <> help "continue an incremental fsck"
            )
        <|> (ScheduleIncrementalO <$> option (eitherReader parseDuration)
            ( long "incremental-schedule" <> metavar paramTime
            <> help "schedule incremental fscking"
            ))
{- Finds the things to fsck and runs the checks on them.
 -
 - When --from was given, the remote's UUID is the one being fscked;
 - otherwise it is the UUID of the local repository. A warning is shown
 - if that repository is marked dead. Afterwards the incremental state
 - is cleaned up and the fsck activity is recorded for the UUID. -}
seek :: FsckOptions -> CommandSeek
seek o = startConcurrency commandStages $ do
    from <- maybe (pure Nothing) (Just <$$> getParsed) (fsckFromOption o)
    u <- maybe getUUID (pure . Remote.uuid) from
    checkDeadRepo u
    i <- prepIncremental u (incrementalOpt o)
    let seeker = AnnexedFileSeeker
            { startAction = const $ start from i
            , checkContentPresent = Nothing
            , usesLocationLog = True
            }
    -- With key options, keys are fscked via startKey; otherwise the
    -- annexed files in the work tree are fscked via the seeker.
    withKeyOptions (keyOptions o) False seeker
        (\kai -> commandAction . startKey from i kai =<< getNumCopies)
        (withFilesInGitAnnex ww seeker)
        =<< workTreeItems ww (fsckFiles o)
    cleanupIncremental i
    -- An IO failure to record the activity is ignored.
    void $ tryIO $ recordActivity Fsck u
  where
    ww = WarnUnmatchLsFiles "fsck"
2010-11-15 22:22:50 +00:00
{- Shows an early warning when the repository with the given UUID has
 - a trust level of dead. -}
checkDeadRepo :: UUID -> Annex ()
checkDeadRepo u = do
    trustlevel <- lookupTrust u
    when (DeadTrusted == trustlevel) $
        earlyWarning "Warning: Fscking a repository that is currently marked as dead."
{- Starts a fsck of an annexed file.
 -
 - Nothing is done when no backend is found for the file's key.
 - Otherwise, the file's numcopies setting is looked up, and either the
 - local checks (perform) or the remote checks (performRemote) are run,
 - depending on whether a remote was specified. -}
start :: Maybe Remote -> Incremental -> SeekInput -> RawFilePath -> Key -> CommandStart
start from inc si file key = Backend.getBackend (fromRawFilePath file) key >>= \case
    Nothing -> stop
    Just backend -> do
        (numcopies, _mincopies) <- getFileNumMinCopies file
        case from of
            Nothing -> go $ perform key file backend numcopies
            Just r -> go $ performRemote key afile numcopies r
  where
    go = runFsck inc si (mkActionItem (key, afile)) key
    afile = AssociatedFile (Just file)
2010-11-15 22:22:50 +00:00
{- Runs all the local checks on an annexed file and its key.
 -
 - Every check is run, and the result is True only when all of them
 - returned True (see check). -}
perform :: Key -> RawFilePath -> Backend -> NumCopies -> Annex Bool
perform key file backend numcopies = do
    keystatus <- getKeyFileStatus key file
    check
        -- order matters
        [ fixLink key file
        , fixObjectLocation key
        , verifyLocationLog key keystatus ai
        , verifyRequiredContent key ai
        , verifyAssociatedFiles key keystatus file
        , verifyWorkTree key file
        , checkKeySize key keystatus ai
        , checkBackend key keystatus afile
        , checkKeyUpgrade backend key ai afile
        , checkKeyNumCopies key afile numcopies
        ]
  where
    afile = AssociatedFile (Just file)
    ai = mkActionItem (key, afile)
{- To fsck a remote, the content is retrieved to a tmp file,
 - and checked locally.
 -
 - When the remote cannot be asked whether it has the key, the error
 - is shown and the result is False. When the remote has the key but
 - downloading it fails, the checks that do not need a local copy are
 - still run, but the result is False. -}
performRemote :: Key -> AssociatedFile -> NumCopies -> Remote -> Annex Bool
performRemote key afile numcopies remote =
    dispatch =<< Remote.hasKey remote key
  where
    dispatch (Left err) = do
        showNote (UnquotedString err)
        return False
    dispatch (Right True) = withtmp $ \tmpfile ->
        getfile tmpfile >>= \case
            -- No download was attempted.
            Nothing -> go True Nothing
            Just (Right verification) -> go True (Just (tmpfile, verification))
            Just (Left _) -> do
                warning $ actionItemDesc ai
                    <> ": failed to download file from remote"
                void $ go True Nothing
                return False
    dispatch (Right False) = go False Nothing

    -- lv is the local copy of the content and how well it was verified
    -- while being retrieved, when a copy was obtained.
    go present lv = check
        [ verifyLocationLogRemote key ai remote present
        , verifyRequiredContent key ai
        , withLocalCopy (fmap fst lv) $ checkKeySizeRemote key remote ai
        , case fmap snd lv of
            -- Content that was already verified is not verified again.
            Just Verified -> return True
            _ -> withLocalCopy (fmap fst lv) $
                checkBackendRemote key remote ai
        , checkKeyNumCopies key afile numcopies
        ]

    ai = mkActionItem (key, afile)

    withtmp a = do
        -- Put it in the gitAnnexTmpObjectDir since that's on a
        -- filesystem where object temp files are normally
        -- stored. The pid prevents multiple fsck processes
        -- contending over the same file. (Multiple threads cannot,
        -- because OnlyActionOn is used.)
        pid <- liftIO getPID
        t <- fromRepo gitAnnexTmpObjectDir
        createAnnexDirectory t
        let tmp = t P.</> "fsck" <> toRawFilePath (show pid) <> "." <> keyFile key
        let cleanup = liftIO $ catchIO (R.removeLink tmp) (const noop)
        -- Remove any stale tmp file before the action, and the tmp
        -- file again after it.
        cleanup
        cleanup `after` a tmp

    -- Nothing: no download attempted (not enough disk space, or running
    -- in fast mode and no cheap retrieval was possible).
    -- Just (Left _): the download failed.
    -- Just (Right v): the content is in tmp, with verification v.
    getfile tmp = ifM (checkDiskSpace Nothing (Just (P.takeDirectory tmp)) key 0 True)
        ( ifM (getcheap tmp)
            ( return (Just (Right UnVerified))
            , ifM (Annex.getRead Annex.fast)
                ( return Nothing
                , Just <$> tryNonAsync (getfile' tmp)
                )
            )
        , return Nothing
        )

    getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) nullMeterUpdate (RemoteVerify remote)

    -- Tries the remote's cheap retrieval method, when it has one.
    getcheap tmp = case Remote.retrieveKeyFileCheap remote of
        Just a -> isRight <$> tryNonAsync (a key afile (fromRawFilePath tmp))
        Nothing -> return False
{- Starts a fsck of a key that has no associated file.
 -
 - Nothing is done when the key's backend variety cannot be looked up.
 - Otherwise the key is checked locally, or on the remote when one was
 - specified. -}
startKey :: Maybe Remote -> Incremental -> (SeekInput, Key, ActionItem) -> NumCopies -> CommandStart
startKey from inc (si, key, ai) numcopies = do
    mbackend <- Backend.maybeLookupBackendVariety (fromKey keyVariety key)
    case mbackend of
        Nothing -> stop
        Just _ -> runFsck inc si ai key checks
  where
    checks = maybe
        (performKey key numcopies)
        (performRemote key (AssociatedFile Nothing) numcopies)
        from
{- Runs the local checks that apply to a key that has no associated
 - file. The result is True only when all of them returned True. -}
performKey :: Key -> NumCopies -> Annex Bool
performKey key numcopies = do
    keystatus <- getKeyStatus key
    check
        [ verifyLocationLog key keystatus (mkActionItem key)
        , checkKeySize key keystatus (mkActionItem key)
        , checkBackend key keystatus (AssociatedFile Nothing)
        , checkKeyNumCopies key (AssociatedFile Nothing) numcopies
        ]
2012-09-25 19:06:33 +00:00
{- Runs all of the checks, in order, and returns True only when every
 - one of them returned True. A failing check does not prevent the
 - later checks from being run. -}
check :: [Annex Bool] -> Annex Bool
check cs = and <$> sequence cs
{- Checks that an annex link points correctly to the annexed content,
 - and replaces it with a corrected link when it does not.
 -
 - Nothing is done when the file has no annex link target.
 - Always returns True. -}
fixLink :: Key -> RawFilePath -> Annex Bool
fixLink key file = do
    want <- calcRepo $ gitAnnexLink file key
    have <- getAnnexLinkTarget file
    maybe noop (go want) have
    return True
  where
    go want have
        | want /= fromInternalGitPath have = do
            showNote "fixing link"
            createWorkTreeDirectory (parentDir file)
            -- The old link is removed before the new one is added.
            liftIO $ R.removeLink file
            addAnnexLink want file
        | otherwise = noop
{- A repository that supports symlinks and is not bare may have in the past
 - been bare, or not supported symlinks. If so, the object may be located
 - in a directory other than the one where annex symlinks point to. Moves
 - the object in that case.
 -
 - Also if a repository has been converted to bare, or moved to a crippled
 - filesystem not supporting symlinks, the object file will be moved
 - to the other location.
 -
 - Always returns True; a failure to move the object is not treated as
 - a fsck failure.
 -}
fixObjectLocation :: Key -> Annex Bool
#ifdef mingw32_HOST_OS
fixObjectLocation _key = do
    -- Windows does not allow locked files to be renamed, but annex
    -- links are also not used on Windows.
    return True
#else
fixObjectLocation key = do
    loc <- calcRepo (gitAnnexLocation key)
    idealloc <- calcRepo (gitAnnexLocation' (const (pure True)) key)
    if loc == idealloc
        then return True
        else ifM (liftIO $ R.doesPathExist loc)
            ( moveobjdir loc idealloc
                `catchNonAsync` \_e -> return True
            , return True
            )
  where
    moveobjdir src dest = do
        let srcdir = parentDir src
        let destdir = parentDir dest
        showNote "normalizing object location"
        -- When the content file is moved, it will
        -- appear to other processes as if it has been removed.
        -- That should never happen to a process that has used
        -- lockContentShared, so avoid it by locking the content
        -- for removal, although it's not really being removed.
        lockContentForRemoval key (return True) $ \_lck -> do
            -- Thaw the content directory to allow renaming it.
            thawContentDir src
            createAnnexDirectory (parentDir destdir)
            liftIO $ renameDirectory
                (fromRawFilePath srcdir)
                (fromRawFilePath destdir)
            -- Since the directory was moved, lockContentForRemoval
            -- will not be able to remove the lock file it
            -- made. So, remove the lock file here.
            mlockfile <- contentLockFile key =<< getVersion
            liftIO $ maybe noop (removeWhenExistsWith R.removeLink) mlockfile
            freezeContentDir dest
            cleanObjectDirs src
            return True
#endif
{- Checks that the location log reflects the current status of the key,
 - in this repository only.
 -
 - Along the way, the permissions of the object file and its directory
 - are fixed up, and a warning is shown when annex.securehashesonly is
 - set but content using an insecure hash is present. -}
verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool
verifyLocationLog key keystatus ai = do
    obj <- calcRepo (gitAnnexLocation key)
    present <- if isKeyUnlockedThin keystatus
        then liftIO (doesFileExist (fromRawFilePath obj))
        else inAnnex key
    u <- getUUID

    {- Since we're checking that a key's object file is present, throw
     - in a permission fixup here too. -}
    when present $ do
        -- A failure to change the permissions is not fatal; the
        -- write permission check that follows warns about it.
        void $ tryIO $ case keystatus of
            KeyUnlockedThin -> thawContent obj
            KeyLockedThin -> thawContent obj
            _ -> freezeContent obj
        checkContentWritePerm obj >>= \case
            Nothing -> warning $ "** Unable to set correct write mode for " <> QuotedPath obj <> " ; perhaps you don't own that file, or perhaps it has an xattr or ACL set"
            _ -> return ()
    whenM (liftIO $ R.doesPathExist $ parentDir obj) $
        freezeContentDir obj

    {- Warn when annex.securehashesonly is set and content using an
     - insecure hash is present. This should only be able to happen
     - if the repository already contained the content before the
     - config was set, or of course if a hash was broken. -}
    whenM (pure present <&&> (not <$> Backend.isCryptographicallySecureKey key)) $
        whenM (annexSecureHashesOnly <$> Annex.getGitConfig) $
            warning $ "** Despite annex.securehashesonly being set, " <> QuotedPath obj <> " has content present in the annex using an insecure " <> UnquotedString (decodeBS (formatKeyVariety (fromKey keyVariety key))) <> " key"

    verifyLocationLog' key ai present u (logChange key u)
{- Checks that the location log reflects whether the key is present
 - in a remote. Corrections to the log are made using the remote's
 - UUID and its logStatus action. -}
verifyLocationLogRemote :: Key -> ActionItem -> Remote -> Bool -> Annex Bool
verifyLocationLogRemote key ai remote present =
    verifyLocationLog' key ai present remoteuuid updatestatus
  where
    remoteuuid = Remote.uuid remote
    updatestatus = Remote.logStatus remote key
{- Compares whether the key's content is present with whether the
 - location log lists the UUID as having it, and corrects the log
 - using the provided action when they disagree.
 -
 - Returns False only when the log claimed the content was present,
 - but it is missing. -}
verifyLocationLog' :: Key -> ActionItem -> Bool -> UUID -> (LogStatus -> Annex ()) -> Annex Bool
verifyLocationLog' key ai present u updatestatus = do
    uuids <- loggedLocations key
    case (present, u `elem` uuids) of
        (True, False) -> do
            fix InfoPresent
            -- There is no data loss, so do not fail.
            return True
        (False, True) -> do
            fix InfoMissing
            warning $
                "** Based on the location log, " <>
                actionItemDesc ai <>
                "\n** was expected to be present, " <>
                "but its content is missing."
            return False
        (False, False) -> do
            -- When the location log for the key is not present,
            -- create it, so that the key will be known.
            when (null uuids) $
                whenM (not <$> isKnownKey key) $
                    updatestatus InfoMissing
            return True
        (True, True) -> return True
  where
    fix s = do
        showNote "fixing location log"
        updatestatus s
{- Verifies that every repository that requires the content is listed
 - in the location log as having it.
 -
 - Only repositories that have required content configured, and are
 - not logged as containing the key, are asked whether they require
 - it. A warning listing the repositories is shown when any do. -}
verifyRequiredContent :: Key -> ActionItem -> Annex Bool
verifyRequiredContent key ai@(ActionItemAssociatedFile afile _) = case afile of
    -- Can't be checked if there's no associated file.
    AssociatedFile Nothing -> return True
    AssociatedFile (Just _) -> do
        requiredlocs <- S.fromList . M.keys <$> requiredContentMap
        if S.null requiredlocs
            then return True
            else checkrequired requiredlocs
  where
    checkrequired requiredlocs = do
        presentlocs <- S.fromList <$> loggedLocations key
        let candidates = S.toList $ S.difference requiredlocs presentlocs
        missinglocs <- filterM requiredby candidates
        if null missinglocs
            then return True
            else reportmissing missinglocs

    requiredby u = isRequiredContent (Just u) S.empty (Just key) afile False

    reportmissing missinglocs = do
        missingrequired <- Remote.prettyPrintUUIDs "missingrequired" missinglocs
        warning $
            "** Required content " <>
            actionItemDesc ai <>
            " is missing from these repositories:\n" <>
            UnquotedString missingrequired
        return False
verifyRequiredContent _ _ = return True
2016-02-14 20:52:43 +00:00
{- Verifies the associated file records.
 -
 - For a key in the unlocked thin state, the file is added to the
 - key's associated files when it is not already among them.
 - Always returns True. -}
verifyAssociatedFiles :: Key -> KeyStatus -> RawFilePath -> Annex Bool
verifyAssociatedFiles key keystatus file = do
    when (isKeyUnlockedThin keystatus) recordifmissing
    return True
  where
    recordifmissing = do
        topfile <- inRepo $ toTopFilePath file
        known <- map getTopFilePath <$> Database.Keys.getAssociatedFiles key
        when (getTopFilePath topfile `notElem` known) $
            Database.Keys.addAssociatedFile key topfile
{- Makes sure that a pointer file for the key is replaced with its
 - content, when the content is available in the annex.
 -
 - With annex.thin set, the work tree file is linked from the annex;
 - otherwise the object is copied and the copy is thawed. The inode
 - cache of the new file is then recorded. Always returns True. -}
verifyWorkTree :: Key -> RawFilePath -> Annex Bool
verifyWorkTree key file = do
    liftIO (isPointerFile file) >>= \case
        Just k | k == key -> whenM (inAnnex key) populate
        _ -> return ()
    return True
  where
    populate = do
        showNote "fixing worktree content"
        replaceWorkTreeFile (fromRawFilePath file) $ \tmp -> do
            -- The mode of the existing pointer file, when it can
            -- be read, is passed on for the replacement.
            mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file
            thin <- annexThin <$> Annex.getGitConfig
            if thin
                then void $ linkFromAnnex' key tmp mode
                else do
                    obj <- calcRepo (gitAnnexLocation key)
                    void $ checkedCopyFile key obj tmp mode
                    thawContent tmp
            Database.Keys.storeInodeCaches key [tmp]
{- Checks the size of the key's object file against the size encoded
 - in the key's metadata, if available. Content with a bad size is
 - handled by badContent.
 -
 - Not checked when the key is in the unlocked thin state, or when the
 - object file does not exist.
 -}
checkKeySize :: Key -> KeyStatus -> ActionItem -> Annex Bool
checkKeySize key keystatus ai = case keystatus of
    KeyUnlockedThin -> return True
    _ -> do
        obj <- calcRepo $ gitAnnexLocation key
        exists <- liftIO $ R.doesPathExist obj
        if exists
            then checkKeySizeOr badContent key obj ai
            else return True
{- Runs a check on a local copy of the content. When there is no local
 - copy, the check is not run and the result is True. -}
withLocalCopy :: Maybe RawFilePath -> (RawFilePath -> Annex Bool) -> Annex Bool
withLocalCopy mlocalcopy f = maybe (return True) f mlocalcopy
{- Checks the size of a local copy of content that was retrieved from
 - a remote. Content with a bad size is handled by badContentRemote. -}
checkKeySizeRemote :: Key -> Remote -> ActionItem -> RawFilePath -> Annex Bool
checkKeySizeRemote key remote ai localcopy =
    checkKeySizeOr onbad key localcopy ai
  where
    onbad = badContentRemote remote localcopy
{- Compares the size of a file with the size recorded in the key.
 -
 - A key with no recorded size always passes. On a mismatch, the
 - provided action is run to deal with the bad content, and the message
 - it returns is included in the warning; the result is then False. -}
checkKeySizeOr :: (Key -> Annex String) -> Key -> RawFilePath -> ActionItem -> Annex Bool
checkKeySizeOr bad key file ai = case fromKey keySize key of
    Nothing -> return True
    Just size -> do
        size' <- liftIO $ getFileSize file
        comparesizes size size'
  where
    comparesizes a b = do
        let same = a == b
        unless same $ badsize a b
        return same
    badsize a b = do
        msg <- bad key
        warning $ actionItemDesc ai
            <> ": Bad file size ("
            <> UnquotedString (compareSizes storageUnits True a b)
            <> "); "
            <> UnquotedString msg
{- Looks for keys that the backend is able to upgrade.
 -
 - Only a warning suggesting that the user migrate is shown; no
 - migration is done here, because migration can cause more disk space
 - to be used, and makes worktree changes that need to be committed.
 -
 - Always returns True.
 -}
checkKeyUpgrade :: Backend -> Key -> ActionItem -> AssociatedFile -> Annex Bool
checkKeyUpgrade _ _ _ (AssociatedFile Nothing) =
    -- Don't suggest migrating without a filename, because
    -- while possible to do, there is no actual benefit from
    -- doing that in this situation.
    return True
checkKeyUpgrade backend key ai (AssociatedFile (Just file)) = do
    when upgradable $
        warning $ actionItemDesc ai
            <> ": Can be upgraded to an improved key format. "
            <> "You can do so by running: git annex migrate --backend="
            <> UnquotedByteString (formatKeyVariety (fromKey keyVariety key))
            <> " "
            <> QuotedPath file
    return True
  where
    -- False when the backend has no upgrade check at all.
    upgradable = maybe False ($ key) (Types.Backend.canUpgradeKey backend)
{- Runs the backend specific check on a key's content object.
 -
 - When annex.thin is set, an unlocked file may be a hard link to the object.
 - Thus when the user modifies the file, the object will be modified and
 - not pass the check, and we don't want to find an error in this case.
 -
 - Nothing is checked when the object file does not exist. Content that
 - fails the check is handled by badContent.
 -}
checkBackend :: Key -> KeyStatus -> AssociatedFile -> Annex Bool
checkBackend key keystatus afile = do
    content <- calcRepo (gitAnnexLocation key)
    ifM (liftIO $ R.doesPathExist content)
        ( ifM (pure (isKeyUnlockedThin keystatus) <&&> (not <$> isUnmodified key content))
            ( nocheck
            , do
                -- The inode cache is generated before the content
                -- is verified, so checkInodeCache can later tell
                -- if the object changed during verification.
                mic <- withTSDelta (liftIO . genInodeCache content)
                ifM (checkBackendOr badContent key content ai)
                    ( do
                        checkInodeCache key content mic ai
                        return True
                    , return False
                    )
            )
        , nocheck
        )
  where
    nocheck = return True
    ai = mkActionItem (key, afile)
{- Verifies a local copy of content that was retrieved from a remote.
 - Content that fails the check is handled by badContentRemote. -}
checkBackendRemote :: Key -> Remote -> ActionItem -> RawFilePath -> Annex Bool
checkBackendRemote key remote ai localcopy =
    checkBackendOr onbad key localcopy ai
  where
    onbad = badContentRemote remote localcopy
{- Verifies the content of a file for a key, unless running in fast
 - mode, in which case the result is True without checking.
 -
 - When the content is bad, the provided action is run to deal with it,
 - and the message it returns is included in the warning. -}
checkBackendOr :: (Key -> Annex String) -> Key -> RawFilePath -> ActionItem -> Annex Bool
checkBackendOr bad key file ai = do
    fast <- Annex.getRead Annex.fast
    if fast
        then return True
        else do
            ok <- verifyKeyContent' key file
            unless ok reportbad
            return ok
  where
    reportbad = do
        msg <- bad key
        warning $ actionItemDesc ai
            <> ": Bad file content; "
            <> UnquotedString msg
{- Check, if there are InodeCaches recorded for a key, that one of them
 - matches the object file. There are situations where the InodeCache
 - of the object file does not get recorded, including a v8 upgrade.
 - There may also be situations where the wrong InodeCache is recorded,
 - if inodes are not stable.
 -
 - This must be called after the content of the object file has been
 - verified to be correct. The InodeCache is generated again to detect if
 - the object file was changed while the content was being verified.
 -
 - The InodeCache passed in is expected to be the one that was generated
 - before the content was verified. When there is none, nothing is done.
 -}
checkInodeCache :: Key -> RawFilePath -> Maybe InodeCache -> ActionItem -> Annex ()
checkInodeCache key content mic ai = case mic of
    Nothing -> noop
    Just ic -> do
        ics <- Database.Keys.getInodeCaches key
        -- Nothing to fix up when no InodeCaches are recorded at all.
        unless (null ics) $
            unlessM (isUnmodifiedCheapLowLevel ic ics) $ do
                -- Generate the InodeCache again, and only record the
                -- earlier one when the two compare as the same.
                withTSDelta (liftIO . genInodeCache content) >>= \case
                    Nothing -> noop
                    Just ic' -> whenM (compareInodeCaches ic ic') $ do
                        warning $ actionItemDesc ai
                            <> ": Stale or missing inode cache; updating."
                        Database.Keys.addInodeCaches key [ic]
{- Checks that enough copies of the key exist, going by the location
 - log.
 -
 - Locations partitioned off as untrusted or dead are not counted.
 - When there are too few copies, the result is False and a warning is
 - shown, unless the key is dead, in which case the check is skipped. -}
checkKeyNumCopies :: Key -> AssociatedFile -> NumCopies -> Annex Bool
checkKeyNumCopies key afile numcopies = do
    -- The key itself is used in messages when there is no file.
    let (desc, hasafile) = case afile of
            AssociatedFile Nothing -> (serializeKey' key, False)
            AssociatedFile (Just af) -> (af, True)
    locs <- loggedLocations key
    (untrustedlocations, otherlocations) <- trustPartition UnTrusted locs
    (deadlocations, safelocations) <- trustPartition DeadTrusted otherlocations
    let present = numCopiesCount safelocations
    if present < fromNumCopies numcopies
        then ifM (checkDead key)
            ( do
                showLongNote $ "This key is dead, skipping."
                return True
            , do
                untrusted <- Remote.prettyPrintUUIDs "untrusted" untrustedlocations
                dead <- Remote.prettyPrintUUIDs "dead" deadlocations
                warning $ missingNote desc present numcopies untrusted dead
                when (present == 0 && not hasafile) $
                    showLongNote "(Avoid this check by running: git annex dead --key )"
                return False
            )
        else return True
{- Builds the warning shown when too few copies of a file exist.
 -
 - The parameters are the file (or key) to describe, the number of
 - copies that were counted, the number needed, and two strings listing
 - the untrusted and the dead repositories (either may be empty).
 -
 - The order of the equations matters, since earlier ones match the
 - more specific cases. -}
missingNote :: RawFilePath -> Int -> NumCopies -> String -> String -> StringContainingQuotedPath
-- No copies counted, and no untrusted locations either.
missingNote file 0 _ [] dead =
    "** No known copies exist of " <> QuotedPath file <> UnquotedString (honorDead dead)
-- No copies counted, but untrusted locations are listed.
missingNote file 0 _ untrusted dead =
    "Only these untrusted locations may have copies of " <> QuotedPath file <>
    "\n" <> UnquotedString untrusted <>
    "Back it up to trusted locations with git-annex copy." <> UnquotedString (honorDead dead)
-- Some copies counted, and no untrusted locations. The dead
-- repositories are not mentioned in this case.
missingNote file present needed [] _ =
    "Only " <> UnquotedString (show present) <> " of " <> UnquotedString (show (fromNumCopies needed)) <>
    " trustworthy copies exist of " <> QuotedPath file <>
    "\nBack it up with git-annex copy."
-- Some copies counted, and untrusted locations are listed: the message
-- of the previous equation, followed by the untrusted locations.
missingNote file present needed untrusted dead =
    missingNote file present needed [] dead <>
    "\nThe following untrusted locations may also have copies: " <>
    "\n" <> UnquotedString untrusted
{- Formats a note listing dead repositories that used to have copies.
 - The result is empty when the list of dead repositories is empty. -}
honorDead :: String -> String
honorDead [] = ""
honorDead dead = "\nThese dead repositories used to have copies\n" ++ dead
{- Bad content is moved aside, using moveBad. The result is a message
 - telling where it was moved to. -}
badContent :: Key -> Annex String
badContent key = describe <$> moveBad key
  where
    describe dest = "moved to " ++ fromRawFilePath dest
{- Bad content is dropped from the remote. We have downloaded a copy
 - from the remote to a temp file already (in some cases, it's just a
 - symlink to a file in the remote). To avoid any further data loss,
 - that temp file is moved to the bad content directory unless
 - the local annex has a copy of the content.
 -
 - The temp file is also left alone when a file for the key already
 - exists in the bad content directory.
 -
 - Returns a message describing what was done. When dropping from the
 - remote fails, the message contains the exception.
 -}
badContentRemote :: Remote -> RawFilePath -> Key -> Annex String
badContentRemote remote localcopy key = do
    bad <- fromRepo gitAnnexBadDir
    let destbad = bad P.</> keyFile key
    let destbad' = fromRawFilePath destbad
    movedbad <- ifM (inAnnex key <||> liftIO (doesFileExist destbad'))
        ( return False
        , do
            createAnnexDirectory (parentDir destbad)
            -- Any IO failure while preserving the file is treated as
            -- "not moved"; it does not prevent the drop below.
            liftIO $ catchDefaultIO False $
                -- A symlink is copied rather than moved.
                -- NOTE(review): presumably so the bad content directory
                -- gets the content and not just the link -- confirm.
                ifM (isSymbolicLink <$> R.getSymbolicLinkStatus localcopy)
                    ( copyFileExternal CopyTimeStamps (fromRawFilePath localcopy) destbad'
                    , do
                        moveFile localcopy destbad
                        return True
                    )
        )

    dropped <- tryNonAsync (Remote.removeKey remote Nothing key)
    -- Only update the remote's status when the drop succeeded.
    when (isRight dropped) $
        Remote.logStatus remote key InfoMissing
    return $ case (movedbad, dropped) of
        (True, Right ()) -> "moved from " ++ Remote.name remote ++
            " to " ++ fromRawFilePath destbad
        (False, Right ()) -> "dropped from " ++ Remote.name remote
        -- The space after "from" keeps the remote's name from being
        -- glued onto the preceding word.
        (_, Left e) -> "failed to drop from " ++ Remote.name remote ++ ": " ++ show e
2012-09-25 17:22:12 +00:00
{- Runs a fsck action on a key, unless needFsck says the key can be
 - skipped. When the action succeeds, the key is recorded as fscked
 - (which only has an effect for incremental fscks). The result of the
 - action becomes the result of the command. -}
runFsck :: Incremental -> SeekInput -> ActionItem -> Key -> Annex Bool -> CommandStart
runFsck inc si ai key a =
    stopUnless (needFsck inc key) $
        starting "fsck" (OnlyActionOn key ai) si perform
  where
    perform = do
        ok <- a
        when ok (recordFsckTime inc key)
        next (return ok)
2012-09-25 19:06:33 +00:00
{- Decides whether a key still has to be fscked. When continuing an
 - incremental fsck, keys already in the fsck database are skipped;
 - a scheduled incremental fsck defers to the mode it wraps. In every
 - other mode, all keys are fscked. -}
needFsck :: Incremental -> Key -> Annex Bool
needFsck inc key = case inc of
    ScheduleIncremental _ _ i -> needFsck i key
    ContIncremental h -> liftIO (not <$> FsckDb.inDb h key)
    _ -> return True
2012-09-25 19:06:33 +00:00
{- Adds the key to the fsck database, when the fsck is incremental. -}
recordFsckTime :: Incremental -> Key -> Annex ()
recordFsckTime inc key = withFsckDb inc record
  where
    record h = liftIO (FsckDb.addDb h key)
2012-09-25 18:16:34 +00:00
2013-05-19 18:46:48 +00:00
{- Records the time that an incremental fsck was started, by writing
 - a fresh fsckstate file.
 -
 - The time written into the file is meant to be identical to the
 - file's own modification time. That guards against time stamp damage
 - (for example, if an annex directory is copied without -a).
 - (On Windows that is not possible, so there the time in the file is
 - only equal to or greater than the modification time.)
 -}
recordStartTime :: UUID -> Annex ()
recordStartTime u = do
    statefile <- fromRepo (gitAnnexFsckState u)
    createAnnexDirectory (parentDir statefile)
    liftIO $ do
        -- Start from scratch, so the file gets a new modification time.
        removeWhenExistsWith R.removeLink statefile
        withFile (fromRawFilePath statefile) WriteMode $ \hdl -> do
#ifndef mingw32_HOST_OS
            stamp <- modificationTime <$> R.getFileStatus statefile
#else
            stamp <- getPOSIXTime
#endif
            hPutStr hdl (renderTime (realToFrac stamp))
    setAnnexFilePerm statefile
  where
    renderTime :: POSIXTime -> String
    renderTime = show
2012-09-25 18:16:34 +00:00
{- Forgets the recorded start time, by removing the fsckstate file
 - if it exists. -}
resetStartTime :: UUID -> Annex ()
resetStartTime u = do
    statefile <- fromRepo (gitAnnexFsckState u)
    liftIO $ removeWhenExistsWith R.removeLink statefile
2012-09-25 18:16:34 +00:00
{- Gets the start time of the incremental fsck, which is the
 - modification time of the fsckstate file.
 -
 - Nothing is returned when the file cannot be read, or when the time
 - stored inside it is not consistent with its modification time.
 -}
getStartTime :: UUID -> Annex (Maybe EpochTime)
getStartTime u = do
    statefile <- fromRepo (gitAnnexFsckState u)
    liftIO $ catchDefaultIO Nothing $ do
        mtime <- modificationTime <$> R.getFileStatus statefile
        recorded <- parsePOSIXTime <$> readFile (fromRawFilePath statefile)
        return $
            if consistent recorded (Just (realToFrac mtime))
                then Just mtime
                else Nothing
  where
    -- Except on Windows, the recorded time has to match exactly.
    consistent recorded actual =
#ifndef mingw32_HOST_OS
        recorded == actual
#else
        recorded >= actual
#endif
{- The mode a fsck runs in, as prepared by prepIncremental.
 -
 - NonIncremental: used when no incremental option was given. Every key
 - is fscked and nothing is recorded in the fsck database.
 -
 - ScheduleIncremental: wraps the mode selected for this run (a new pass
 - when no valid start time is recorded, otherwise a continued one).
 - On cleanup, the recorded start time is reset once the Duration has
 - elapsed since it.
 -
 - StartIncremental: a new incremental pass, with the handle of the
 - fsck database that fscked keys get recorded in. Every key is fscked.
 -
 - ContIncremental: a continued incremental pass, with the handle of
 - the fsck database. Keys already in the database are skipped.
 -}
data Incremental
    = NonIncremental
    | ScheduleIncremental Duration UUID Incremental
    | StartIncremental FsckDb.FsckHandle
    | ContIncremental FsckDb.FsckHandle
{- Sets up the fsck mode that corresponds to the incremental option.
 -
 - Starting a new pass records the start time and then begins a new
 - pass in the fsck database; it gives up when that is refused.
 - A scheduled incremental fsck continues the current pass when a valid
 - start time is recorded, and starts a new pass otherwise.
 -}
prepIncremental :: UUID -> Maybe IncrementalOpt -> Annex Incremental
prepIncremental u mopt = case mopt of
    Nothing -> pure NonIncremental
    Just StartIncrementalO -> do
        recordStartTime u
        ifM (FsckDb.newPass u)
            ( StartIncremental <$> openFsckDb u
            , giveup "Cannot start a new --incremental fsck pass; another fsck process is already running."
            )
    Just MoreIncrementalO -> ContIncremental <$> openFsckDb u
    Just (ScheduleIncrementalO delta) -> do
        started <- getStartTime u
        let inneropt = maybe StartIncrementalO (const MoreIncrementalO) started
        ScheduleIncremental delta u <$> prepIncremental u (Just inneropt)
{- Cleans up after a fsck run. For a scheduled incremental fsck, the
 - recorded start time is reset once the scheduled duration has passed
 - since it, and then the wrapped mode is cleaned up. Other modes need
 - no cleanup here. -}
cleanupIncremental :: Incremental -> Annex ()
cleanupIncremental (ScheduleIncremental delta u i) = do
    maybe noop expire =<< getStartTime u
    cleanupIncremental i
  where
    expire started = do
        now <- liftIO getPOSIXTime
        let elapsed = now - realToFrac started
        when (elapsed >= durationToPOSIXTime delta) $
            resetStartTime u
cleanupIncremental _ = return ()
{- Opens the fsck database, and registers a cleanup action that
 - closes it again. -}
openFsckDb :: UUID -> Annex FsckDb.FsckHandle
openFsckDb u = do
    hdl <- FsckDb.openDb u
    hdl <$ Annex.addCleanupAction FsckCleanup (FsckDb.closeDb hdl)
{- Runs an action with the handle of the fsck database, when the mode
 - has one. A scheduled incremental fsck uses the handle of the mode
 - it wraps. Does nothing for a non-incremental fsck. -}
withFsckDb :: Incremental -> (FsckDb.FsckHandle -> Annex ()) -> Annex ()
withFsckDb inc a = case inc of
    ContIncremental h -> a h
    StartIncremental h -> a h
    NonIncremental -> noop
    ScheduleIncremental _ _ i -> withFsckDb i a