git-annex/Command/Fsck.hs

719 lines
24 KiB
Haskell
Raw Normal View History

2010-11-06 21:06:19 +00:00
{- git-annex command
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
2010-11-06 21:06:19 +00:00
-
- Licensed under the GNU AGPL version 3 or higher.
2010-11-06 21:06:19 +00:00
-}
2013-05-10 21:29:59 +00:00
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
2013-05-10 21:29:59 +00:00
2010-11-06 21:06:19 +00:00
module Command.Fsck where
import Command
import qualified Annex
import qualified Remote
import qualified Types.Backend
import qualified Backend
2011-10-04 04:40:47 +00:00
import Annex.Content
import Annex.Perms
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
import Annex.Link
2011-10-15 20:21:08 +00:00
import Logs.Location
import Logs.Trust
import Logs.Activity
2018-10-30 03:13:36 +00:00
import Utility.TimeStamp
import Logs.PreferredContent
2015-04-30 18:02:56 +00:00
import Annex.NumCopies
import Annex.UUID
import Annex.ReplaceFile
2011-07-06 00:36:43 +00:00
import Utility.DataUnits
import Utility.HumanTime
import Utility.CopyFile
2013-06-18 01:30:52 +00:00
import Git.FilePath
2014-02-11 19:29:56 +00:00
import Utility.PID
import qualified Database.Keys
import qualified Database.Fsck as FsckDb
import Types.CleanupActions
import Types.Key
import Types.ActionItem
import qualified Utility.RawFilePath as R
2010-11-06 21:06:19 +00:00
2012-09-25 18:16:34 +00:00
import Data.Time.Clock.POSIX
import System.Posix.Types (EpochTime)
import qualified Data.Set as S
import qualified Data.Map as M
import Data.Either
import qualified System.FilePath.ByteString as P
2012-09-25 18:16:34 +00:00
cmd :: Command
cmd = withGlobalOptions [jobsOption, jsonOptions, annexedMatchingOptions] $
command "fsck" SectionMaintenance
"find and fix problems"
paramPaths (seek <$$> optParser)
data FsckOptions = FsckOptions
{ fsckFiles :: CmdParams
2015-07-09 20:05:45 +00:00
, fsckFromOption :: Maybe (DeferredParse Remote)
, incrementalOpt :: Maybe IncrementalOpt
2015-07-09 16:44:03 +00:00
, keyOptions :: Maybe KeyOptions
}
data IncrementalOpt
= StartIncrementalO
| MoreIncrementalO
| ScheduleIncrementalO Duration
optParser :: CmdParamsDesc -> Parser FsckOptions
optParser desc = FsckOptions
<$> cmdParams desc
<*> optional (parseRemoteOption <$> strOption
2015-07-09 14:41:17 +00:00
( long "from" <> short 'f' <> metavar paramRemote
<> help "check remote"
<> completeRemotes
))
<*> optional parseincremental
<*> optional parseKeyOptions
where
parseincremental =
flag' StartIncrementalO
( long "incremental" <> short 'S'
<> help "start an incremental fsck"
)
<|> flag' MoreIncrementalO
( long "more" <> short 'm'
<> help "continue an incremental fsck"
)
<|> (ScheduleIncrementalO <$> option (eitherReader parseDuration)
( long "incremental-schedule" <> metavar paramTime
<> help "schedule incremental fscking"
))
seek :: FsckOptions -> CommandSeek
seek o = startConcurrency commandStages $ do
2015-07-09 20:05:45 +00:00
from <- maybe (pure Nothing) (Just <$$> getParsed) (fsckFromOption o)
u <- maybe getUUID (pure . Remote.uuid) from
checkDeadRepo u
i <- prepIncremental u (incrementalOpt o)
let seeker = AnnexedFileSeeker
{ startAction = start from i
, checkContentPresent = Nothing
, usesLocationLog = True
}
withKeyOptions (keyOptions o) False seeker
(\kai -> commandAction . startKey from i kai =<< getNumCopies)
(withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (fsckFiles o)
cleanupIncremental i
void $ tryIO $ recordActivity Fsck u
where
ww = WarnUnmatchLsFiles
2010-11-15 22:22:50 +00:00
checkDeadRepo :: UUID -> Annex ()
checkDeadRepo u =
whenM ((==) DeadTrusted <$> lookupTrust u) $
earlyWarning "Warning: Fscking a repository that is currently marked as dead."
start :: Maybe Remote -> Incremental -> SeekInput -> RawFilePath -> Key -> CommandStart
start from inc si file key = Backend.getBackend (fromRawFilePath file) key >>= \case
2017-12-05 19:00:50 +00:00
Nothing -> stop
Just backend -> do
(numcopies, _mincopies) <- getFileNumMinCopies file
2017-12-05 19:00:50 +00:00
case from of
Nothing -> go $ perform key file backend numcopies
Just r -> go $ performRemote key afile backend numcopies r
2012-11-12 05:05:04 +00:00
where
go = runFsck inc si (mkActionItem (key, afile)) key
afile = AssociatedFile (Just file)
2010-11-15 22:22:50 +00:00
perform :: Key -> RawFilePath -> Backend -> NumCopies -> Annex Bool
perform key file backend numcopies = do
keystatus <- getKeyFileStatus key (fromRawFilePath file)
check
-- order matters
[ fixLink key file
, verifyLocationLog key keystatus ai
, verifyRequiredContent key ai
, verifyAssociatedFiles key keystatus file
, verifyWorkTree key file
, checkKeySize key keystatus ai
, checkBackend backend key keystatus afile
, checkKeyUpgrade backend key ai afile
, checkKeyNumCopies key afile numcopies
]
where
afile = AssociatedFile (Just file)
2019-06-06 16:53:24 +00:00
ai = mkActionItem (key, afile)
{- To fsck a remote, the content is retrieved to a tmp file,
- and checked locally. -}
performRemote :: Key -> AssociatedFile -> Backend -> NumCopies -> Remote -> Annex Bool
performRemote key afile backend numcopies remote =
dispatch =<< Remote.hasKey remote key
2012-11-12 05:05:04 +00:00
where
dispatch (Left err) = do
showNote err
return False
2017-12-05 19:00:50 +00:00
dispatch (Right True) = withtmp $ \tmpfile ->
getfile tmpfile >>= \case
2015-04-27 21:40:21 +00:00
Nothing -> go True Nothing
Just (Right verification) -> go True (Just (tmpfile, verification))
Just (Left _) -> do
warning "failed to download file from remote"
2015-02-12 20:03:59 +00:00
void $ go True Nothing
return False
2012-11-12 05:05:04 +00:00
dispatch (Right False) = go False Nothing
go present lv = check
[ verifyLocationLogRemote key ai remote present
, verifyRequiredContent key ai
, withLocalCopy (fmap fst lv) $ checkKeySizeRemote key remote ai
, case fmap snd lv of
Just Verified -> return True
_ -> withLocalCopy (fmap fst lv) $
checkBackendRemote backend key remote ai
, checkKeyNumCopies key afile numcopies
2012-11-12 05:05:04 +00:00
]
2019-06-06 16:53:24 +00:00
ai = mkActionItem (key, afile)
2012-11-12 05:05:04 +00:00
withtmp a = do
-- Put it in the gitAnnexTmpObjectDir since that's on a
-- filesystem where object temp files are normally
-- stored. The pid prevents multiple fsck processes
-- contending over the same file. (Multiple threads cannot,
-- because OnlyActionOn is used.)
2014-02-11 19:29:56 +00:00
pid <- liftIO getPID
t <- fromRepo gitAnnexTmpObjectDir
2012-11-12 05:05:04 +00:00
createAnnexDirectory t
let tmp = t P.</> "fsck" <> toRawFilePath (show pid) <> "." <> keyFile key
let cleanup = liftIO $ catchIO (R.removeLink tmp) (const noop)
2012-11-12 05:05:04 +00:00
cleanup
cleanup `after` a tmp
getfile tmp = ifM (checkDiskSpace (Just (P.takeDirectory tmp)) key 0 True)
( ifM (getcheap tmp)
( return (Just (Right UnVerified))
2012-11-12 05:05:04 +00:00
, ifM (Annex.getState Annex.fast)
2015-04-27 21:40:21 +00:00
( return Nothing
, Just <$> tryNonAsync (getfile' tmp)
)
2012-11-12 05:05:04 +00:00
)
, return Nothing
)
getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) dummymeter
dummymeter _ = noop
getcheap tmp = case Remote.retrieveKeyFileCheap remote of
Just a -> isRight <$> tryNonAsync (a key afile (fromRawFilePath tmp))
Nothing -> return False
startKey :: Maybe Remote -> Incremental -> (SeekInput, Key, ActionItem) -> NumCopies -> CommandStart
startKey from inc (si, key, ai) numcopies =
Backend.maybeLookupBackendVariety (fromKey keyVariety key) >>= \case
Nothing -> stop
Just backend -> runFsck inc si ai key $
case from of
Nothing -> performKey key backend numcopies
Just r -> performRemote key (AssociatedFile Nothing) backend numcopies r
performKey :: Key -> Backend -> NumCopies -> Annex Bool
performKey key backend numcopies = do
keystatus <- getKeyStatus key
check
2017-03-10 19:03:33 +00:00
[ verifyLocationLog key keystatus (mkActionItem key)
, checkKeySize key keystatus (mkActionItem key)
, checkBackend backend key keystatus (AssociatedFile Nothing)
, checkKeyNumCopies key (AssociatedFile Nothing) numcopies
]
2012-09-25 19:06:33 +00:00
check :: [Annex Bool] -> Annex Bool
2013-09-25 07:09:06 +00:00
check cs = and <$> sequence cs
{- Checks that symlinks points correctly to the annexed content. -}
fixLink :: Key -> RawFilePath -> Annex Bool
fixLink key file = do
want <- calcRepo $ gitAnnexLink file key
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
have <- getAnnexLinkTarget file
2013-01-06 19:42:49 +00:00
maybe noop (go want) have
return True
where
go want have
| want /= fromInternalGitPath have = do
showNote "fixing link"
createWorkTreeDirectory (parentDir file)
liftIO $ R.removeLink file
addAnnexLink want file
| otherwise = noop
{- Checks that the location log reflects the current status of the key,
2012-12-13 04:45:27 +00:00
- in this repository only. -}
verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool
verifyLocationLog key keystatus ai = do
obj <- calcRepo (gitAnnexLocation key)
present <- if isKeyUnlockedThin keystatus
then liftIO (doesFileExist (fromRawFilePath obj))
else inAnnex key
2013-01-06 19:42:49 +00:00
u <- getUUID
{- Since we're checking that a key's object file is present, throw
2013-01-06 19:42:49 +00:00
- in a permission fixup here too. -}
when present $ do
void $ tryIO $ case keystatus of
KeyUnlockedThin -> thawContent obj
KeyLockedThin -> thawContent obj
_ -> freezeContent obj
unlessM (isContentWritePermOk obj) $
warning $ "** Unable to set correct write mode for " ++ fromRawFilePath obj ++ " ; perhaps you don't own that file"
whenM (liftIO $ R.doesPathExist $ parentDir obj) $
freezeContentDir obj
{- Warn when annex.securehashesonly is set and content using an
- insecure hash is present. This should only be able to happen
- if the repository already contained the content before the
- config was set. -}
whenM (pure present <&&> (not <$> Backend.isCryptographicallySecure key)) $
whenM (annexSecureHashesOnly <$> Annex.getGitConfig) $
warning $ "** Despite annex.securehashesonly being set, " ++ fromRawFilePath obj ++ " has content present in the annex using an insecure " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key"
verifyLocationLog' key ai present u (logChange key u)
verifyLocationLogRemote :: Key -> ActionItem -> Remote -> Bool -> Annex Bool
verifyLocationLogRemote key ai remote present =
verifyLocationLog' key ai present (Remote.uuid remote)
(Remote.logStatus remote key)
verifyLocationLog' :: Key -> ActionItem -> Bool -> UUID -> (LogStatus -> Annex ()) -> Annex Bool
verifyLocationLog' key ai present u updatestatus = do
uuids <- loggedLocations key
case (present, u `elem` uuids) of
(True, False) -> do
2016-05-10 17:08:16 +00:00
fix InfoPresent
-- There is no data loss, so do not fail.
return True
(False, True) -> do
2016-05-10 17:08:16 +00:00
fix InfoMissing
warning $
"** Based on the location log, " ++
decodeBS' (actionItemDesc ai) ++
"\n** was expected to be present, " ++
2016-05-10 17:08:16 +00:00
"but its content is missing."
return False
(False, False) -> do
-- When the location log for the key is not present,
-- create it, so that the key will be known.
when (null uuids) $
whenM (not <$> isKnownKey key) $
updatestatus InfoMissing
return True
(True, True) -> return True
2012-11-12 05:05:04 +00:00
where
fix s = do
showNote "fixing location log"
2013-12-01 19:52:30 +00:00
updatestatus s
{- Verifies that all repos that are required to contain the content do,
- checking against the location log. -}
verifyRequiredContent :: Key -> ActionItem -> Annex Bool
verifyRequiredContent key ai@(ActionItemAssociatedFile afile _) = case afile of
-- Can't be checked if there's no associated file.
AssociatedFile Nothing -> return True
AssociatedFile (Just _) -> do
requiredlocs <- S.fromList . M.keys <$> requiredContentMap
if S.null requiredlocs
then return True
else go requiredlocs
where
go requiredlocs = do
presentlocs <- S.fromList <$> loggedLocations key
missinglocs <- filterM
(\u -> isRequiredContent (Just u) S.empty (Just key) afile False)
(S.toList $ S.difference requiredlocs presentlocs)
if null missinglocs
then return True
else do
missingrequired <- Remote.prettyPrintUUIDs "missingrequired" missinglocs
warning $
"** Required content " ++
decodeBS' (actionItemDesc ai) ++
" is missing from these repositories:\n" ++
missingrequired
return False
verifyRequiredContent _ _ = return True
2016-02-14 20:52:43 +00:00
{- Verifies the associated file records. -}
verifyAssociatedFiles :: Key -> KeyStatus -> RawFilePath -> Annex Bool
verifyAssociatedFiles key keystatus file = do
when (isKeyUnlockedThin keystatus) $ do
f <- inRepo $ toTopFilePath file
afs <- Database.Keys.getAssociatedFiles key
unless (getTopFilePath f `elem` map getTopFilePath afs) $
Database.Keys.addAssociatedFile key f
return True
verifyWorkTree :: Key -> RawFilePath -> Annex Bool
verifyWorkTree key file = do
{- Make sure that a pointer file is replaced with its content,
- when the content is available. -}
mk <- liftIO $ isPointerFile file
case mk of
Just k | k == key -> whenM (inAnnex key) $ do
showNote "fixing worktree content"
replaceWorkTreeFile (fromRawFilePath file) $ \tmp -> do
let tmp' = toRawFilePath tmp
mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file
ifM (annexThin <$> Annex.getGitConfig)
( void $ linkFromAnnex key tmp' mode
, do
obj <- calcRepo (gitAnnexLocation key)
void $ checkedCopyFile key obj tmp' mode
thawContent tmp'
)
Database.Keys.storeInodeCaches key [file]
_ -> return ()
return True
{- The size of the data for a key is checked against the size encoded in
2013-01-06 19:42:49 +00:00
- the key's metadata, if available.
-
- Not checked when a file is unlocked.
-}
checkKeySize :: Key -> KeyStatus -> ActionItem -> Annex Bool
checkKeySize _ KeyUnlockedThin _ = return True
checkKeySize key _ ai = do
file <- calcRepo $ gitAnnexLocation key
ifM (liftIO $ R.doesPathExist file)
( checkKeySizeOr badContent key file ai
, return True
)
withLocalCopy :: Maybe RawFilePath -> (RawFilePath -> Annex Bool) -> Annex Bool
withLocalCopy Nothing _ = return True
withLocalCopy (Just localcopy) f = f localcopy
checkKeySizeRemote :: Key -> Remote -> ActionItem -> RawFilePath -> Annex Bool
checkKeySizeRemote key remote ai localcopy =
checkKeySizeOr (badContentRemote remote localcopy) key localcopy ai
checkKeySizeOr :: (Key -> Annex String) -> Key -> RawFilePath -> ActionItem -> Annex Bool
checkKeySizeOr bad key file ai = case fromKey keySize key of
Nothing -> return True
Just size -> do
size' <- liftIO $ getFileSize file
comparesizes size size'
2012-11-12 05:05:04 +00:00
where
comparesizes a b = do
let same = a == b
unless same $ badsize a b
return same
badsize a b = do
msg <- bad key
warning $ concat
[ decodeBS' (actionItemDesc ai)
, ": Bad file size ("
, compareSizes storageUnits True a b
, "); "
, msg
2012-11-12 05:05:04 +00:00
]
{- Check for keys that are upgradable.
-
- Warns and suggests the user migrate, but does not migrate itself,
- because migration can cause more disk space to be used, and makes
- worktree changes that need to be committed.
-}
checkKeyUpgrade :: Backend -> Key -> ActionItem -> AssociatedFile -> Annex Bool
checkKeyUpgrade backend key ai (AssociatedFile (Just file)) =
case Types.Backend.canUpgradeKey backend of
Just a | a key -> do
warning $ concat
[ decodeBS' (actionItemDesc ai)
, ": Can be upgraded to an improved key format. "
, "You can do so by running: git annex migrate --backend="
, decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " "
, decodeBS' file
]
return True
_ -> return True
checkKeyUpgrade _ _ _ (AssociatedFile Nothing) =
-- Don't suggest migrating without a filename, because
-- while possible to do, there is no actual benefit from
-- doing that in this situation.
return True
{- Runs the backend specific check on a key's content object.
-
- When a file is unlocked, it may be a hard link to the object,
- thus when the user modifies the file, the object will be modified and
- not pass the check, and we don't want to find an error in this case.
- So, skip the check if the key is unlocked and modified.
2013-01-08 16:41:09 +00:00
-}
checkBackend :: Backend -> Key -> KeyStatus -> AssociatedFile -> Annex Bool
checkBackend backend key keystatus afile = do
content <- calcRepo (gitAnnexLocation key)
ifM (pure (isKeyUnlockedThin keystatus) <&&> (not <$> isUnmodified key content))
( nocheck
, checkBackendOr badContent backend key content ai
2013-01-08 16:41:09 +00:00
)
where
nocheck = return True
2019-06-06 16:53:24 +00:00
ai = mkActionItem (key, afile)
checkBackendRemote :: Backend -> Key -> Remote -> ActionItem -> RawFilePath -> Annex Bool
checkBackendRemote backend key remote ai localcopy =
checkBackendOr (badContentRemote remote localcopy) backend key localcopy ai
checkBackendOr :: (Key -> Annex String) -> Backend -> Key -> RawFilePath -> ActionItem -> Annex Bool
checkBackendOr bad backend key file ai =
checkBackendOr' bad backend key file ai (return True)
-- The postcheck action is run after the content is verified,
-- in order to detect situations where the file is changed while being
-- verified.
checkBackendOr' :: (Key -> Annex String) -> Backend -> Key -> RawFilePath -> ActionItem -> Annex Bool -> Annex Bool
checkBackendOr' bad backend key file ai postcheck =
case Types.Backend.verifyKeyContent backend of
Nothing -> return True
Just verifier -> do
ok <- verifier key file
ifM postcheck
( do
unless ok $ do
msg <- bad key
warning $ concat
[ decodeBS' (actionItemDesc ai)
, ": Bad file content; "
, msg
]
return ok
, return True
)
checkKeyNumCopies :: Key -> AssociatedFile -> NumCopies -> Annex Bool
checkKeyNumCopies key afile numcopies = do
let (desc, hasafile) = case afile of
AssociatedFile Nothing -> (serializeKey key, False)
AssociatedFile (Just af) -> (fromRawFilePath af, True)
locs <- loggedLocations key
(untrustedlocations, otherlocations) <- trustPartition UnTrusted locs
(deadlocations, safelocations) <- trustPartition DeadTrusted otherlocations
let present = NumCopies (length safelocations)
if present < numcopies
then ifM (pure (not hasafile) <&&> checkDead key)
( do
showLongNote $ "This key is dead, skipping."
return True
, do
untrusted <- Remote.prettyPrintUUIDs "untrusted" untrustedlocations
dead <- Remote.prettyPrintUUIDs "dead" deadlocations
warning $ missingNote desc present numcopies untrusted dead
when (fromNumCopies present == 0 && not hasafile) $
2015-06-09 19:12:40 +00:00
showLongNote "(Avoid this check by running: git annex dead --key )"
return False
)
else return True
missingNote :: String -> NumCopies -> NumCopies -> String -> String -> String
missingNote file (NumCopies 0) _ [] dead =
"** No known copies exist of " ++ file ++ honorDead dead
missingNote file (NumCopies 0) _ untrusted dead =
"Only these untrusted locations may have copies of " ++ file ++
"\n" ++ untrusted ++
"Back it up to trusted locations with git-annex copy." ++ honorDead dead
missingNote file present needed [] _ =
"Only " ++ show (fromNumCopies present) ++ " of " ++ show (fromNumCopies needed) ++
" trustworthy copies exist of " ++ file ++
"\nBack it up with git-annex copy."
missingNote file present needed untrusted dead =
missingNote file present needed [] dead ++
"\nThe following untrusted locations may also have copies: " ++
"\n" ++ untrusted
honorDead :: String -> String
honorDead dead
| null dead = ""
| otherwise = "\nThese dead repositories used to have copies\n" ++ dead
{- Bad content is moved aside. -}
badContent :: Key -> Annex String
badContent key = do
dest <- moveBad key
return $ "moved to " ++ dest
{- Bad content is dropped from the remote. We have downloaded a copy
- from the remote to a temp file already (in some cases, it's just a
- symlink to a file in the remote). To avoid any further data loss,
- that temp file is moved to the bad content directory unless
- the local annex has a copy of the content. -}
badContentRemote :: Remote -> RawFilePath -> Key -> Annex String
badContentRemote remote localcopy key = do
bad <- fromRepo gitAnnexBadDir
let destbad = bad P.</> keyFile key
let destbad' = fromRawFilePath destbad
movedbad <- ifM (inAnnex key <||> liftIO (doesFileExist destbad'))
( return False
, do
createAnnexDirectory (parentDir destbad)
liftIO $ catchDefaultIO False $
ifM (isSymbolicLink <$> R.getSymbolicLinkStatus localcopy)
( copyFileExternal CopyTimeStamps (fromRawFilePath localcopy) destbad'
, do
moveFile (fromRawFilePath localcopy) destbad'
return True
)
)
2020-05-14 18:08:09 +00:00
dropped <- tryNonAsync (Remote.removeKey remote key)
when (isRight dropped) $
Remote.logStatus remote key InfoMissing
return $ case (movedbad, dropped) of
2020-05-14 18:08:09 +00:00
(True, Right ()) -> "moved from " ++ Remote.name remote ++
" to " ++ fromRawFilePath destbad
2020-05-14 18:08:09 +00:00
(False, Right ()) -> "dropped from " ++ Remote.name remote
(_, Left e) -> "failed to drop from" ++ Remote.name remote ++ ": " ++ show e
2012-09-25 17:22:12 +00:00
runFsck :: Incremental -> SeekInput -> ActionItem -> Key -> Annex Bool -> CommandStart
runFsck inc si ai key a = stopUnless (needFsck inc key) $
starting "fsck" (OnlyActionOn key ai) si $ do
make CommandStart return a StartMessage The goal is to be able to run CommandStart in the main thread when -J is used, rather than unncessarily passing it off to a worker thread, which incurs overhead that is signficant when the CommandStart is going to quickly decide to stop. To do that, the message it displays needs to be displayed in the worker thread, after the CommandStart has run. Also, the change will mean that CommandStart will no longer necessarily run with the same Annex state as CommandPerform. While its docs already said it should avoid modifying Annex state, I audited all the CommandStart code as part of the conversion. (Note that CommandSeek already sometimes runs with a different Annex state, and that has not been a source of any problems, so I am not too worried that this change will lead to breakage going forward.) The only modification of Annex state I found was it calling allowMessages in some Commands that default to noMessages. Dealt with that by adding a startCustomOutput and a startingUsualMessages. This lets a command start with noMessages and then select the output it wants for each CommandStart. One bit of breakage: onlyActionOn has been removed from commands that used it. The plan is that, since a StartMessage contains an ActionItem, when a Key can be extracted from that, the parallel job runner can run onlyActionOn' automatically. Then commands won't need to worry about this detail. Future work. Otherwise, this was a fairly straightforward process of making each CommandStart compile again. Hopefully other behavior changes were mostly avoided. In a few cases, a command had a CommandStart that called a CommandPerform that then called showStart multiple times. I have collapsed those down to a single start action. The main command to perhaps suffer from it is Command.Direct, which used to show a start for each file, and no longer does. Another minor behavior change is that some commands used showStart before, but had an associated file and a Key available, so were changed to ShowStart with an ActionItemAssociatedFile. That will not change the normal output or behavior, but --json output will now include the key. This should not break it for anyone using a real json parser.
2019-06-06 19:42:30 +00:00
ok <- a
when ok $
recordFsckTime inc key
next $ return ok
2012-09-25 19:06:33 +00:00
{- Check if a key needs to be fscked, with support for incremental fscks. -}
needFsck :: Incremental -> Key -> Annex Bool
needFsck (ScheduleIncremental _ _ i) k = needFsck i k
needFsck (ContIncremental h) key = liftIO $ not <$> FsckDb.inDb h key
needFsck _ _ = return True
2012-09-25 19:06:33 +00:00
recordFsckTime :: Incremental -> Key -> Annex ()
recordFsckTime inc key = withFsckDb inc $ \h -> liftIO $ FsckDb.addDb h key
2012-09-25 18:16:34 +00:00
2013-05-19 18:46:48 +00:00
{- Records the start time of an incremental fsck.
2012-09-25 18:16:34 +00:00
-
- To guard against time stamp damange (for example, if an annex directory
- is copied without -a), the fsckstate file contains a time that should
- be identical to its modification time.
- (This is not possible to do on Windows, and so the timestamp in
- the file will only be equal or greater than the modification time.)
-}
recordStartTime :: UUID -> Annex ()
recordStartTime u = do
f <- fromRepo (gitAnnexFsckState u)
createAnnexDirectory $ parentDir f
liftIO $ removeWhenExistsWith R.removeLink f
liftIO $ withFile (fromRawFilePath f) WriteMode $ \h -> do
#ifndef mingw32_HOST_OS
t <- modificationTime <$> R.getFileStatus f
#else
t <- getPOSIXTime
#endif
hPutStr h $ showTime $ realToFrac t
setAnnexFilePerm f
2012-11-12 05:05:04 +00:00
where
showTime :: POSIXTime -> String
showTime = show
2012-09-25 18:16:34 +00:00
resetStartTime :: UUID -> Annex ()
resetStartTime u = liftIO . removeWhenExistsWith R.removeLink
=<< fromRepo (gitAnnexFsckState u)
2012-09-25 18:16:34 +00:00
{- Gets the incremental fsck start time. -}
getStartTime :: UUID -> Annex (Maybe EpochTime)
getStartTime u = do
f <- fromRepo (gitAnnexFsckState u)
2012-09-25 18:16:34 +00:00
liftIO $ catchDefaultIO Nothing $ do
timestamp <- modificationTime <$> R.getFileStatus f
let fromstatus = Just (realToFrac timestamp)
fromfile <- parsePOSIXTime <$> readFile (fromRawFilePath f)
return $ if matchingtimestamp fromfile fromstatus
2012-09-25 18:16:34 +00:00
then Just timestamp
else Nothing
2012-11-12 05:05:04 +00:00
where
matchingtimestamp fromfile fromstatus =
#ifndef mingw32_HOST_OS
fromfile == fromstatus
#else
fromfile >= fromstatus
#endif
data Incremental
= NonIncremental
| ScheduleIncremental Duration UUID Incremental
| StartIncremental FsckDb.FsckHandle
| ContIncremental FsckDb.FsckHandle
prepIncremental :: UUID -> Maybe IncrementalOpt -> Annex Incremental
prepIncremental _ Nothing = pure NonIncremental
prepIncremental u (Just StartIncrementalO) = do
recordStartTime u
ifM (FsckDb.newPass u)
( StartIncremental <$> openFsckDb u
, giveup "Cannot start a new --incremental fsck pass; another fsck process is already running."
)
prepIncremental u (Just MoreIncrementalO) =
ContIncremental <$> openFsckDb u
prepIncremental u (Just (ScheduleIncrementalO delta)) = do
started <- getStartTime u
i <- prepIncremental u $ Just $ case started of
Nothing -> StartIncrementalO
Just _ -> MoreIncrementalO
return (ScheduleIncremental delta u i)
cleanupIncremental :: Incremental -> Annex ()
cleanupIncremental (ScheduleIncremental delta u i) = do
v <- getStartTime u
case v of
Nothing -> noop
Just started -> do
now <- liftIO getPOSIXTime
when (now - realToFrac started >= durationToPOSIXTime delta) $
resetStartTime u
cleanupIncremental i
cleanupIncremental _ = return ()
openFsckDb :: UUID -> Annex FsckDb.FsckHandle
openFsckDb u = do
h <- FsckDb.openDb u
Annex.addCleanupAction FsckCleanup $
FsckDb.closeDb h
return h
withFsckDb :: Incremental -> (FsckDb.FsckHandle -> Annex ()) -> Annex ()
withFsckDb (ContIncremental h) a = a h
withFsckDb (StartIncremental h) a = a h
withFsckDb NonIncremental _ = noop
withFsckDb (ScheduleIncremental _ _ i) a = withFsckDb i a
data KeyStatus
= KeyMissing
| KeyPresent
| KeyUnlockedThin
-- ^ An annex.thin worktree file is hard linked to the object.
| KeyLockedThin
-- ^ The object has hard links, but the file being fscked
-- is not the one that hard links to it.
deriving (Show)
isKeyUnlockedThin :: KeyStatus -> Bool
isKeyUnlockedThin KeyUnlockedThin = True
isKeyUnlockedThin KeyLockedThin = False
isKeyUnlockedThin KeyPresent = False
isKeyUnlockedThin KeyMissing = False
getKeyStatus :: Key -> Annex KeyStatus
getKeyStatus key = catchDefaultIO KeyMissing $ do
afs <- not . null <$> Database.Keys.getAssociatedFiles key
obj <- calcRepo (gitAnnexLocation key)
multilink <- ((> 1) . linkCount <$> liftIO (R.getFileStatus obj))
return $ if multilink && afs
then KeyUnlockedThin
else KeyPresent
getKeyFileStatus :: Key -> FilePath -> Annex KeyStatus
getKeyFileStatus key file = do
s <- getKeyStatus key
case s of
KeyUnlockedThin -> catchDefaultIO KeyUnlockedThin $
ifM (isJust <$> isAnnexLink (toRawFilePath file))
( return KeyLockedThin
, return KeyUnlockedThin
)
_ -> return s