From b2bafdb2fcfe83a864bfc11e2684f5c697613dfd Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 16 Oct 2018 13:30:04 -0400 Subject: [PATCH] v6: Fix database inconsistency That could cause git-annex to get confused about whether a locked file's content was present, when the object file got touched. Unfortunately this means more work sometimes when annex.thin is set, since it has to checksum the file to tell if it's still got the right content. Had to suppress output when inAnnex calls isUnmodified, otherwise "(checksum...)" would be printed in places it ought not to be, eg "git annex get" could turn out not need to get anything, and so only display that. This commit was sponsored by Ole-Morten Duesund on Patreon. --- Annex/Content.hs | 46 +++++++++++-------- CHANGELOG | 2 + Messages.hs | 25 ++++++++-- ...led_repeatedly_for_present_content_v6.mdwn | 24 ++++++++++ doc/bugs/v6_unlock_confused_by_touch.mdwn | 2 + doc/todo/smudge.mdwn | 6 --- 6 files changed, 76 insertions(+), 29 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 5d657cac2a..e814b0c636 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -97,29 +97,29 @@ inAnnex key = inAnnexCheck key $ liftIO . doesFileExist inAnnexCheck :: Key -> (FilePath -> Annex Bool) -> Annex Bool inAnnexCheck key check = inAnnex' id False check key -{- inAnnex that performs an arbitrary check of the key's content. - - - - When the content is unlocked, it must also be unmodified, or the bad - - value will be returned. - - - - In direct mode, at least one of the associated files must pass the - - check. Additionally, the file must be unmodified. - -} +{- inAnnex that performs an arbitrary check of the key's content. -} inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a inAnnex' isgood bad check key = withObjectLoc key checkindirect checkdirect where checkindirect loc = do r <- check loc if isgood r - then do - cache <- Database.Keys.getInodeCaches key - if null cache - then return r - else ifM (sameInodeCache loc cache) - ( return r - , return bad - ) + then ifM (annexThin <$> Annex.getGitConfig) + -- When annex.thin is set, the object file + -- could be modified; make sure it's not. + -- (Suppress any messages about + -- checksumming, to avoid them cluttering + -- the display.) + ( ifM (doQuietAction $ isUnmodified key loc) + ( return r + , return bad + ) + , return r + ) else return bad + + -- In direct mode, at least one of the associated files must pass the + -- check. Additionally, the file must be unmodified. checkdirect [] = return bad checkdirect (loc:locs) = do r <- check loc @@ -750,9 +750,17 @@ isUnmodified key f = go =<< geti cheapcheck fc = anyM (compareInodeCaches fc) =<< Database.Keys.getInodeCaches key expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f) - -- The file could have been modified while it was - -- being verified. Detect that. - ( geti >>= maybe (return False) (compareInodeCaches fc) + ( do + -- The file could have been modified while it was + -- being verified. Detect that. + ifM (geti >>= maybe (return False) (compareInodeCaches fc)) + ( do + -- Update the InodeCache to avoid + -- performing this expensive check again. + Database.Keys.addInodeCaches key [fc] + return True + , return False + ) , return False ) geti = withTSDelta (liftIO . genInodeCache f) diff --git a/CHANGELOG b/CHANGELOG index 0a83f1e475..f0021ee1bc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,8 @@ git-annex (6.20181012) UNRELEASED; urgency=medium * Webapp: Fix termux detection. * runshell: Use system locales when built with GIT_ANNEX_PACKAGE_INSTALL set. (For Neurodebian packages.) + * v6: Fix database inconsistency that could cause git-annex to + get confused about whether a locked file's content was present. -- Joey Hess Sat, 13 Oct 2018 00:52:02 -0400 diff --git a/Messages.hs b/Messages.hs index d5dee72e21..9e33bd7762 100644 --- a/Messages.hs +++ b/Messages.hs @@ -16,6 +16,7 @@ module Messages ( showSideAction, doSideAction, doQuietSideAction, + doQuietAction, showStoringStateAction, showOutput, showLongNote, @@ -111,11 +112,27 @@ doSideAction :: Annex a -> Annex a doSideAction = doSideAction' StartBlock doSideAction' :: SideActionBlock -> Annex a -> Annex a -doSideAction' b a = do - o <- Annex.getState Annex.output - set $ o { sideActionBlock = b } - set o `after` a +doSideAction' b = bracket setup cleanup . const where + setup = do + o <- Annex.getState Annex.output + set $ o { sideActionBlock = b } + return o + cleanup = set + set o = Annex.changeState $ \s -> s { Annex.output = o } + +{- Performs an action, suppressing all normal standard output, + - but not json output. -} +doQuietAction :: Annex a -> Annex a +doQuietAction = bracket setup cleanup . const + where + setup = do + o <- Annex.getState Annex.output + case outputType o of + NormalOutput -> set $ o { outputType = QuietOutput } + _ -> noop + return o + cleanup = set set o = Annex.changeState $ \s -> s { Annex.output = o } {- Make way for subsequent output of a command. -} diff --git a/doc/bugs/inAnnex_check_failed_repeatedly_for_present_content_v6.mdwn b/doc/bugs/inAnnex_check_failed_repeatedly_for_present_content_v6.mdwn index 60e145fa39..00c12567b3 100644 --- a/doc/bugs/inAnnex_check_failed_repeatedly_for_present_content_v6.mdwn +++ b/doc/bugs/inAnnex_check_failed_repeatedly_for_present_content_v6.mdwn @@ -30,6 +30,30 @@ the problem yet. --[[Joey]] > > --[[Joey]] +> > When annex.thin is not set, inAnnex does not need to check the inode +> > cache, and not checking it will avoid this problem. +> > +> > It is necessary for inAnnex to check the inode cache when annex.thin +> > is set, because then the object file can be a hard link to the working +> > tree and so modifiable. +> > +> > Checking for link count of 2 and only then checking the inode cache +> > won't suffice though, because the object file could be modified and then the +> > worktree file deleted, and then the object file would be modified with +> > a link count of 1. So with annex.thin, have to always check the inode +> > cache. +> > +> > So, it seems what has to be done is, when annex.thin is set, check the +> > inode cache first, if it's unchanged great, but if not, inAnnex would +> > need to checksum the object file to determine if it's been modified. +> > So inAnnex gets potentially very much slower for annex.thin, but I can't +> > see a way around that. --[[Joey]] + +> > > Also, I was able to reproduce the repeated get, after unlock; lock; +> > > touch; fsck and the above change did fix that. +> > > +> > > So, all [[done]]! --[[Joey]] + ## more information needed If gleachkr comes back to IRC, it would be good to find out: diff --git a/doc/bugs/v6_unlock_confused_by_touch.mdwn b/doc/bugs/v6_unlock_confused_by_touch.mdwn index f072e812ba..d7abddede3 100644 --- a/doc/bugs/v6_unlock_confused_by_touch.mdwn +++ b/doc/bugs/v6_unlock_confused_by_touch.mdwn @@ -24,3 +24,5 @@ lock, it does. So, here's a complete reproducer: git annex unlock file cat file /annex/objects/... + +> now [[fixed|done]] --[[Joey]] diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 81402a748e..664cf71742 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -25,12 +25,6 @@ git-annex should use smudge/clean filters. v6 mode (My enhanced smudge/clean patch set also fixed this problem, in a much nicer way...) -* - - I have this mostly analized and need to implement the planned fix. - -* - ## other warts * There are several v6 bugs that are edge cases and