when private journal file exists, still read from git-annex branch
Fix bug that caused stale git-annex branch information to read when annex.private or remote.name.annex-private is set. The private journal file should not prevent reading more current information from the git-annex branch, but used to. Note that, overBranchFileContents has to do additional work now, when there's a private journal file, it reads from the branch redundantly and more slowly. Sponsored-by: Jack Hill on Patreon
This commit is contained in:
parent
c5bf4d3504
commit
5a9e6b1fd4
5 changed files with 107 additions and 23 deletions
|
@ -280,8 +280,11 @@ precache file branchcontent = do
|
|||
st <- getState
|
||||
content <- if journalIgnorable st
|
||||
then pure branchcontent
|
||||
else fromMaybe branchcontent
|
||||
<$> getJournalFileStale (GetPrivate True) file
|
||||
else getJournalFileStale (GetPrivate True) file >>= return . \case
|
||||
NoJournalledContent -> branchcontent
|
||||
JournalledContent journalcontent -> journalcontent
|
||||
PossiblyStaleJournalledContent journalcontent ->
|
||||
branchcontent <> journalcontent
|
||||
Annex.BranchState.setCache file content
|
||||
|
||||
{- Like get, but does not merge the branch, so the info returned may not
|
||||
|
@ -296,8 +299,11 @@ getLocal' getprivate file = do
|
|||
fastDebug "Annex.Branch" ("read " ++ fromRawFilePath file)
|
||||
go =<< getJournalFileStale getprivate file
|
||||
where
|
||||
go (Just journalcontent) = return journalcontent
|
||||
go Nothing = getRef fullname file
|
||||
go NoJournalledContent = getRef fullname file
|
||||
go (JournalledContent journalcontent) = return journalcontent
|
||||
go (PossiblyStaleJournalledContent journalcontent) = do
|
||||
v <- getRef fullname file
|
||||
return (v <> journalcontent)
|
||||
|
||||
{- Gets the content of a file as staged in the branch's index. -}
|
||||
getStaged :: RawFilePath -> Annex L.ByteString
|
||||
|
@ -813,12 +819,7 @@ overBranchFileContents select go = do
|
|||
buf <- liftIO newEmptyMVar
|
||||
let go' reader = go $ liftIO reader >>= \case
|
||||
Just ((v, f), content) -> do
|
||||
-- Check the journal if it did not get
|
||||
-- committed to the branch
|
||||
content' <- if journalIgnorable st
|
||||
then pure content
|
||||
else maybe content Just
|
||||
<$> getJournalFileStale (GetPrivate True) f
|
||||
content' <- checkjournal st f content
|
||||
return (Just (v, f, content'))
|
||||
Nothing
|
||||
| journalIgnorable st -> return Nothing
|
||||
|
@ -834,16 +835,36 @@ overBranchFileContents select go = do
|
|||
catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
|
||||
`finally` liftIO (void cleanup)
|
||||
where
|
||||
getnext [] = Nothing
|
||||
getnext (f:fs) = case select f of
|
||||
Nothing -> getnext fs
|
||||
Just v -> Just (v, f, fs)
|
||||
|
||||
-- Check the journal, in case it did not get committed to the branch
|
||||
checkjournal st f branchcontent
|
||||
| journalIgnorable st = return branchcontent
|
||||
| otherwise = getJournalFileStale (GetPrivate True) f >>= return . \case
|
||||
NoJournalledContent -> branchcontent
|
||||
JournalledContent journalledcontent ->
|
||||
Just journalledcontent
|
||||
PossiblyStaleJournalledContent journalledcontent ->
|
||||
Just (fromMaybe mempty branchcontent <> journalledcontent)
|
||||
|
||||
drain buf fs = case getnext fs of
|
||||
Just (v, f, fs') -> do
|
||||
liftIO $ putMVar buf fs'
|
||||
content <- getJournalFileStale (GetPrivate True) f
|
||||
content <- getJournalFileStale (GetPrivate True) f >>= \case
|
||||
NoJournalledContent -> return Nothing
|
||||
JournalledContent journalledcontent ->
|
||||
return (Just journalledcontent)
|
||||
PossiblyStaleJournalledContent journalledcontent -> do
|
||||
-- This is expensive, but happens
|
||||
-- only when there is a private
|
||||
-- journal file.
|
||||
content <- getRef fullname f
|
||||
return (Just (content <> journalledcontent))
|
||||
return (Just (v, f, content))
|
||||
Nothing -> do
|
||||
liftIO $ putMVar buf []
|
||||
return Nothing
|
||||
|
||||
getnext [] = Nothing
|
||||
getnext (f:fs) = case select f of
|
||||
Nothing -> getnext fs
|
||||
Just v -> Just (v, f, fs)
|
||||
|
||||
|
|
|
@ -89,8 +89,20 @@ setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
|
|||
withFile tmpfile WriteMode $ \h -> writeJournalHandle h content
|
||||
moveFile tmpfile (fromRawFilePath (jd P.</> jfile))
|
||||
|
||||
data JournalledContent
|
||||
= NoJournalledContent
|
||||
| JournalledContent L.ByteString
|
||||
| PossiblyStaleJournalledContent L.ByteString
|
||||
-- ^ This is used when the journalled content may have been
|
||||
-- supersceded by content in the git-annex branch. The returned
|
||||
-- content should be combined with content from the git-annex branch.
|
||||
-- This is particularly the case when a file is in the private
|
||||
-- journal, which does not get written to the git-annex branch,
|
||||
-- and so the git-annex branch can contain changes to non-private
|
||||
-- information that were made after that journal file was written.
|
||||
|
||||
{- Gets any journalled content for a file in the branch. -}
|
||||
getJournalFile :: JournalLocked -> GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString)
|
||||
getJournalFile :: JournalLocked -> GetPrivate -> RawFilePath -> Annex JournalledContent
|
||||
getJournalFile _jl = getJournalFileStale
|
||||
|
||||
data GetPrivate = GetPrivate Bool
|
||||
|
@ -106,7 +118,7 @@ data GetPrivate = GetPrivate Bool
|
|||
- concurrency or other issues with a lazy read, and the minor loss of
|
||||
- laziness doesn't matter much, as the files are not very large.
|
||||
-}
|
||||
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString)
|
||||
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
|
||||
getJournalFileStale (GetPrivate getprivate) file = do
|
||||
-- Optimisation to avoid a second MVar access.
|
||||
st <- Annex.getState id
|
||||
|
@ -115,11 +127,19 @@ getJournalFileStale (GetPrivate getprivate) file = do
|
|||
if getprivate && privateUUIDsKnown' st
|
||||
then do
|
||||
x <- getfrom (gitAnnexJournalDir g)
|
||||
y <- getfrom (gitAnnexPrivateJournalDir g)
|
||||
-- This concacenation is the same as happens in a
|
||||
-- merge of two git-annex branches.
|
||||
return (x <> y)
|
||||
else getfrom (gitAnnexJournalDir g)
|
||||
getfrom (gitAnnexPrivateJournalDir g) >>= \case
|
||||
Nothing -> return $ case x of
|
||||
Nothing -> NoJournalledContent
|
||||
Just b -> JournalledContent b
|
||||
Just y -> return $ PossiblyStaleJournalledContent $ case x of
|
||||
Nothing -> y
|
||||
-- This concacenation is the same as
|
||||
-- happens in a merge of two
|
||||
-- git-annex branches.
|
||||
Just x' -> x' <> y
|
||||
else getfrom (gitAnnexJournalDir g) >>= return . \case
|
||||
Nothing -> NoJournalledContent
|
||||
Just b -> JournalledContent b
|
||||
where
|
||||
jfile = journalFile file
|
||||
getfrom d = catchMaybeIO $
|
||||
|
|
|
@ -18,6 +18,8 @@ git-annex (8.20211012) UNRELEASED; urgency=medium
|
|||
did not populate all unlocked files.
|
||||
(Reversion in version 8.20210621)
|
||||
* Avoid a some sqlite crashes on Windows SubSystem for Linux (WSL).
|
||||
* Fix bug that caused stale git-annex branch information to read
|
||||
when annex.private or remote.name.annex-private is set.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Mon, 11 Oct 2021 14:09:13 -0400
|
||||
|
||||
|
|
|
@ -59,3 +59,4 @@ git annex wanted b
|
|||
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
|
||||
|
||||
|
||||
> [[fixed|done]] --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 2"""
|
||||
date="2021-10-26T16:11:53Z"
|
||||
content="""
|
||||
Easily reproduced this, thanks for a great bug report.
|
||||
|
||||
What I see after the setup is:
|
||||
|
||||
joey@darkstar:~/tmp/bench2/a>git show git-annex:preferred-content.log
|
||||
5cf4b197-9d7a-4c97-b492-74daf50a17d7 anything timestamp=1635264591.115678237s
|
||||
joey@darkstar:~/tmp/bench2/a>cat .git/annex/journal-private/preferred-content.log
|
||||
5cf4b197-9d7a-4c97-b492-74daf50a17d7 nothing timestamp=1635264500.401941428s
|
||||
d8bcb9f8-2ae2-4e3b-8e7b-fe536a4b53f3 anything timestamp=1635264558.726745148s
|
||||
|
||||
Where b is 5cf and the private remote is d8b.
|
||||
|
||||
So, it is ignoring the newer log line that is available in the git-annex
|
||||
branch, and only loading the older value from the private log, which is only
|
||||
included the private log because it was written in passing when the actually
|
||||
private information was recorded there.
|
||||
|
||||
In fact, when the private log file exists, it only reads it, ignoring
|
||||
the log in the git-annex branch. (But not ignoring non-private files
|
||||
that are in the journal but have not made it to the branch yet.)
|
||||
So even if the private log file only
|
||||
included a line for the private remote, it would not see the information
|
||||
that's in the git-annex branch.
|
||||
|
||||
So, the real root cause is that, when a journal file is available, git-annex
|
||||
uses it, rather than reading from the git-annex branch. Normally this is not
|
||||
a problem because when journal files are written, the current value from the
|
||||
branch is included in them, and anyway the journal gets written to the branch
|
||||
fairly quickly and deleted. But the private journal lingers around forever.
|
||||
So, it needs to read from the git-annex branch in addition to the private
|
||||
journal.
|
||||
|
||||
Fixed this. Since the bug did not actually cause the wrong information to
|
||||
be written to anywhere, all you need to do to recover is upgrade.
|
||||
"""]]
|
Loading…
Add table
Reference in a new issue