when private journal file exists, still read from git-annex branch
Fix bug that caused stale git-annex branch information to read when annex.private or remote.name.annex-private is set. The private journal file should not prevent reading more current information from the git-annex branch, but used to. Note that, overBranchFileContents has to do additional work now, when there's a private journal file, it reads from the branch redundantly and more slowly. Sponsored-by: Jack Hill on Patreon
This commit is contained in:
parent
c5bf4d3504
commit
5a9e6b1fd4
5 changed files with 107 additions and 23 deletions
|
@ -280,8 +280,11 @@ precache file branchcontent = do
|
||||||
st <- getState
|
st <- getState
|
||||||
content <- if journalIgnorable st
|
content <- if journalIgnorable st
|
||||||
then pure branchcontent
|
then pure branchcontent
|
||||||
else fromMaybe branchcontent
|
else getJournalFileStale (GetPrivate True) file >>= return . \case
|
||||||
<$> getJournalFileStale (GetPrivate True) file
|
NoJournalledContent -> branchcontent
|
||||||
|
JournalledContent journalcontent -> journalcontent
|
||||||
|
PossiblyStaleJournalledContent journalcontent ->
|
||||||
|
branchcontent <> journalcontent
|
||||||
Annex.BranchState.setCache file content
|
Annex.BranchState.setCache file content
|
||||||
|
|
||||||
{- Like get, but does not merge the branch, so the info returned may not
|
{- Like get, but does not merge the branch, so the info returned may not
|
||||||
|
@ -296,8 +299,11 @@ getLocal' getprivate file = do
|
||||||
fastDebug "Annex.Branch" ("read " ++ fromRawFilePath file)
|
fastDebug "Annex.Branch" ("read " ++ fromRawFilePath file)
|
||||||
go =<< getJournalFileStale getprivate file
|
go =<< getJournalFileStale getprivate file
|
||||||
where
|
where
|
||||||
go (Just journalcontent) = return journalcontent
|
go NoJournalledContent = getRef fullname file
|
||||||
go Nothing = getRef fullname file
|
go (JournalledContent journalcontent) = return journalcontent
|
||||||
|
go (PossiblyStaleJournalledContent journalcontent) = do
|
||||||
|
v <- getRef fullname file
|
||||||
|
return (v <> journalcontent)
|
||||||
|
|
||||||
{- Gets the content of a file as staged in the branch's index. -}
|
{- Gets the content of a file as staged in the branch's index. -}
|
||||||
getStaged :: RawFilePath -> Annex L.ByteString
|
getStaged :: RawFilePath -> Annex L.ByteString
|
||||||
|
@ -813,12 +819,7 @@ overBranchFileContents select go = do
|
||||||
buf <- liftIO newEmptyMVar
|
buf <- liftIO newEmptyMVar
|
||||||
let go' reader = go $ liftIO reader >>= \case
|
let go' reader = go $ liftIO reader >>= \case
|
||||||
Just ((v, f), content) -> do
|
Just ((v, f), content) -> do
|
||||||
-- Check the journal if it did not get
|
content' <- checkjournal st f content
|
||||||
-- committed to the branch
|
|
||||||
content' <- if journalIgnorable st
|
|
||||||
then pure content
|
|
||||||
else maybe content Just
|
|
||||||
<$> getJournalFileStale (GetPrivate True) f
|
|
||||||
return (Just (v, f, content'))
|
return (Just (v, f, content'))
|
||||||
Nothing
|
Nothing
|
||||||
| journalIgnorable st -> return Nothing
|
| journalIgnorable st -> return Nothing
|
||||||
|
@ -834,16 +835,36 @@ overBranchFileContents select go = do
|
||||||
catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
|
catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
|
||||||
`finally` liftIO (void cleanup)
|
`finally` liftIO (void cleanup)
|
||||||
where
|
where
|
||||||
|
-- Check the journal, in case it did not get committed to the branch
|
||||||
|
checkjournal st f branchcontent
|
||||||
|
| journalIgnorable st = return branchcontent
|
||||||
|
| otherwise = getJournalFileStale (GetPrivate True) f >>= return . \case
|
||||||
|
NoJournalledContent -> branchcontent
|
||||||
|
JournalledContent journalledcontent ->
|
||||||
|
Just journalledcontent
|
||||||
|
PossiblyStaleJournalledContent journalledcontent ->
|
||||||
|
Just (fromMaybe mempty branchcontent <> journalledcontent)
|
||||||
|
|
||||||
|
drain buf fs = case getnext fs of
|
||||||
|
Just (v, f, fs') -> do
|
||||||
|
liftIO $ putMVar buf fs'
|
||||||
|
content <- getJournalFileStale (GetPrivate True) f >>= \case
|
||||||
|
NoJournalledContent -> return Nothing
|
||||||
|
JournalledContent journalledcontent ->
|
||||||
|
return (Just journalledcontent)
|
||||||
|
PossiblyStaleJournalledContent journalledcontent -> do
|
||||||
|
-- This is expensive, but happens
|
||||||
|
-- only when there is a private
|
||||||
|
-- journal file.
|
||||||
|
content <- getRef fullname f
|
||||||
|
return (Just (content <> journalledcontent))
|
||||||
|
return (Just (v, f, content))
|
||||||
|
Nothing -> do
|
||||||
|
liftIO $ putMVar buf []
|
||||||
|
return Nothing
|
||||||
|
|
||||||
getnext [] = Nothing
|
getnext [] = Nothing
|
||||||
getnext (f:fs) = case select f of
|
getnext (f:fs) = case select f of
|
||||||
Nothing -> getnext fs
|
Nothing -> getnext fs
|
||||||
Just v -> Just (v, f, fs)
|
Just v -> Just (v, f, fs)
|
||||||
|
|
||||||
drain buf fs = case getnext fs of
|
|
||||||
Just (v, f, fs') -> do
|
|
||||||
liftIO $ putMVar buf fs'
|
|
||||||
content <- getJournalFileStale (GetPrivate True) f
|
|
||||||
return (Just (v, f, content))
|
|
||||||
Nothing -> do
|
|
||||||
liftIO $ putMVar buf []
|
|
||||||
return Nothing
|
|
||||||
|
|
|
@ -89,8 +89,20 @@ setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
|
||||||
withFile tmpfile WriteMode $ \h -> writeJournalHandle h content
|
withFile tmpfile WriteMode $ \h -> writeJournalHandle h content
|
||||||
moveFile tmpfile (fromRawFilePath (jd P.</> jfile))
|
moveFile tmpfile (fromRawFilePath (jd P.</> jfile))
|
||||||
|
|
||||||
|
data JournalledContent
|
||||||
|
= NoJournalledContent
|
||||||
|
| JournalledContent L.ByteString
|
||||||
|
| PossiblyStaleJournalledContent L.ByteString
|
||||||
|
-- ^ This is used when the journalled content may have been
|
||||||
|
-- supersceded by content in the git-annex branch. The returned
|
||||||
|
-- content should be combined with content from the git-annex branch.
|
||||||
|
-- This is particularly the case when a file is in the private
|
||||||
|
-- journal, which does not get written to the git-annex branch,
|
||||||
|
-- and so the git-annex branch can contain changes to non-private
|
||||||
|
-- information that were made after that journal file was written.
|
||||||
|
|
||||||
{- Gets any journalled content for a file in the branch. -}
|
{- Gets any journalled content for a file in the branch. -}
|
||||||
getJournalFile :: JournalLocked -> GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString)
|
getJournalFile :: JournalLocked -> GetPrivate -> RawFilePath -> Annex JournalledContent
|
||||||
getJournalFile _jl = getJournalFileStale
|
getJournalFile _jl = getJournalFileStale
|
||||||
|
|
||||||
data GetPrivate = GetPrivate Bool
|
data GetPrivate = GetPrivate Bool
|
||||||
|
@ -106,7 +118,7 @@ data GetPrivate = GetPrivate Bool
|
||||||
- concurrency or other issues with a lazy read, and the minor loss of
|
- concurrency or other issues with a lazy read, and the minor loss of
|
||||||
- laziness doesn't matter much, as the files are not very large.
|
- laziness doesn't matter much, as the files are not very large.
|
||||||
-}
|
-}
|
||||||
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString)
|
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
|
||||||
getJournalFileStale (GetPrivate getprivate) file = do
|
getJournalFileStale (GetPrivate getprivate) file = do
|
||||||
-- Optimisation to avoid a second MVar access.
|
-- Optimisation to avoid a second MVar access.
|
||||||
st <- Annex.getState id
|
st <- Annex.getState id
|
||||||
|
@ -115,11 +127,19 @@ getJournalFileStale (GetPrivate getprivate) file = do
|
||||||
if getprivate && privateUUIDsKnown' st
|
if getprivate && privateUUIDsKnown' st
|
||||||
then do
|
then do
|
||||||
x <- getfrom (gitAnnexJournalDir g)
|
x <- getfrom (gitAnnexJournalDir g)
|
||||||
y <- getfrom (gitAnnexPrivateJournalDir g)
|
getfrom (gitAnnexPrivateJournalDir g) >>= \case
|
||||||
-- This concacenation is the same as happens in a
|
Nothing -> return $ case x of
|
||||||
-- merge of two git-annex branches.
|
Nothing -> NoJournalledContent
|
||||||
return (x <> y)
|
Just b -> JournalledContent b
|
||||||
else getfrom (gitAnnexJournalDir g)
|
Just y -> return $ PossiblyStaleJournalledContent $ case x of
|
||||||
|
Nothing -> y
|
||||||
|
-- This concacenation is the same as
|
||||||
|
-- happens in a merge of two
|
||||||
|
-- git-annex branches.
|
||||||
|
Just x' -> x' <> y
|
||||||
|
else getfrom (gitAnnexJournalDir g) >>= return . \case
|
||||||
|
Nothing -> NoJournalledContent
|
||||||
|
Just b -> JournalledContent b
|
||||||
where
|
where
|
||||||
jfile = journalFile file
|
jfile = journalFile file
|
||||||
getfrom d = catchMaybeIO $
|
getfrom d = catchMaybeIO $
|
||||||
|
|
|
@ -18,6 +18,8 @@ git-annex (8.20211012) UNRELEASED; urgency=medium
|
||||||
did not populate all unlocked files.
|
did not populate all unlocked files.
|
||||||
(Reversion in version 8.20210621)
|
(Reversion in version 8.20210621)
|
||||||
* Avoid a some sqlite crashes on Windows SubSystem for Linux (WSL).
|
* Avoid a some sqlite crashes on Windows SubSystem for Linux (WSL).
|
||||||
|
* Fix bug that caused stale git-annex branch information to read
|
||||||
|
when annex.private or remote.name.annex-private is set.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 11 Oct 2021 14:09:13 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 11 Oct 2021 14:09:13 -0400
|
||||||
|
|
||||||
|
|
|
@ -59,3 +59,4 @@ git annex wanted b
|
||||||
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
|
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
|
||||||
|
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 2"""
|
||||||
|
date="2021-10-26T16:11:53Z"
|
||||||
|
content="""
|
||||||
|
Easily reproduced this, thanks for a great bug report.
|
||||||
|
|
||||||
|
What I see after the setup is:
|
||||||
|
|
||||||
|
joey@darkstar:~/tmp/bench2/a>git show git-annex:preferred-content.log
|
||||||
|
5cf4b197-9d7a-4c97-b492-74daf50a17d7 anything timestamp=1635264591.115678237s
|
||||||
|
joey@darkstar:~/tmp/bench2/a>cat .git/annex/journal-private/preferred-content.log
|
||||||
|
5cf4b197-9d7a-4c97-b492-74daf50a17d7 nothing timestamp=1635264500.401941428s
|
||||||
|
d8bcb9f8-2ae2-4e3b-8e7b-fe536a4b53f3 anything timestamp=1635264558.726745148s
|
||||||
|
|
||||||
|
Where b is 5cf and the private remote is d8b.
|
||||||
|
|
||||||
|
So, it is ignoring the newer log line that is available in the git-annex
|
||||||
|
branch, and only loading the older value from the private log, which is only
|
||||||
|
included the private log because it was written in passing when the actually
|
||||||
|
private information was recorded there.
|
||||||
|
|
||||||
|
In fact, when the private log file exists, it only reads it, ignoring
|
||||||
|
the log in the git-annex branch. (But not ignoring non-private files
|
||||||
|
that are in the journal but have not made it to the branch yet.)
|
||||||
|
So even if the private log file only
|
||||||
|
included a line for the private remote, it would not see the information
|
||||||
|
that's in the git-annex branch.
|
||||||
|
|
||||||
|
So, the real root cause is that, when a journal file is available, git-annex
|
||||||
|
uses it, rather than reading from the git-annex branch. Normally this is not
|
||||||
|
a problem because when journal files are written, the current value from the
|
||||||
|
branch is included in them, and anyway the journal gets written to the branch
|
||||||
|
fairly quickly and deleted. But the private journal lingers around forever.
|
||||||
|
So, it needs to read from the git-annex branch in addition to the private
|
||||||
|
journal.
|
||||||
|
|
||||||
|
Fixed this. Since the bug did not actually cause the wrong information to
|
||||||
|
be written to anywhere, all you need to do to recover is upgrade.
|
||||||
|
"""]]
|
Loading…
Add table
Reference in a new issue