fix --all to include not yet committed files from the journal
Fix bug caused by recent optimisations that could make git-annex not see recently recorded status information when configured with annex.alwayscommit=false. This does mean that --all can end up processing the same key more than once, but before the optimisations that introduced this bug, it used to also behave that way. So I didn't try to fix that; it's an edge case and anyway git-annex behaves well when run on the same key repeatedly. I am not too happy with the use of an MVar to buffer the list of files in the journal. I guess it doesn't defeat lazy streaming of the list, if that list is actually generated lazily, and anyway the size of the journal is normally capped and small, so if configs are changed to make it huge and this code path fires, git-annex using enough memory to buffer it all is not a large problem.
This commit is contained in:
parent
74acf17a31
commit
653b719472
4 changed files with 55 additions and 30 deletions
|
@ -43,6 +43,7 @@ import Data.Function
|
|||
import Data.Char
|
||||
import Data.ByteString.Builder
|
||||
import Control.Concurrent (threadDelay)
|
||||
import Control.Concurrent.MVar
|
||||
import qualified System.FilePath.ByteString as P
|
||||
|
||||
import Annex.Common
|
||||
|
@ -756,26 +757,57 @@ rememberTreeishLocked treeish graftpoint jl = do
|
|||
|
||||
{- Runs an action on the content of selected files from the branch.
|
||||
- This is much faster than reading the content of each file in turn,
|
||||
- because it lets git cat-file stream content as fast as it can run.
|
||||
- because it lets git cat-file stream content without blocking.
|
||||
-
|
||||
- The action is passed an IO action that it can repeatedly call to read
|
||||
- the next file and its contents. When there are no more files, that
|
||||
- action will return Nothing.
|
||||
- The action is passed a callback that it can repeatedly call to read
|
||||
- the next file and its contents. When there are no more files, the
|
||||
- callback will return Nothing.
|
||||
-}
|
||||
overBranchFileContents
|
||||
:: (RawFilePath -> Maybe v)
|
||||
-> (IO (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex ())
|
||||
-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex ())
|
||||
-> Annex ()
|
||||
overBranchFileContents select go = do
|
||||
void update
|
||||
st <- update
|
||||
g <- Annex.gitRepo
|
||||
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
|
||||
Git.LsTree.LsTreeRecursive
|
||||
(Git.LsTree.LsTreeLong False)
|
||||
fullname
|
||||
let select' f = fmap (\v -> (v, f)) (select f)
|
||||
let go' reader = go $ reader >>= \case
|
||||
Nothing -> return Nothing
|
||||
Just ((v, f), content) -> return (Just (v, f, content))
|
||||
buf <- liftIO newEmptyMVar
|
||||
let go' reader = go $ liftIO reader >>= \case
|
||||
Just ((v, f), content) -> do
|
||||
-- Check the journal if it did not get
|
||||
-- committed to the branch
|
||||
content' <- if journalIgnorable st
|
||||
then pure content
|
||||
else maybe content Just <$> getJournalFileStale f
|
||||
return (Just (v, f, content'))
|
||||
Nothing
|
||||
| journalIgnorable st -> return Nothing
|
||||
-- The journal did not get committed to the
|
||||
-- branch, and may contain files that
|
||||
-- are not present in the branch, which
|
||||
-- need to be provided to the action still.
|
||||
-- This can cause the action to be run a
|
||||
-- second time with a file it already ran on.
|
||||
| otherwise -> liftIO (tryTakeMVar buf) >>= \case
|
||||
Nothing -> drain buf =<< getJournalledFilesStale
|
||||
Just fs -> drain buf fs
|
||||
catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
|
||||
liftIO $ void cleanup
|
||||
where
|
||||
getnext [] = Nothing
|
||||
getnext (f:fs) = case select f of
|
||||
Nothing -> getnext fs
|
||||
Just v -> Just (v, f, fs)
|
||||
|
||||
drain buf fs = case getnext fs of
|
||||
Just (v, f, fs') -> do
|
||||
liftIO $ putMVar buf fs'
|
||||
content <- getJournalFileStale f
|
||||
return (Just (v, f, content))
|
||||
Nothing -> do
|
||||
liftIO $ putMVar buf []
|
||||
return Nothing
|
||||
|
|
|
@ -276,7 +276,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
|
|||
let discard reader = reader >>= \case
|
||||
Nothing -> noop
|
||||
Just _ -> discard reader
|
||||
let go reader = liftIO reader >>= \case
|
||||
let go reader = reader >>= \case
|
||||
Just (k, f, content) -> checktimelimit (discard reader) $ do
|
||||
maybe noop (Annex.Branch.precache f) content
|
||||
keyaction Nothing (SeekInput [], k, mkActionItem k)
|
||||
|
@ -373,7 +373,7 @@ seekFilteredKeys seeker listfs = do
|
|||
liftIO $ void cleanup
|
||||
where
|
||||
finisher mi oreader checktimelimit = liftIO oreader >>= \case
|
||||
Just ((si, f), content) -> checktimelimit discard $ do
|
||||
Just ((si, f), content) -> checktimelimit (liftIO discard) $ do
|
||||
keyaction f mi content $
|
||||
commandAction . startAction seeker si f
|
||||
finisher mi oreader checktimelimit
|
||||
|
@ -384,7 +384,7 @@ seekFilteredKeys seeker listfs = do
|
|||
Just _ -> discard
|
||||
|
||||
precachefinisher mi lreader checktimelimit = liftIO lreader >>= \case
|
||||
Just ((logf, (si, f), k), logcontent) -> checktimelimit discard $ do
|
||||
Just ((logf, (si, f), k), logcontent) -> checktimelimit (liftIO discard) $ do
|
||||
maybe noop (Annex.Branch.precache logf) logcontent
|
||||
checkMatcherWhen mi
|
||||
(matcherNeedsLocationLog mi && not (matcherNeedsFileName mi))
|
||||
|
@ -591,9 +591,9 @@ notSymlink :: RawFilePath -> IO Bool
|
|||
notSymlink f = liftIO $ not . isSymbolicLink <$> R.getSymbolicLinkStatus f
|
||||
|
||||
{- Returns an action that, when there's a time limit, can be used
|
||||
- to check it before processing a file. The IO action is run when over the
|
||||
- time limit. -}
|
||||
mkCheckTimeLimit :: Annex (IO () -> Annex () -> Annex ())
|
||||
- to check it before processing a file. The first action is run when over the
|
||||
- time limit, otherwise the second action is run. -}
|
||||
mkCheckTimeLimit :: Annex (Annex () -> Annex () -> Annex ())
|
||||
mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case
|
||||
Nothing -> return $ \_ a -> a
|
||||
Just (duration, cutoff) -> return $ \cleanup a -> do
|
||||
|
@ -602,6 +602,6 @@ mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case
|
|||
then do
|
||||
warning $ "Time limit (" ++ fromDuration duration ++ ") reached! Shutting down..."
|
||||
shutdown True
|
||||
liftIO cleanup
|
||||
cleanup
|
||||
liftIO $ exitWith $ ExitFailure 101
|
||||
else a
|
||||
|
|
|
@ -15,5 +15,8 @@ logs that are only in the journal.
|
|||
Before that optimisation, it was using Logs.Location.loggedKeys,
|
||||
which does look at the journal.
|
||||
|
||||
> fixed
|
||||
|
||||
(This is also a blocker for [[todo/hiding_a_repository]].)
|
||||
--[[Joey]]
|
||||
|
||||
[[done]] --[[Joey]]
|
||||
|
|
|
@ -151,19 +151,9 @@ later write.
|
|||
> No way to configure what repo is hidden yet. --[[Joey]]
|
||||
>
|
||||
> Implementation notes:
|
||||
>
|
||||
> * CmdLine.Seek precaches git-annex branch
|
||||
> location logs, but that does not include private ones. Since they're
|
||||
> cached, the private ones don't get read. Result is eg, whereis finds no
|
||||
> copies. Either need to disable CmdLine.Seek precaching when there's
|
||||
> hidden repos, or could make the cache indicate it's only of public
|
||||
> info, so private info still gets read.
|
||||
> * CmdLine.Seek contains a LsTreeRecursive over the branch to handle
|
||||
> --all, and again that won't see private information, including even
|
||||
> annexed files that are only present in the hidden repo.
|
||||
> * (And I wonder, don't both the caches above already miss things in
|
||||
> the journal?)
|
||||
> * Any other direct accesses of the branch, not going through
|
||||
>
|
||||
> * [[bugs/git-annex_branch_caching_bug]] was a problem, now fixed.
|
||||
> * Any other similar direct accesses of the branch, not going through
|
||||
> Annex.Branch, also need to be fixed (and may be missing journal files
|
||||
> already?) Command.ImportFeed.knownItems is one. Command.Log behavior
|
||||
> needs to be investigated, may be ok. And Logs.Web.withKnownUrls is another.
|
||||
|
|
Loading…
Reference in a new issue