Merge branch 'master' into hiddenannex
This commit is contained in:
commit
0bb57702e1
5 changed files with 113 additions and 36 deletions
|
@ -31,6 +31,8 @@ module Annex.Branch (
|
|||
rememberTreeish,
|
||||
performTransitions,
|
||||
withIndex,
|
||||
precache,
|
||||
overBranchFileContents,
|
||||
) where
|
||||
|
||||
import qualified Data.ByteString as B
|
||||
|
@ -42,6 +44,7 @@ import Data.Function
|
|||
import Data.Char
|
||||
import Data.ByteString.Builder
|
||||
import Control.Concurrent (threadDelay)
|
||||
import Control.Concurrent.MVar
|
||||
import qualified System.FilePath.ByteString as P
|
||||
|
||||
import Annex.Common
|
||||
|
@ -66,6 +69,7 @@ import Annex.HashObject
|
|||
import Git.Types (Ref(..), fromRef, fromRef', RefDate, TreeItemType(..))
|
||||
import Git.FilePath
|
||||
import Annex.CatFile
|
||||
import Git.CatFile (catObjectStreamLsTree)
|
||||
import Annex.Perms
|
||||
import Logs
|
||||
import Logs.Transitions
|
||||
|
@ -261,6 +265,18 @@ get file = getCache file >>= \case
|
|||
setCache file content
|
||||
return content
|
||||
|
||||
{- Used to cache the value of a file, which has been read from the branch
|
||||
- using some optimised method. The journal has to be checked, in case
|
||||
- it has a newer version of the file that has not reached the branch yet.
|
||||
-}
|
||||
precache :: RawFilePath -> L.ByteString -> Annex ()
|
||||
precache file branchcontent = do
|
||||
st <- getState
|
||||
content <- if journalIgnorable st
|
||||
then pure branchcontent
|
||||
else fromMaybe branchcontent <$> getJournalFileStale file
|
||||
Annex.BranchState.setCache file content
|
||||
|
||||
{- Like get, but does not merge the branch, so the info returned may not
|
||||
- reflect changes in remotes.
|
||||
- (Changing the value this returns, and then merging is always the
|
||||
|
@ -758,3 +774,59 @@ rememberTreeishLocked treeish graftpoint jl = do
|
|||
-- say that the index contains c'.
|
||||
setIndexSha c'
|
||||
|
||||
{- Runs an action on the content of selected files from the branch.
|
||||
- This is much faster than reading the content of each file in turn,
|
||||
- because it lets git cat-file stream content without blocking.
|
||||
-
|
||||
- The action is passed a callback that it can repeatedly call to read
|
||||
- the next file and its contents. When there are no more files, the
|
||||
- callback will return Nothing.
|
||||
-}
|
||||
overBranchFileContents
|
||||
:: (RawFilePath -> Maybe v)
|
||||
-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex ())
|
||||
-> Annex ()
|
||||
overBranchFileContents select go = do
|
||||
st <- update
|
||||
g <- Annex.gitRepo
|
||||
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
|
||||
Git.LsTree.LsTreeRecursive
|
||||
(Git.LsTree.LsTreeLong False)
|
||||
fullname
|
||||
let select' f = fmap (\v -> (v, f)) (select f)
|
||||
buf <- liftIO newEmptyMVar
|
||||
let go' reader = go $ liftIO reader >>= \case
|
||||
Just ((v, f), content) -> do
|
||||
-- Check the journal if it did not get
|
||||
-- committed to the branch
|
||||
content' <- if journalIgnorable st
|
||||
then pure content
|
||||
else maybe content Just <$> getJournalFileStale f
|
||||
return (Just (v, f, content'))
|
||||
Nothing
|
||||
| journalIgnorable st -> return Nothing
|
||||
-- The journal did not get committed to the
|
||||
-- branch, and may contain files that
|
||||
-- are not present in the branch, which
|
||||
-- need to be provided to the action still.
|
||||
-- This can cause the action to be run a
|
||||
-- second time with a file it already ran on.
|
||||
| otherwise -> liftIO (tryTakeMVar buf) >>= \case
|
||||
Nothing -> drain buf =<< getJournalledFilesStale
|
||||
Just fs -> drain buf fs
|
||||
catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
|
||||
liftIO $ void cleanup
|
||||
where
|
||||
getnext [] = Nothing
|
||||
getnext (f:fs) = case select f of
|
||||
Nothing -> getnext fs
|
||||
Just v -> Just (v, f, fs)
|
||||
|
||||
drain buf fs = case getnext fs of
|
||||
Just (v, f, fs') -> do
|
||||
liftIO $ putMVar buf fs'
|
||||
content <- getJournalFileStale f
|
||||
return (Just (v, f, content))
|
||||
Nothing -> do
|
||||
liftIO $ putMVar buf []
|
||||
return Nothing
|
||||
|
|
|
@ -12,6 +12,9 @@ git-annex (8.20210331) UNRELEASED; urgency=medium
|
|||
* directory: When cp supports reflinks, use it.
|
||||
* init: Fix a crash when the repo's was cloned from a repo that had an
|
||||
adjusted branch checked out, and the origin remote is not named "origin".
|
||||
* Fix bug caused by recent optimisations that could make git-annex not
|
||||
see recently recorded status information when configured with
|
||||
annex.alwayscommit=false.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Thu, 01 Apr 2021 12:17:26 -0400
|
||||
|
||||
|
|
|
@ -9,8 +9,6 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE TupleSections #-}
|
||||
|
||||
module CmdLine.Seek where
|
||||
|
||||
import Annex.Common
|
||||
|
@ -44,7 +42,6 @@ import Annex.Concurrent
|
|||
import Annex.CheckIgnore
|
||||
import Annex.Action
|
||||
import qualified Annex.Branch
|
||||
import qualified Annex.BranchState
|
||||
import qualified Database.Keys
|
||||
import qualified Utility.RawFilePath as R
|
||||
import Utility.Tuple
|
||||
|
@ -274,25 +271,18 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
|
|||
checktimelimit <- mkCheckTimeLimit
|
||||
keyaction <- mkkeyaction
|
||||
config <- Annex.getGitConfig
|
||||
g <- Annex.gitRepo
|
||||
|
||||
void Annex.Branch.update
|
||||
(l, cleanup) <- inRepo $ LsTree.lsTree
|
||||
LsTree.LsTreeRecursive
|
||||
(LsTree.LsTreeLong False)
|
||||
Annex.Branch.fullname
|
||||
let getk f = fmap (,f) (locationLogFileKey config f)
|
||||
let getk = locationLogFileKey config
|
||||
let discard reader = reader >>= \case
|
||||
Nothing -> noop
|
||||
Just _ -> discard reader
|
||||
let go reader = liftIO reader >>= \case
|
||||
Nothing -> return ()
|
||||
Just ((k, f), content) -> checktimelimit (discard reader) $ do
|
||||
maybe noop (Annex.BranchState.setCache f) content
|
||||
let go reader = reader >>= \case
|
||||
Just (k, f, content) -> checktimelimit (discard reader) $ do
|
||||
maybe noop (Annex.Branch.precache f) content
|
||||
keyaction Nothing (SeekInput [], k, mkActionItem k)
|
||||
go reader
|
||||
catObjectStreamLsTree l (getk . getTopFilePath . LsTree.file) g go
|
||||
liftIO $ void cleanup
|
||||
Nothing -> return ()
|
||||
Annex.Branch.overBranchFileContents getk go
|
||||
|
||||
runkeyaction getks = do
|
||||
keyaction <- mkkeyaction
|
||||
|
@ -383,7 +373,7 @@ seekFilteredKeys seeker listfs = do
|
|||
liftIO $ void cleanup
|
||||
where
|
||||
finisher mi oreader checktimelimit = liftIO oreader >>= \case
|
||||
Just ((si, f), content) -> checktimelimit discard $ do
|
||||
Just ((si, f), content) -> checktimelimit (liftIO discard) $ do
|
||||
keyaction f mi content $
|
||||
commandAction . startAction seeker si f
|
||||
finisher mi oreader checktimelimit
|
||||
|
@ -394,8 +384,8 @@ seekFilteredKeys seeker listfs = do
|
|||
Just _ -> discard
|
||||
|
||||
precachefinisher mi lreader checktimelimit = liftIO lreader >>= \case
|
||||
Just ((logf, (si, f), k), logcontent) -> checktimelimit discard $ do
|
||||
maybe noop (Annex.BranchState.setCache logf) logcontent
|
||||
Just ((logf, (si, f), k), logcontent) -> checktimelimit (liftIO discard) $ do
|
||||
maybe noop (Annex.Branch.precache logf) logcontent
|
||||
checkMatcherWhen mi
|
||||
(matcherNeedsLocationLog mi && not (matcherNeedsFileName mi))
|
||||
(MatchingFile $ FileInfo f f (Just k))
|
||||
|
@ -601,9 +591,9 @@ notSymlink :: RawFilePath -> IO Bool
|
|||
notSymlink f = liftIO $ not . isSymbolicLink <$> R.getSymbolicLinkStatus f
|
||||
|
||||
{- Returns an action that, when there's a time limit, can be used
|
||||
- to check it before processing a file. The IO action is run when over the
|
||||
- time limit. -}
|
||||
mkCheckTimeLimit :: Annex (IO () -> Annex () -> Annex ())
|
||||
- to check it before processing a file. The first action is run when over the
|
||||
- time limit, otherwise the second action is run. -}
|
||||
mkCheckTimeLimit :: Annex (Annex () -> Annex () -> Annex ())
|
||||
mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case
|
||||
Nothing -> return $ \_ a -> a
|
||||
Just (duration, cutoff) -> return $ \cleanup a -> do
|
||||
|
@ -612,6 +602,6 @@ mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case
|
|||
then do
|
||||
warning $ "Time limit (" ++ fromDuration duration ++ ") reached! Shutting down..."
|
||||
shutdown True
|
||||
liftIO cleanup
|
||||
cleanup
|
||||
liftIO $ exitWith $ ExitFailure 101
|
||||
else a
|
||||
|
|
22
doc/bugs/git-annex_branch_caching_bug.mdwn
Normal file
22
doc/bugs/git-annex_branch_caching_bug.mdwn
Normal file
|
@ -0,0 +1,22 @@
|
|||
If the journal contains a newer version of a log file than the git-annex
|
||||
branch, and annex.alwayscommit=false so the branch is not getting updated,
|
||||
the value from the journal can be ignored when reading that log file.
|
||||
|
||||
In CmdLine.Seek, there is some code that precaches location logs as an
|
||||
optimisation (when using eg --copies). That streams info from the
|
||||
git-annex branch into the cache. But it never checks for a journal file
|
||||
with newer information.
|
||||
|
||||
> fixed this
|
||||
|
||||
Also in Cmdline.Seek, there is a LsTreeRecursive over the branch to handle
|
||||
`--all`, and I think again that would mean it doesn't notice location
|
||||
logs that are only in the journal.
|
||||
Before that optimisation, it was using Logs.Location.loggedKeys,
|
||||
which does look at the journal.
|
||||
|
||||
> fixed
|
||||
|
||||
(This is also a blocker for [[todo/hiding_a_repository]].)
|
||||
|
||||
[[done]] --[[Joey]]
|
|
@ -151,19 +151,9 @@ later write.
|
|||
> No way to configure what repo is hidden yet. --[[Joey]]
|
||||
>
|
||||
> Implementation notes:
|
||||
>
|
||||
> * CmdLine.Seek precaches git-annex branch
|
||||
> location logs, but that does not include private ones. Since they're
|
||||
> cached, the private ones don't get read. Result is eg, whereis finds no
|
||||
> copies. Either need to disable CmdLine.Seek precaching when there's
|
||||
> hidden repos, or could make the cache indicate it's only of public
|
||||
> info, so private info still gets read.
|
||||
> * CmdLine.Seek contains a LsTreeRecursive over the branch to handle
|
||||
> --all, and again that won't see private information, including even
|
||||
> annexed files that are only present in the hidden repo.
|
||||
> * (And I wonder, don't both the caches above already miss things in
|
||||
> the journal?)
|
||||
> * Any other direct accesses of the branch, not going through
|
||||
>
|
||||
> * [[bugs/git-annex_branch_caching_bug]] was a problem, now fixed.
|
||||
> * Any other similar direct accesses of the branch, not going through
|
||||
> Annex.Branch, also need to be fixed (and may be missing journal files
|
||||
> already?) Command.ImportFeed.knownItems is one. Command.Log behavior
|
||||
> needs to be investigated, may be ok. And Logs.Web.withKnownUrls is another.
|
||||
|
|
Loading…
Add table
Reference in a new issue