Merge branch 'master' into hiddenannex

This commit is contained in:
Joey Hess 2021-04-21 15:45:12 -04:00
commit 0bb57702e1
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 113 additions and 36 deletions

View file

@ -31,6 +31,8 @@ module Annex.Branch (
rememberTreeish,
performTransitions,
withIndex,
precache,
overBranchFileContents,
) where
import qualified Data.ByteString as B
@ -42,6 +44,7 @@ import Data.Function
import Data.Char
import Data.ByteString.Builder
import Control.Concurrent (threadDelay)
import Control.Concurrent.MVar
import qualified System.FilePath.ByteString as P
import Annex.Common
@ -66,6 +69,7 @@ import Annex.HashObject
import Git.Types (Ref(..), fromRef, fromRef', RefDate, TreeItemType(..))
import Git.FilePath
import Annex.CatFile
import Git.CatFile (catObjectStreamLsTree)
import Annex.Perms
import Logs
import Logs.Transitions
@ -261,6 +265,18 @@ get file = getCache file >>= \case
setCache file content
return content
{- Used to cache the value of a file, which has been read from the branch
- using some optimised method. The journal has to be checked, in case
- it has a newer version of the file that has not reached the branch yet.
-}
precache :: RawFilePath -> L.ByteString -> Annex ()
precache file branchcontent = do
st <- getState
content <- if journalIgnorable st
then pure branchcontent
else fromMaybe branchcontent <$> getJournalFileStale file
Annex.BranchState.setCache file content
{- Like get, but does not merge the branch, so the info returned may not
- reflect changes in remotes.
- (Changing the value this returns, and then merging is always the
@ -758,3 +774,59 @@ rememberTreeishLocked treeish graftpoint jl = do
-- say that the index contains c'.
setIndexSha c'
{- Runs an action on the content of selected files from the branch.
- This is much faster than reading the content of each file in turn,
- because it lets git cat-file stream content without blocking.
-
- The action is passed a callback that it can repeatedly call to read
- the next file and its contents. When there are no more files, the
- callback will return Nothing.
-}
overBranchFileContents
:: (RawFilePath -> Maybe v)
-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex ())
-> Annex ()
overBranchFileContents select go = do
st <- update
g <- Annex.gitRepo
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
Git.LsTree.LsTreeRecursive
(Git.LsTree.LsTreeLong False)
fullname
let select' f = fmap (\v -> (v, f)) (select f)
buf <- liftIO newEmptyMVar
let go' reader = go $ liftIO reader >>= \case
Just ((v, f), content) -> do
-- Check the journal if it did not get
-- committed to the branch
content' <- if journalIgnorable st
then pure content
else maybe content Just <$> getJournalFileStale f
return (Just (v, f, content'))
Nothing
| journalIgnorable st -> return Nothing
-- The journal did not get committed to the
-- branch, and may contain files that
-- are not present in the branch, which
-- need to be provided to the action still.
-- This can cause the action to be run a
-- second time with a file it already ran on.
| otherwise -> liftIO (tryTakeMVar buf) >>= \case
Nothing -> drain buf =<< getJournalledFilesStale
Just fs -> drain buf fs
catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
liftIO $ void cleanup
where
getnext [] = Nothing
getnext (f:fs) = case select f of
Nothing -> getnext fs
Just v -> Just (v, f, fs)
drain buf fs = case getnext fs of
Just (v, f, fs') -> do
liftIO $ putMVar buf fs'
content <- getJournalFileStale f
return (Just (v, f, content))
Nothing -> do
liftIO $ putMVar buf []
return Nothing

View file

@ -12,6 +12,9 @@ git-annex (8.20210331) UNRELEASED; urgency=medium
* directory: When cp supports reflinks, use it.
* init: Fix a crash when the repo's was cloned from a repo that had an
adjusted branch checked out, and the origin remote is not named "origin".
* Fix bug caused by recent optimisations that could make git-annex not
see recently recorded status information when configured with
annex.alwayscommit=false.
-- Joey Hess <id@joeyh.name> Thu, 01 Apr 2021 12:17:26 -0400

View file

@ -9,8 +9,6 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE TupleSections #-}
module CmdLine.Seek where
import Annex.Common
@ -44,7 +42,6 @@ import Annex.Concurrent
import Annex.CheckIgnore
import Annex.Action
import qualified Annex.Branch
import qualified Annex.BranchState
import qualified Database.Keys
import qualified Utility.RawFilePath as R
import Utility.Tuple
@ -274,25 +271,18 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
checktimelimit <- mkCheckTimeLimit
keyaction <- mkkeyaction
config <- Annex.getGitConfig
g <- Annex.gitRepo
void Annex.Branch.update
(l, cleanup) <- inRepo $ LsTree.lsTree
LsTree.LsTreeRecursive
(LsTree.LsTreeLong False)
Annex.Branch.fullname
let getk f = fmap (,f) (locationLogFileKey config f)
let getk = locationLogFileKey config
let discard reader = reader >>= \case
Nothing -> noop
Just _ -> discard reader
let go reader = liftIO reader >>= \case
Nothing -> return ()
Just ((k, f), content) -> checktimelimit (discard reader) $ do
maybe noop (Annex.BranchState.setCache f) content
let go reader = reader >>= \case
Just (k, f, content) -> checktimelimit (discard reader) $ do
maybe noop (Annex.Branch.precache f) content
keyaction Nothing (SeekInput [], k, mkActionItem k)
go reader
catObjectStreamLsTree l (getk . getTopFilePath . LsTree.file) g go
liftIO $ void cleanup
Nothing -> return ()
Annex.Branch.overBranchFileContents getk go
runkeyaction getks = do
keyaction <- mkkeyaction
@ -383,7 +373,7 @@ seekFilteredKeys seeker listfs = do
liftIO $ void cleanup
where
finisher mi oreader checktimelimit = liftIO oreader >>= \case
Just ((si, f), content) -> checktimelimit discard $ do
Just ((si, f), content) -> checktimelimit (liftIO discard) $ do
keyaction f mi content $
commandAction . startAction seeker si f
finisher mi oreader checktimelimit
@ -394,8 +384,8 @@ seekFilteredKeys seeker listfs = do
Just _ -> discard
precachefinisher mi lreader checktimelimit = liftIO lreader >>= \case
Just ((logf, (si, f), k), logcontent) -> checktimelimit discard $ do
maybe noop (Annex.BranchState.setCache logf) logcontent
Just ((logf, (si, f), k), logcontent) -> checktimelimit (liftIO discard) $ do
maybe noop (Annex.Branch.precache logf) logcontent
checkMatcherWhen mi
(matcherNeedsLocationLog mi && not (matcherNeedsFileName mi))
(MatchingFile $ FileInfo f f (Just k))
@ -601,9 +591,9 @@ notSymlink :: RawFilePath -> IO Bool
notSymlink f = liftIO $ not . isSymbolicLink <$> R.getSymbolicLinkStatus f
{- Returns an action that, when there's a time limit, can be used
- to check it before processing a file. The IO action is run when over the
- time limit. -}
mkCheckTimeLimit :: Annex (IO () -> Annex () -> Annex ())
- to check it before processing a file. The first action is run when over the
- time limit, otherwise the second action is run. -}
mkCheckTimeLimit :: Annex (Annex () -> Annex () -> Annex ())
mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case
Nothing -> return $ \_ a -> a
Just (duration, cutoff) -> return $ \cleanup a -> do
@ -612,6 +602,6 @@ mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case
then do
warning $ "Time limit (" ++ fromDuration duration ++ ") reached! Shutting down..."
shutdown True
liftIO cleanup
cleanup
liftIO $ exitWith $ ExitFailure 101
else a

View file

@ -0,0 +1,22 @@
If the journal contains a newer version of a log file than the git-annex
branch, and annex.alwayscommit=false so the branch is not getting updated,
the value from the journal can be ignored when reading that log file.
In CmdLine.Seek, there is some code that precaches location logs as an
optimisation (when using eg --copies). That streams info from the
git-annex branch into the cache. But it never checks for a journal file
with newer information.
> fixed this
Also in Cmdline.Seek, there is a LsTreeRecursive over the branch to handle
`--all`, and I think again that would mean it doesn't notice location
logs that are only in the journal.
Before that optimisation, it was using Logs.Location.loggedKeys,
which does look at the journal.
> fixed
(This is also a blocker for [[todo/hiding_a_repository]].)
[[done]] --[[Joey]]

View file

@ -151,19 +151,9 @@ later write.
> No way to configure what repo is hidden yet. --[[Joey]]
>
> Implementation notes:
>
> * CmdLine.Seek precaches git-annex branch
> location logs, but that does not include private ones. Since they're
> cached, the private ones don't get read. Result is eg, whereis finds no
> copies. Either need to disable CmdLine.Seek precaching when there's
> hidden repos, or could make the cache indicate it's only of public
> info, so private info still gets read.
> * CmdLine.Seek contains a LsTreeRecursive over the branch to handle
> --all, and again that won't see private information, including even
> annexed files that are only present in the hidden repo.
> * (And I wonder, don't both the caches above already miss things in
> the journal?)
> * Any other direct accesses of the branch, not going through
>
> * [[bugs/git-annex_branch_caching_bug]] was a problem, now fixed.
> * Any other similar direct accesses of the branch, not going through
> Annex.Branch, also need to be fixed (and may be missing journal files
> already?) Command.ImportFeed.knownItems is one. Command.Log behavior
> needs to be investigated, may be ok. And Logs.Web.withKnownUrls is another.