git-annex/Assistant/Threads/Merger.hs
Joey Hess 6d7ecd9e5d
merge git-annex branch in memory in read-only repository
Improved support for using git-annex in a read-only repository, git-annex
branch information from remotes that cannot be merged into the git-annex
branch will now not crash it, but will be merged in memory.

To avoid this making git-annex behave one way in a read-only repository,
and another way when it can write, it's important that Annex.Branch.get
return the same thing (modulo log file compaction) in both cases.

This manages that mostly. There are some exceptions:

- When there is a transition in one of the remote git-annex branches
  that has not yet been applied to the local or other git-annex branches.
  Transitions are not handled.
- `git-annex log` runs git log on the git-annex branch, and so
  it will not be able to show information coming from the other, not yet
  merged branches.
- Annex.Branch.files only looks at files in the git-annex branch and not
  unmerged branches. This affects git-annex info output.
- Annex.Branch.hs.overBranchFileContents ditto. Affects --all and
  also importfeed (but importfeed cannot work in a read-only repo
  anyway).
- CmdLine.Seek.seekFilteredKeys when precaching location logs.
  Note use of Annex.Branch.fullname
- Database.ContentIdentifier.needsUpdateFromLog and updateFromLog

These warts make this not suitable to be merged yet.

This readonly code path is more expensive, since it has to query several
branches. The value does get cached, but still large queries will be
slower in a read-only repository when there are unmerged git-annex
branches.

When annex.merge-annex-branches=false, updateTo skips doing anything,
and so the read-only repository code does not get triggered. So a user who
is bothered by the extra work can set that.

Other writes to the repository can still result in permissions errors.
This includes the initial creation of the git-annex branch, and of course
any writes to the git-annex branch.

Sponsored-by: Dartmouth College's Datalad project
2021-12-27 13:21:15 -04:00

124 lines
3.6 KiB
Haskell

{- git-annex assistant git merge thread
-
- Copyright 2012-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Assistant.Threads.Merger where
import Assistant.Common
import Assistant.TransferQueue
import Assistant.BranchChange
import Assistant.Sync
import Utility.DirWatcher
import Utility.DirWatcher.Types
import Utility.Directory.Create
import Annex.CurrentBranch
import qualified Annex
import qualified Annex.Branch
import qualified Git
import qualified Git.Branch
import qualified Git.Ref
import qualified Command.Sync
import qualified System.FilePath.ByteString as P
{- This thread watches for changes to .git/refs/, and handles incoming
- pushes. -}
mergeThread :: NamedThread
mergeThread = namedThread "Merger" $ do
g <- liftAnnex gitRepo
let gitd = Git.localGitDir g
let dir = gitd P.</> "refs"
liftIO $ createDirectoryUnder gitd dir
let hook a = Just <$> asIO2 (runHandler a)
changehook <- hook onChange
errhook <- hook onErr
let hooks = mkWatchHooks
{ addHook = changehook
, modifyHook = changehook
, errHook = errhook
}
void $ liftIO $ watchDir (fromRawFilePath dir) (const False) True hooks id
debug ["watching", fromRawFilePath dir]
type Handler = FilePath -> Assistant ()
{- Runs an action handler.
-
- Exceptions are ignored, otherwise a whole thread could be crashed.
-}
runHandler :: Handler -> FilePath -> Maybe FileStatus -> Assistant ()
runHandler handler file _filestatus =
either (liftIO . print) (const noop) =<< tryIO <~> handler file
{- Called when there's an error with inotify. -}
onErr :: Handler
onErr = error
{- Called when a new branch ref is written, or a branch ref is modified.
-
- At startup, synthetic add events fire, causing this to run, but that's
- ok; it ensures that any changes pushed since the last time the assistant
- ran are merged in.
-}
onChange :: Handler
onChange file
| ".lock" `isSuffixOf` file = noop
| isAnnexBranch file = do
branchChanged
diverged <- liftAnnex Annex.Branch.forceUpdate >>= return . \case
u@(Annex.Branch.UpdateMade {}) -> Annex.Branch.refsWereMerged u
(Annex.Branch.UpdateFailedPermissions {}) -> True
when diverged $ do
updateExportTreeFromLogAll
queueDeferredDownloads "retrying deferred download" Later
| otherwise = mergecurrent
where
changedbranch = fileToBranch file
mergecurrent =
mergecurrent' =<< liftAnnex getCurrentBranch
mergecurrent' currbranch@(Just b, _)
| changedbranch `isRelatedTo` b =
whenM (liftAnnex $ inRepo $ Git.Branch.changed b changedbranch) $ do
debug
[ "merging", Git.fromRef changedbranch
, "into", Git.fromRef b
]
void $ liftAnnex $ do
cmode <- annexCommitMode <$> Annex.getGitConfig
-- Allow merging unrelated histories.
mc <- Command.Sync.mergeConfig True
Command.Sync.merge
currbranch
mc
def
cmode
changedbranch
mergecurrent' _ = noop
{- Is the first branch a synced branch or remote tracking branch related
- to the second branch, which should be merged into it? -}
isRelatedTo :: Git.Ref -> Git.Ref -> Bool
isRelatedTo x y
| basex /= takeDirectory basex ++ "/" ++ basey = False
| "/synced/" `isInfixOf` Git.fromRef x = True
| "refs/remotes/" `isPrefixOf` Git.fromRef x = True
| otherwise = False
where
basex = Git.fromRef $ Git.Ref.base x
basey = Git.fromRef $ Git.Ref.base y
isAnnexBranch :: FilePath -> Bool
isAnnexBranch f = n `isSuffixOf` f
where
n = '/' : Git.fromRef Annex.Branch.name
fileToBranch :: FilePath -> Git.Ref
fileToBranch f = Git.Ref $ encodeBS $ "refs" </> base
where
base = Prelude.last $ split "/refs/" f