implemented filter-branch for key info

Not tested yet but should work.

Noted a possible optimisation, which should probably be added, to
speed it up in cases where there is no uuid filtering being done.
It would need Annex.Branch to add a function like getRef that uses
catFileDetails, so the sha is also returned. The difficulty would be
making it support the precached file content; if it didn't it would
probably not be any faster and could even be slower. So probably the
precaching would need to be changed to also cache the sha.
This commit is contained in:
Joey Hess 2021-05-17 11:06:47 -04:00
parent 1d16654a22
commit 1da9fe5bd8
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 75 additions and 6 deletions

View file

@ -7,7 +7,8 @@
module Annex.Branch.Transitions (
FileTransition(..),
getTransitionCalculator
getTransitionCalculator,
filterBranch,
) where
import Common

View file

@ -5,14 +5,32 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.FilterBranch where
import Command
import qualified Annex
import qualified Annex.Branch
import Annex.Branch.Transitions (filterBranch, FileTransition(..))
import Annex.HashObject
import Annex.Tmp
import Logs
import Git.Types
import Git.FilePath
import Git.Index
import Git.Env
import Git.UpdateIndex
import qualified Git.LsTree as LsTree
import Utility.RawFilePath
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Builder
import qualified System.FilePath.ByteString as P
cmd :: Command
cmd = withGlobalOptions [annexedMatchingOptions] $
cmd = noMessages $ withGlobalOptions [annexedMatchingOptions] $
command "filter-branch" SectionMaintenance
"filter information from the git-annex branch"
paramPaths (seek <$$> optParser)
@ -86,7 +104,57 @@ mkUUIDMatcher' l = \u ->
(includes, excludes) = S.partition isInclude (S.fromList l)
seek :: FilterBranchOptions -> CommandSeek
seek o = do
keyinfomatcher <- mkUUIDMatcher (keyInformation o)
configinfomatcher <- mkUUIDMatcher (repoConfig o)
error "TODO"
seek o = withOtherTmp $ \tmpdir -> do
let tmpindex = tmpdir P.</> "index"
gc <- Annex.getGitConfig
r' <- Annex.inRepo $ \r ->
addGitEnv r indexEnv (fromRawFilePath tmpindex)
withUpdateIndex r' $ \h -> do
keyinfomatcher <- mkUUIDMatcher (keyInformation o)
configmatcher <- mkUUIDMatcher (repoConfig o)
let addtoindex f sha = liftIO $ streamUpdateIndex' h $
pureStreamer $ L.fromStrict $ LsTree.formatLsTree $ LsTree.TreeItem
{ LsTree.mode = fromTreeItemType TreeFile
, LsTree.typeobj = fmtObjectType BlobObject
, LsTree.sha = sha
, LsTree.size = Nothing
, LsTree.file = asTopFilePath f
}
let filterbanch matcher f c
| L.null c = noop
| otherwise = case filterBranch matcher gc f c of
ChangeFile builder -> do
let c' = toLazyByteString builder
unless (L.null c') $
addtoindex f =<< hashBlob c'
-- This could perhaps be optimised by looking
-- up the sha of the file in the branch.
PreserveFile -> addtoindex f =<< hashBlob c
let addkeyinfo k = startingCustomOutput k $ do
forM_ (keyLogFiles gc k) $ \f ->
filterbanch keyinfomatcher f
=<< Annex.Branch.get f
next (return True)
let seeker = AnnexedFileSeeker
{ startAction = \_ _ k -> addkeyinfo k
, checkContentPresent = Nothing
, usesLocationLog = True
}
-- Avoid the usual default of all files in the current
-- directory and below, because this command is documented
-- as only including the information it has explicitly been
-- told to include.
when (not (null (includeFiles o)) || isJust (keyOptions o)) $
withKeyOptions (keyOptions o) False seeker
(commandAction . \(_, k, _) -> addkeyinfo k)
(withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (includeFiles o)
-- TODO output commit
liftIO $ removeWhenExistsWith removeLink tmpindex
where
ww = WarnUnmatchLsFiles