filter exported tree through remote's preferred content setting

The filtering is fairly efficient as far as building the trees goes,
since it reuses adjustTree. But it still needs to traverse the whole
tree, and look up the keys used by every file.

The tree that gets recorded to export.log is the filtered tree.
This way resumes of interrupted sync to an export uses it without
needing to recalculate it. And, a change to the preferred content
settings of the remote will result in a different tree, so the export
will be updated accordingly.

The original tree is still used in the remote tracking branch.
That branch represents the special remote as a git remote, and if it
were a normal git remote, the tree in its head would not be affected by
preferred content.
This commit is contained in:
Joey Hess 2019-05-20 11:54:55 -04:00
parent 12451ea010
commit 568af1073e
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 65 additions and 22 deletions

View file

@ -12,6 +12,7 @@ module Annex.FileMatcher (
checkFileMatcher,
checkFileMatcher',
checkMatcher,
checkMatcher',
matchAll,
PreferredContentData(..),
preferredContentTokens,
@ -63,7 +64,11 @@ checkMatcher matcher mkey afile notpresent notconfigured d
(Just key, _) -> go (MatchingKey key afile)
_ -> d
where
go mi = matchMrun matcher $ \a -> a notpresent mi
go mi = checkMatcher' matcher mi notpresent
checkMatcher' :: FileMatcher Annex -> MatchInfo -> AssumeNotPresent -> Annex Bool
checkMatcher' matcher mi notpresent =
matchMrun matcher $ \a -> a notpresent mi
fileMatchInfo :: FilePath -> Annex MatchInfo
fileMatchInfo file = do

View file

@ -14,6 +14,7 @@ import qualified Annex
import qualified Git
import qualified Git.DiffTree
import qualified Git.LsTree
import qualified Git.Tree
import qualified Git.Ref
import Git.Types
import Git.FilePath
@ -24,13 +25,17 @@ import Annex.Export
import Annex.Content
import Annex.Transfer
import Annex.CatFile
import Annex.FileMatcher
import Types.FileMatcher
import Annex.RemoteTrackingBranch
import Logs.Location
import Logs.Export
import Logs.PreferredContent
import Database.Export
import Config
import Utility.Tmp
import Utility.Metered
import Utility.Matcher
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
@ -79,7 +84,8 @@ seek o = do
setConfig (remoteConfig r "annex-tracking-branch")
(fromRef $ exportTreeish o)
tree <- fromMaybe (giveup "unknown tree") <$>
tree <- filterPreferredContent r =<<
fromMaybe (giveup "unknown tree") <$>
inRepo (Git.Ref.tree (exportTreeish o))
mtbcommitsha <- getExportCommit r (exportTreeish o)
@ -112,8 +118,8 @@ getExportCommit r treeish
-- | Changes what's exported to the remote. Does not upload any new
-- files, but does delete and rename files already exported to the remote.
changeExport :: Remote -> ExportHandle -> Git.Ref -> CommandSeek
changeExport r db new = do
changeExport :: Remote -> ExportHandle -> PreferredFiltered Git.Ref -> CommandSeek
changeExport r db (PreferredFiltered new) = do
old <- getExport (uuid r)
recordExportBeginning (uuid r) new
@ -223,8 +229,8 @@ newtype AllFilled = AllFilled { fromAllFilled :: Bool }
--
-- Once all exported files have reached the remote, updates the
-- remote tracking branch.
fillExport :: Remote -> ExportHandle -> Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> Annex Bool
fillExport r db newtree mtbcommitsha = do
fillExport :: Remote -> ExportHandle -> PreferredFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> Annex Bool
fillExport r db (PreferredFiltered newtree) mtbcommitsha = do
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive newtree
cvar <- liftIO $ newMVar (FileUploaded False)
allfilledvar <- liftIO $ newMVar (AllFilled True)
@ -431,3 +437,34 @@ removeEmptyDirectories r db loc ks
( removeexportdirectory d
, return True
)
-- | A value that has been filtered through the remote's preferred content
-- expression.
newtype PreferredFiltered t = PreferredFiltered t
filterPreferredContent :: Remote -> Git.Ref -> Annex (PreferredFiltered Git.Ref)
filterPreferredContent r tree = do
m <- preferredContentMap
case M.lookup (uuid r) m of
Just matcher | not (isEmpty matcher) ->
PreferredFiltered <$> go matcher
_ -> return (PreferredFiltered tree)
where
go matcher = do
g <- Annex.gitRepo
Git.Tree.adjustTree (checkmatcher matcher) [] [] tree g
checkmatcher matcher ti@(Git.Tree.TreeItem topf _ sha) =
catKey sha >>= \case
Just k -> do
-- Match filename relative to the
-- top of the tree.
let af = AssociatedFile $ Just $
getTopFilePath topf
let mi = MatchingKey k af
ifM (checkMatcher' matcher mi mempty)
( return (Just ti)
, return Nothing
)
-- Always export non-annexed files.
Nothing -> return (Just ti)

View file

@ -723,23 +723,22 @@ seekExportContent o rs (currbranch, _) = or <$> forM rs go
(Export.openDb (Remote.uuid r))
Export.closeDb
(\db -> Export.writeLockDbWhile db (go' r db))
go' r db = do
(exported, mtbcommitsha) <- case remoteAnnexTrackingBranch (Remote.gitconfig r) of
Nothing -> nontracking r
Just b -> do
mtree <- inRepo $ Git.Ref.tree b
mtbcommitsha <- Command.Export.getExportCommit r b
case (mtree, mtbcommitsha) of
(Just tree, Just _) -> do
Command.Export.changeExport r db tree
return ([mkExported tree []], mtbcommitsha)
_ -> nontracking r
fillexport r db (exportedTreeishes exported) mtbcommitsha
nontracking r = do
go' r db = case remoteAnnexTrackingBranch (Remote.gitconfig r) of
Nothing -> nontracking r db
Just b -> do
mtree <- inRepo $ Git.Ref.tree b
mtbcommitsha <- Command.Export.getExportCommit r b
case (mtree, mtbcommitsha) of
(Just tree, Just _) -> do
filteredtree <- Command.Export.filterPreferredContent r tree
Command.Export.changeExport r db filteredtree
Command.Export.fillExport r db filteredtree mtbcommitsha
_ -> nontracking r db
nontracking r db = do
exported <- getExport (Remote.uuid r)
maybe noop (warnnontracking r exported) currbranch
return (exported, Nothing)
fillexport r db (exportedTreeishes exported) Nothing
warnnontracking r exported currb = inRepo (Git.Ref.tree currb) >>= \case
Just currt | not (any (== currt) (exportedTreeishes exported)) ->
@ -754,7 +753,9 @@ seekExportContent o rs (currbranch, _) = or <$> forM rs go
gitconfig = show (remoteConfig r "tracking-branch")
fillexport _ _ [] _ = return False
fillexport r db (t:[]) mtbcommitsha = Command.Export.fillExport r db t mtbcommitsha
fillexport r db (tree:[]) mtbcommitsha = do
let filteredtree = Command.Export.PreferredFiltered tree
Command.Export.fillExport r db filteredtree mtbcommitsha
fillexport r _ _ _ = do
warnExportImportConflict r
return False