diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 7b3fa0def9..28208d266e 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -24,6 +24,7 @@ module Annex.Branch ( change, maybeChange, commitMessage, + createMessage, commit, forceCommit, getBranch, @@ -129,7 +130,8 @@ getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha <$> branchsha go False = withIndex' True $ do cmode <- annexCommitMode <$> Annex.getGitConfig - inRepo $ Git.Branch.commitAlways cmode "branch created" fullname [] + cmessage <- createMessage + inRepo $ Git.Branch.commitAlways cmode cmessage fullname [] use sha = do setIndexSha sha return sha @@ -363,6 +365,10 @@ set jl ru f c = do commitMessage :: Annex String commitMessage = fromMaybe "update" . annexCommitMessage <$> Annex.getGitConfig +{- Commit message used when creating the branch. -} +createMessage :: Annex String +createMessage = fromMaybe "branch created" . annexCommitMessage <$> Annex.getGitConfig + {- Stages the journal, and commits staged changes to the branch. -} commit :: String -> Annex () commit = whenM (journalDirty gitAnnexJournalDir) . forceCommit diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs index cec95cc478..ef54c8b943 100644 --- a/Annex/Branch/Transitions.hs +++ b/Annex/Branch/Transitions.hs @@ -7,7 +7,8 @@ module Annex.Branch.Transitions ( FileTransition(..), - getTransitionCalculator + getTransitionCalculator, + filterBranch, ) where import Common diff --git a/CHANGELOG b/CHANGELOG index bb71154ab7..46e6723268 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -11,6 +11,8 @@ git-annex (8.20210429) UNRELEASED; urgency=medium * reinject: Error out when run on a file that is not annexed, rather than silently skipping it. * assistant: Fix a crash on startup by avoiding using forkProcess. + * init: When annex.commitmessage is set, use that message for the commit + that creates the git-annex branch. -- Joey Hess Mon, 03 May 2021 10:33:10 -0400 diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index ac2b9b8216..fe40081483 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -68,6 +68,7 @@ import qualified Command.Unlock import qualified Command.Lock import qualified Command.PreCommit import qualified Command.PostReceive +import qualified Command.FilterBranch import qualified Command.Find import qualified Command.FindRef import qualified Command.Whereis @@ -202,6 +203,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexGlobalOption , Command.Unused.cmd , Command.DropUnused.cmd , Command.AddUnused.cmd + , Command.FilterBranch.cmd , Command.Find.cmd , Command.FindRef.cmd , Command.Whereis.cmd diff --git a/Command/FilterBranch.hs b/Command/FilterBranch.hs new file mode 100644 index 0000000000..d405ce7141 --- /dev/null +++ b/Command/FilterBranch.hs @@ -0,0 +1,194 @@ +{- git-annex command + - + - Copyright 2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Command.FilterBranch where + +import Command +import qualified Annex +import qualified Annex.Branch +import Annex.Branch.Transitions (filterBranch, FileTransition(..)) +import Annex.HashObject +import Annex.Tmp +import Annex.SpecialRemote.Config +import Types.ProposedAccepted +import Logs +import Logs.Remote +import Git.Types +import Git.FilePath +import Git.Index +import Git.Env +import Git.UpdateIndex +import qualified Git.LsTree as LsTree +import qualified Git.Branch as Git +import Utility.RawFilePath + +import qualified Data.Map as M +import qualified Data.Set as S +import qualified Data.ByteString.Lazy as L +import Data.ByteString.Builder +import qualified System.FilePath.ByteString as P + +cmd :: Command +cmd = noMessages $ withGlobalOptions [annexedMatchingOptions] $ + command "filter-branch" SectionMaintenance + "filter information from the git-annex branch" + paramPaths (seek <$$> optParser) + +data FilterBranchOptions = FilterBranchOptions + { includeFiles :: CmdParams + , keyOptions :: Maybe KeyOptions + , keyInformation :: [IncludeExclude (DeferredParse UUID)] + , repoConfig :: [IncludeExclude (DeferredParse UUID)] + , includeGlobalConfig :: Bool + } + +optParser :: CmdParamsDesc -> Parser FilterBranchOptions +optParser desc = FilterBranchOptions + <$> cmdParams desc + <*> optional parseKeyOptions + <*> many (parseIncludeExclude "key-information") + <*> many (parseIncludeExclude "repo-config") + <*> switch + ( long "include-global-config" + <> help "include global configuration" + ) + +data IncludeExclude t + = Include t + | Exclude t + | IncludeAll + deriving (Show, Eq, Ord) + +isInclude :: IncludeExclude t -> Bool +isInclude (Include _) = True +isInclude IncludeAll = True +isInclude (Exclude _) = False + +parseIncludeExclude :: String -> Parser (IncludeExclude (DeferredParse UUID)) +parseIncludeExclude s = + ( Include <$> parseRepositoryOption + ("include-" ++ s ++ "-for") + "include information about a repository" + ) <|> + ( Exclude <$> parseRepositoryOption + ("exclude-" ++ s ++ "-for") + "exclude information about a repository" + ) <|> + ( flag' IncludeAll + ( long ("include-all-" ++ s) + <> help "include information about all non-excluded repositories" + ) + ) + +parseRepositoryOption :: String -> String -> Parser (DeferredParse UUID) +parseRepositoryOption s h = parseUUIDOption <$> strOption + ( long s + <> help h + <> metavar (paramRemote `paramOr` paramDesc `paramOr` paramUUID) + <> completeRemotes + ) + +mkUUIDMatcher :: [IncludeExclude (DeferredParse UUID)] -> Annex (UUID -> Bool) +mkUUIDMatcher l = do + sameasmap <- M.mapMaybe + (toUUID . fromProposedAccepted <$$> M.lookup sameasUUIDField) + <$> remoteConfigMap + mkUUIDMatcher' sameasmap <$> mapM get l + where + get (Include v) = Include <$> getParsed v + get (Exclude v) = Exclude <$> getParsed v + get IncludeAll = pure IncludeAll + +mkUUIDMatcher' :: M.Map UUID UUID -> [IncludeExclude UUID] -> (UUID -> Bool) +mkUUIDMatcher' sameasmap l = \u -> + let sameas = M.lookup u sameasmap + in ( S.member (Include u) includes + || S.member IncludeAll includes + || maybe False (\u' -> S.member (Include u') includes) sameas + ) + && S.notMember (Exclude u) excludes + && maybe True (\u' -> S.notMember (Exclude u') excludes) sameas + where + (includes, excludes) = (S.partition isInclude (S.fromList l)) + +seek :: FilterBranchOptions -> CommandSeek +seek o = withOtherTmp $ \tmpdir -> do + let tmpindex = tmpdir P. "index" + gc <- Annex.getGitConfig + tmpindexrepo <- Annex.inRepo $ \r -> + addGitEnv r indexEnv (fromRawFilePath tmpindex) + withUpdateIndex tmpindexrepo $ \h -> do + keyinfomatcher <- mkUUIDMatcher (keyInformation o) + repoconfigmatcher <- mkUUIDMatcher (repoConfig o) + + let addtoindex f sha = liftIO $ streamUpdateIndex' h $ + pureStreamer $ L.fromStrict $ LsTree.formatLsTree $ LsTree.TreeItem + { LsTree.mode = fromTreeItemType TreeFile + , LsTree.typeobj = fmtObjectType BlobObject + , LsTree.sha = sha + , LsTree.size = Nothing + , LsTree.file = asTopFilePath f + } + + let filterbanch matcher f c + | L.null c = noop + | otherwise = case filterBranch matcher gc f c of + ChangeFile builder -> do + let c' = toLazyByteString builder + unless (L.null c') $ + addtoindex f =<< hashBlob c' + -- This could perhaps be optimised by looking + -- up the sha of the file in the branch. + PreserveFile -> addtoindex f =<< hashBlob c + + -- Add information for all keys that are being included, + -- filtering out information for repositories that are not + -- being included. + let addkeyinfo k = startingCustomOutput k $ do + forM_ (keyLogFiles gc k) $ \f -> + filterbanch keyinfomatcher f + =<< Annex.Branch.get f + next (return True) + let seeker = AnnexedFileSeeker + { startAction = \_ _ k -> addkeyinfo k + , checkContentPresent = Nothing + , usesLocationLog = True + } + -- Avoid the usual default of all files in the current + -- directory and below, because this command is documented + -- as only including the information it has explicitly been + -- told to include. + when (not (null (includeFiles o)) || isJust (keyOptions o)) $ + withKeyOptions (keyOptions o) False seeker + (commandAction . \(_, k, _) -> addkeyinfo k) + (withFilesInGitAnnex ww seeker) + =<< workTreeItems ww (includeFiles o) + + -- Add repository configs for all repositories that are + -- being included. + forM_ topLevelUUIDBasedLogs $ \f -> + filterbanch repoconfigmatcher f + =<< Annex.Branch.get f + + -- Add global configs when included. + when (includeGlobalConfig o) $ + forM_ otherTopLevelLogs $ \f -> do + c <- Annex.Branch.get f + unless (L.null c) $ + addtoindex f =<< hashBlob c + + -- Commit the temporary index, and output the result. + t <- liftIO $ Git.writeTree tmpindexrepo + liftIO $ removeWhenExistsWith removeLink tmpindex + cmode <- annexCommitMode <$> Annex.getGitConfig + cmessage <- Annex.Branch.commitMessage + c <- inRepo $ Git.commitTree cmode cmessage [] t + liftIO $ putStrLn (fromRef c) + where + ww = WarnUnmatchLsFiles diff --git a/Git/Branch.hs b/Git/Branch.hs index fcae905f64..70faca335f 100644 --- a/Git/Branch.hs +++ b/Git/Branch.hs @@ -1,6 +1,6 @@ {- git branch stuff - - - Copyright 2011 Joey Hess + - Copyright 2011-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -166,7 +166,7 @@ commitCommand' runner commitmode ps = runner $ -} commit :: CommitMode -> Bool -> String -> Branch -> [Ref] -> Repo -> IO (Maybe Sha) commit commitmode allowempty message branch parentrefs repo = do - tree <- getSha "write-tree" $ pipeReadStrict [Param "write-tree"] repo + tree <- writeTree repo ifM (cancommit tree) ( do sha <- commitTree commitmode message parentrefs tree repo @@ -185,6 +185,9 @@ commitAlways :: CommitMode -> String -> Branch -> [Ref] -> Repo -> IO Sha commitAlways commitmode message branch parentrefs repo = fromJust <$> commit commitmode True message branch parentrefs repo +writeTree :: Repo -> IO Sha +writeTree repo = getSha "write-tree" $ pipeReadStrict [Param "write-tree"] repo + commitTree :: CommitMode -> String -> [Ref] -> Ref -> Repo -> IO Sha commitTree commitmode message parentrefs tree repo = getSha "commit-tree" $ diff --git a/Git/LsTree.hs b/Git/LsTree.hs index 2c57521c45..a49c4eaa78 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -28,6 +28,7 @@ import Utility.Attoparsec import Numeric import Data.Either +import Data.Char import System.Posix.Types import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L @@ -146,9 +147,9 @@ parserLsTree long = case long of {- Inverse of parseLsTree. Note that the long output format is not - generated, so any size information is not included. -} -formatLsTree :: TreeItem -> String -formatLsTree ti = unwords - [ showOct (mode ti) "" - , decodeBS (typeobj ti) - , fromRef (sha ti) - ] ++ ('\t' : fromRawFilePath (getTopFilePath (file ti))) +formatLsTree :: TreeItem -> S.ByteString +formatLsTree ti = S.intercalate (S.singleton (fromIntegral (ord ' '))) + [ encodeBS' (showOct (mode ti) "") + , typeobj ti + , fromRef' (sha ti) + ] <> (S.cons (fromIntegral (ord '\t')) (getTopFilePath (file ti))) diff --git a/Logs.hs b/Logs.hs index 2948106692..fae8c24a9c 100644 --- a/Logs.hs +++ b/Logs.hs @@ -72,13 +72,9 @@ keyLogFiles config k = , chunkLogFile config k ] ++ oldurlLogs config k -{- All the log files that do not contain information specific to a key. -} -nonKeyLogFiles :: [RawFilePath] -nonKeyLogFiles = concat - [ topLevelNewUUIDBasedLogs - , topLevelOldUUIDBasedLogs - , otherTopLevelLogs - ] +{- All uuid-based logs stored in the top of the git-annex branch. -} +topLevelUUIDBasedLogs :: [RawFilePath] +topLevelUUIDBasedLogs = topLevelNewUUIDBasedLogs ++ topLevelOldUUIDBasedLogs {- All the old-format uuid-based logs stored in the top of the git-annex branch. -} topLevelOldUUIDBasedLogs :: [RawFilePath] diff --git a/Logs/Export.hs b/Logs/Export.hs index 6fea1b9a96..ecbf4b4aa0 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -23,6 +23,7 @@ module Logs.Export ( ) where import qualified Data.Map as M +import qualified Data.ByteString as B import Annex.Common import qualified Annex.Branch @@ -119,10 +120,10 @@ logExportExcluded :: UUID -> ((Git.Tree.TreeItem -> IO ()) -> Annex a) -> Annex logExportExcluded u a = do logf <- fromRepo $ gitAnnexExportExcludeLog u withLogHandle logf $ \logh -> do - liftIO $ hSetNewlineMode logh noNewlineTranslation a (writer logh) where - writer logh = hPutStrLn logh + writer logh = B.hPutStr logh + . flip B.snoc (fromIntegral (ord '\n')) . Git.LsTree.formatLsTree . Git.Tree.treeItemToLsTreeItem diff --git a/doc/git-annex-filter-branch.mdwn b/doc/git-annex-filter-branch.mdwn new file mode 100644 index 0000000000..14e6bf85a6 --- /dev/null +++ b/doc/git-annex-filter-branch.mdwn @@ -0,0 +1,151 @@ +# NAME + +git-annex filter-branch - filter information from the git-annex branch + +# SYNOPSIS + +git annex filter-branch [...] + +# DESCRIPTION + +This copies selected information from the git-annex branch into a git +commit object, and outputs its hash. The git commit can be transported +to another git repository, and given a branch name such as "foo/git-annex", +and git-annex there will automatically merge that into its git-annex +branch. This allows publishing some information from your git-annex branch, +without publishing the whole thing. + +Other ways to avoid publishing information from a git-annex branch, +or remove information from it include [[git-annex-forget]](1), the +`annex.private` git config, and the `--private` option to +[[git-annex-initremote](1). Those are much easier to use, but this +provides full control for those who need it. + +With no options, no information at all will be included from the git-annex +branch. Use options to specify what to include. All options can be specified +multiple times. + +When the repository contains information about a private +repository (due to `annex.private` being set, or `git-annex initremote +--private` being used), that private information will be included when +allowed by the options, even though it is not recorded on the git-annex +branch. + +When a repository was created with `git annex initremote --sameas=foo`, +its information will be included when the information for foo is, +and excluded when foo is excluded. + +When a special remote is configured with importtree=yes or exporttree=yes, +normally the git tree corresponding to the repository is included in +the git-annex branch, to make sure it does not get garbage collected +by `git gc`. Those trees are *not* included when filtering the git-annex +branch. Usually this will not cause any problems, but if such a tree does +get garbage collected, it will prevent accessing files on the special +remote, until the next time a tree is imported or exported to it. + +# OPTIONS + +* `path` + + Include information about all keys of annexed files in the path. + +* file matching options + + The [[git-annex-matching-options]](1) + can be used to specify which files in a path to include. + +* `--branch=ref` + + Include information about keys referred of annexed files in the branch + or treeish. + +* `--key=key` + + Include information about a specific key. + +* `--all` + + Include information about all keys. + +* `--include-key-information-for=repo` + + When including information about a key, include information specific to + this repository. The repository can be specified with a uuid or the name + of a remote. This option can be used repeatedly to include several + repositories. + +* `--include-all-key-information` + + Include key information for all repositories, except any excluded with + the `--exclude-key-information-for` option. + +* `--exclude-key-information-for=repo` + + When including information about a key, exclude information specific to + this repository. The repository can be specified with a uuid or the name + of a remote. This option can be used repeatedly to exclude + several repositories. + +* `--include-repo-config-for=repo` + + Include configuration specific to this repository. + The repository can be specified with a uuid or the name of a remote. + + This includes the configuration of special remotes, which may include + embedded credentials, or encryption parameters. It also includes trust + settings, preferred content, etc. It does not include information + about any git-annex keys. This option can be used repeatedly to include + several repositories. + +* `--include-all-repo-config` + + Include the configuration of all repositories, except for any excluded + with the `--exclude-repo-config-for` option. + +* `--exclude-repo-config-for=repo` + + Exclude configuration specific to this repository. + The repository can be specified with a uuid or the name of a remote. + This option can be used repeatedly to exclude several repositories. + +* `--include-global-config` + + Include global configuration, that is not specific to any repository. + + This includes configs stored by [[git-annex-numcopies]](1), + [[git-annex-config]](1), etc. + +# EXAMPLES + +You have a big git-annex repository and are splitting the directory "foo" +out, to make a smaller repository. You want the smaller repo's git-annex +branch to contain all the information about remotes and other configuration, +but only information about keys in that directory. + + git-annex filter-branch foo --include-all-key-information \ + --include-all-repo-config --include-global-config + +That only includes information about the keys that are currently +in the directory "foo", not keys used by old versions of files. +To also include information about the version of the subdir in +tag "1.0", add the option `--branch=1.0:foo` + +Your repository has a special remote "bar", and you want to share information +about which annexed files are stored in it, but without sharing anything +about the configuration of the remote. + + git-annex filter-branch --all --include-all-key-information \ + --include-all-repo-config --exclude-repo-config-for=bar \ + --include-global-config + +# SEE ALSO + +[[git-annex]](1) + +[[git-annex-forget]](1) + +# AUTHOR + +Joey Hess + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex-forget.mdwn b/doc/git-annex-forget.mdwn index 003e2946ca..61cf4d506a 100644 --- a/doc/git-annex-forget.mdwn +++ b/doc/git-annex-forget.mdwn @@ -32,6 +32,8 @@ git to push the branch to any git repositories not running git-annex.) [[git-annex]](1) +[[git-annex-filter-branch]](1) + # AUTHOR Joey Hess diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 5038f30e49..f355359bb3 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -404,6 +404,12 @@ content from the key-value store. See [[git-annex-forget]](1) for details. +* `filter-branch` + + Produces a filtered version of the git-annex branch. + + See [[git-annex-filter-branch]](1) for details. + * `repair` This can repair many of the problems with git repositories that `git fsck` @@ -1008,7 +1014,7 @@ repository, using [[git-annex-config]]. See its man page for a list.) * `annex.commitmessage` When git-annex updates the git-annex branch, it usually makes up - its own commit message ("update"), since users rarely look at or + its own commit message (eg "update"), since users rarely look at or care about changes to that branch. If you do care, you can specify this setting by running commands with `-c annex.commitmessage=whatever` diff --git a/git-annex.cabal b/git-annex.cabal index a7ba9c46aa..f12da50c0d 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -742,6 +742,7 @@ Executable git-annex Command.ExamineKey Command.Expire Command.Export + Command.FilterBranch Command.Find Command.FindRef Command.Fix