From 503788238c9e6171b9ac5d70cfbd1eb1a7929dbe Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 1 Jan 2020 14:03:06 -0400 Subject: [PATCH] add --force-large/--force-small options make it easier to override annex.largefiles configuration (and potentially safer as it avoids bugs like the smudge bug fixed in the last release) Deleted some old comments that were posted to the man page discussing such options. Updated docs that used -c annex.largefiles to use the options. Note that addSmallOverridden was needed to avoid the clean filter running on the file. It would be possible to make addFile also update the index directly, rather than going via git add. However, it was not necessary, and I want to avoid breaking on some edge case, particularly if the code in addSmallOverridden has some oversight. Also, when annex.addunlocked is set and annex.largefiles does not match a file, git annex add --force-large works, but git status will then show the file as added, with an unstaged modification. The unstaged modification adds the file to git. This is identical behavior to using -c annex.largefiles=nothing when annex.addunlocked is set. This does not prevent committing what was intended to be added. I have not gotten to the bottom of why git thinks the file is modified and runs it through the clean filter in this case. 
--- CHANGELOG | 8 +++ Command/Add.hs | 55 ++++++++++++++++--- doc/git-annex-add.mdwn | 10 ++++ ..._3286fb304f161df9775366db27cf9530._comment | 12 ---- ..._9bce0639d6c0767e39465db00f8120f1._comment | 12 ---- ..._352e8782fc9e4d02c069a527e8c88f40._comment | 14 ----- ..._48e3f9438721b9696897bc6e5a41f4ac._comment | 8 --- ..._47dbfaaaf5ac0b9f4f5300070a505f5b._comment | 10 ---- ..._bdf11966ddf7b8575a9caa0f5e360e42._comment | 16 ------ doc/tips/largefiles.mdwn | 6 +- ..._annex_add_option_to_control_to_where.mdwn | 7 ++- 11 files changed, 74 insertions(+), 84 deletions(-) delete mode 100644 doc/git-annex-add/comment_1_3286fb304f161df9775366db27cf9530._comment delete mode 100644 doc/git-annex-add/comment_2_9bce0639d6c0767e39465db00f8120f1._comment delete mode 100644 doc/git-annex-add/comment_3_352e8782fc9e4d02c069a527e8c88f40._comment delete mode 100644 doc/git-annex-add/comment_4_48e3f9438721b9696897bc6e5a41f4ac._comment delete mode 100644 doc/git-annex-add/comment_5_47dbfaaaf5ac0b9f4f5300070a505f5b._comment delete mode 100644 doc/git-annex-add/comment_6_bdf11966ddf7b8575a9caa0f5e360e42._comment diff --git a/CHANGELOG b/CHANGELOG index cb41a4e4fd..7a50cf28ce 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +git-annex (7.20191231) UNRELEASED; urgency=medium + + * add: --force-large/--force-small options make it easier to override + annex.largefiles configuration (and potentially safer as it avoids + bugs like the smudge bug fixed in the last release). + + -- Joey Hess Wed, 01 Jan 2020 12:51:40 -0400 + git-annex (7.20191230) upstream; urgency=medium * Optimised processing of many files, especially by commands like find diff --git a/Command/Add.hs b/Command/Add.hs index 45f9f62b0a..d07a10b25b 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2017 Joey Hess + - Copyright 2010-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -17,8 +17,12 @@ import qualified Database.Keys import Annex.FileMatcher import Annex.Link import Annex.Tmp +import Annex.HashObject import Messages.Progress +import Git.Types import Git.FilePath +import qualified Git.UpdateIndex +import Utility.FileMode import qualified Utility.RawFilePath as R cmd :: Command @@ -32,6 +36,7 @@ data AddOptions = AddOptions , includeDotFiles :: Bool , batchOption :: BatchMode , updateOnly :: Bool + , largeFilesOverride :: Maybe Bool } optParser :: CmdParamsDesc -> Parser AddOptions @@ -47,18 +52,31 @@ optParser desc = AddOptions <> short 'u' <> help "only update tracked files" ) + <*> (parseforcelarge <|> parseforcesmall) + where + parseforcelarge = flag Nothing (Just True) + ( long "force-large" + <> help "add all files to annex, ignoring other configuration" + ) + parseforcesmall = flag Nothing (Just False) + ( long "force-small" + <> help "add all files to git, ignoring other configuration" + ) seek :: AddOptions -> CommandSeek seek o = startConcurrency commandStages $ do largematcher <- largeFilesMatcher addunlockedmatcher <- addUnlockedMatcher - let gofile file = ifM (checkFileMatcher largematcher (fromRawFilePath file) <||> Annex.getState Annex.force) - ( start file addunlockedmatcher - , ifM (annexAddSmallFiles <$> Annex.getGitConfig) - ( startSmall file - , stop + let gofile file = case largeFilesOverride o of + Nothing -> ifM (checkFileMatcher largematcher (fromRawFilePath file) <||> Annex.getState Annex.force) + ( start file addunlockedmatcher + , ifM (annexAddSmallFiles <$> Annex.getGitConfig) + ( startSmall file + , stop + ) ) - ) + Just True -> start file addunlockedmatcher + Just False -> startSmallOverridden file case batchOption o of Batch fmt | updateOnly o -> @@ -82,6 +100,29 @@ addSmall file = do showNote "non-large file; adding content to git repository" addFile file +startSmallOverridden :: RawFilePath -> CommandStart +startSmallOverridden file = starting "add" (ActionItemWorkTreeFile file) $ + next $ 
addSmallOverridden file + +addSmallOverridden :: RawFilePath -> Annex Bool +addSmallOverridden file = do + showNote "adding content to git repository" + let file' = fromRawFilePath file + s <- liftIO $ getFileStatus file' + if isSymbolicLink s + then addFile file + else do + -- Can't use addFile because the clean filter will + -- honor annex.largefiles and it has been overridden. + -- Instead, hash the file and add to the index. + sha <- hashFile file' + let ty = if isExecutable (fileMode s) + then TreeExecutable + else TreeFile + Annex.Queue.addUpdateIndex =<< + inRepo (Git.UpdateIndex.stageFile sha ty file') + return True + addFile :: RawFilePath -> Annex Bool addFile file = do ps <- forceParams diff --git a/doc/git-annex-add.mdwn b/doc/git-annex-add.mdwn index d3d4c36247..5734ee85ad 100644 --- a/doc/git-annex-add.mdwn +++ b/doc/git-annex-add.mdwn @@ -39,6 +39,16 @@ annexed content, and other symlinks. Add gitignored files. +* `--force-large` + + Treat all files as large files, ignoring annex.largefiles configuration, + and add to the annex. + +* `--force-small` + + Treat all files as small files, ignoring annex.largefiles configuration, + and add to git, also ignoring annex.addsmallfiles configuration. + * `--backend` Specifies which key-value backend to use. diff --git a/doc/git-annex-add/comment_1_3286fb304f161df9775366db27cf9530._comment b/doc/git-annex-add/comment_1_3286fb304f161df9775366db27cf9530._comment deleted file mode 100644 index f752bbbf9f..0000000000 --- a/doc/git-annex-add/comment_1_3286fb304f161df9775366db27cf9530._comment +++ /dev/null @@ -1,12 +0,0 @@ -[[!comment format=mdwn - username="rrnewton@63c9faa1997c908b1dc04dfdca33c809660cd158" - nickname="rrnewton" - avatar="http://cdn.libravatar.org/avatar/638acc3e55c2bb09aa0dcca5b5c8acb6" - subject="Flag to force same behavior as annex.largefiles attribute?" 
- date="2018-05-21T05:29:06Z" - content=""" -When in [direct mode](https://git-annex.branchable.com/direct_mode), the \"add the non-large file directly to the git repository\" behavior described above is very useful, because the option of typing simply `git add foo`, does not exist as it does in [indirect mode](https://git-annex.branchable.com/git-annex-indirect/). - -However, I can't see any combination of flags that trigger this behavior. I suppose it can be accomplished by temporarily setting [annex.largefiles](https://git-annex.branchable.com/tips/largefiles/) to a huge value before executing `git annex add` (i.e. creating a `.gitattributes` and then deleting it). I think I'll try that as a work-around, but it would be great to have a flag that accomplishes this. - -"""]] diff --git a/doc/git-annex-add/comment_2_9bce0639d6c0767e39465db00f8120f1._comment b/doc/git-annex-add/comment_2_9bce0639d6c0767e39465db00f8120f1._comment deleted file mode 100644 index 8efa9a63a8..0000000000 --- a/doc/git-annex-add/comment_2_9bce0639d6c0767e39465db00f8120f1._comment +++ /dev/null @@ -1,12 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""comment 2""" - date="2018-05-21T16:36:51Z" - content=""" -@rrnewton I know people do commonly accomplish this -by something like `git -c annex.largefiles='exclude(*)' annex add` - -A shorter way to write that would only be useful for direct mode, -so I'm inclined not to add it, but open a todo item if you want to discuss -that. 
-"""]] diff --git a/doc/git-annex-add/comment_3_352e8782fc9e4d02c069a527e8c88f40._comment b/doc/git-annex-add/comment_3_352e8782fc9e4d02c069a527e8c88f40._comment deleted file mode 100644 index 0ff2ba568f..0000000000 --- a/doc/git-annex-add/comment_3_352e8782fc9e4d02c069a527e8c88f40._comment +++ /dev/null @@ -1,14 +0,0 @@ -[[!comment format=mdwn - username="rrnewton@63c9faa1997c908b1dc04dfdca33c809660cd158" - nickname="rrnewton" - avatar="http://cdn.libravatar.org/avatar/638acc3e55c2bb09aa0dcca5b5c8acb6" - subject="Sounds great!" - date="2018-05-21T18:09:35Z" - content=""" -That's fabulous. A Bash alias around that command is really all I need when working in direct mode. (And the archive's too damn big to switch back and forth between direct/indirect.) - -I was just too much a newb with git attributes to know it could be done that way. For discoverability, maybe that command could be placed in an \"examples\" section in the primary documentation above? - - - -"""]] diff --git a/doc/git-annex-add/comment_4_48e3f9438721b9696897bc6e5a41f4ac._comment b/doc/git-annex-add/comment_4_48e3f9438721b9696897bc6e5a41f4ac._comment deleted file mode 100644 index 19676fe380..0000000000 --- a/doc/git-annex-add/comment_4_48e3f9438721b9696897bc6e5a41f4ac._comment +++ /dev/null @@ -1,8 +0,0 @@ -[[!comment format=mdwn - username="timeless-ventricle" - avatar="http://cdn.libravatar.org/avatar/0b220fa4c0b59e883f360979ee745d63" - subject="comment 4" - date="2019-01-06T12:24:49Z" - content=""" -@joey I'm obviously missing something here, why would a shorter way to write that only be useful for direct mode? I don't understand what the connection is between direct mode and wanting to specify whether this is a \"regular git\" file or an annexed file (except that direct mode is not supported in v7)? I thought it was considered supported to have a mix of both large binary files and text files? 
Even if some text files are large, I think I want to add them as files whose content is tracked by git, so I think I want to choose 'by hand' -- is that not really supported / considered a bad idea for some reason? -"""]] diff --git a/doc/git-annex-add/comment_5_47dbfaaaf5ac0b9f4f5300070a505f5b._comment b/doc/git-annex-add/comment_5_47dbfaaaf5ac0b9f4f5300070a505f5b._comment deleted file mode 100644 index 29353d11b6..0000000000 --- a/doc/git-annex-add/comment_5_47dbfaaaf5ac0b9f4f5300070a505f5b._comment +++ /dev/null @@ -1,10 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""comment 5""" - date="2019-01-22T21:10:37Z" - content=""" -Because "git add foo" does not work in direct mode. - -This is really not the place to be having a conversation about this. If you -want something changed in git-annex, open a bug report or todo item. -"""]] diff --git a/doc/git-annex-add/comment_6_bdf11966ddf7b8575a9caa0f5e360e42._comment b/doc/git-annex-add/comment_6_bdf11966ddf7b8575a9caa0f5e360e42._comment deleted file mode 100644 index 592cf7865e..0000000000 --- a/doc/git-annex-add/comment_6_bdf11966ddf7b8575a9caa0f5e360e42._comment +++ /dev/null @@ -1,16 +0,0 @@ -[[!comment format=mdwn - username="johnmario.itec19@69a7b742534851b36216e0f951f1a00dbb9067cd" - nickname="johnmario.itec19" - avatar="http://cdn.libravatar.org/avatar/2f07ffce1656bdcd6aa19aaab7517975" - subject="commenting on git-annex-add" - date="2019-09-02T06:21:27Z" - content=""" -Yes you can do that. Simplest way is to git add the files you want to directly be in the git repo (e.g. the source code) and git annex add the large files. - -You can then check in any changes to the source code files (or anything else you added with git add) to github as normal. - -You can manage the storage and versioning of the large files using git annex commands. Git annex supports using AWS S3 and/or glacier for backing up the files. 
It can also back them up to a server you control over ssh or to an external drive (or any combination of the above). http://git-annex.branchable.com/special_remotes/ - -With the latest version of git annex, you can also set up automatically filters that decide which types/sizes of files to check in directly to git vs which ones to store as links in the annex. https://git-annex.branchable.com/tips/largefiles/ -For more tech related assistance or support Data Recovery Dubai -"""]] diff --git a/doc/tips/largefiles.mdwn b/doc/tips/largefiles.mdwn index 6de853d59e..7785daacd8 100644 --- a/doc/tips/largefiles.mdwn +++ b/doc/tips/largefiles.mdwn @@ -89,7 +89,7 @@ If you've set up an annex.largefiles configuration but want to force a file to be stored in the annex, you can temporarily override the configuration like this: - git annex add -c annex.largefiles=anything smallfile + git annex add --force-large smallfile ## converting git to annexed @@ -97,7 +97,7 @@ When you have a file that is currently stored in git, and you want to convert that to be stored in the annex, here's how to accomplish that: git rm --cached file - git annex add -c annex.largefiles=anything file + git annex add --force-large file git commit file This first removes the file from git's index cache, and then adds it back @@ -111,7 +111,7 @@ convert that to be stored in git, here's how to accomplish that: git annex unlock file git rm --cached file - git -c annex.largefiles=nothing add file + git annex add --force-small file git commit file You can modify the file after unlocking it and before adding it to diff --git a/doc/todo/git_annex_add_option_to_control_to_where.mdwn b/doc/todo/git_annex_add_option_to_control_to_where.mdwn index cbc1a9582d..ea06f44b38 100644 --- a/doc/todo/git_annex_add_option_to_control_to_where.mdwn +++ b/doc/todo/git_annex_add_option_to_control_to_where.mdwn @@ -1,5 +1,6 @@ -Make `git-annex add --annex` and `git-annex add --git` add a specific file to -annex or git, bypassing 
annex.largefiles and all other configuration and state. +Make `git-annex add --force-large` and `git-annex add --force-small` +add a specific file to annex or git, bypassing annex.largefiles +and all other configuration and state. One reason to want this is that it avoids users doing stuff like this: @@ -11,3 +12,5 @@ Such a temporary setting of annex.largefiles can be problimatic, as explored in Also, this could also be used to easily switch a file from one storage to the other. I suppose the file would have to be touched first to make git-annex add process it? + +> [[done]] --[[Joey]]