v6: avoid accidental conversion when annex.largefiles is not configured
v6: When annex.largefiles is not configured for a file, running git add or git commit, or otherwise using git to stage a file will add it to the annex if the file was in the annex before, and to git otherwise. This is to avoid accidental conversion. Note that git-annex add's behavior has not changed, for reasons explained in the added comment. Performance: No added overhead when annex.largefiles is configured. When not configured, there is an added call to catObjectMetaData, which involves a round trip through git cat-file --batch. However, the earlier catKeyFile primes the cache for it. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
b3dfcd18fe
commit
10138056dc
9 changed files with 106 additions and 44 deletions
|
@ -13,6 +13,7 @@ module Annex.CatFile (
|
|||
catCommit,
|
||||
catObjectDetails,
|
||||
catFileHandle,
|
||||
catObjectMetaData,
|
||||
catFileStop,
|
||||
catKey,
|
||||
catKeyFile,
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
module Annex.FileMatcher (
|
||||
GetFileMatcher,
|
||||
checkFileMatcher,
|
||||
checkFileMatcher',
|
||||
checkMatcher,
|
||||
matchAll,
|
||||
preferredContentParser,
|
||||
|
@ -42,17 +43,25 @@ import qualified Data.Set as S
|
|||
type GetFileMatcher = FilePath -> Annex (FileMatcher Annex)
|
||||
|
||||
checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool
|
||||
checkFileMatcher getmatcher file = do
|
||||
matcher <- getmatcher file
|
||||
checkMatcher matcher Nothing (AssociatedFile (Just file)) S.empty True
|
||||
checkFileMatcher getmatcher file = checkFileMatcher' getmatcher file (return True)
|
||||
|
||||
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Bool -> Annex Bool
|
||||
checkMatcher matcher mkey afile notpresent d
|
||||
| isEmpty matcher = return d
|
||||
-- | Allows running an action when no matcher is configured for the file.
|
||||
checkFileMatcher' :: GetFileMatcher -> FilePath -> Annex Bool -> Annex Bool
|
||||
checkFileMatcher' getmatcher file notconfigured = do
|
||||
matcher <- getmatcher file
|
||||
checkMatcher matcher Nothing afile S.empty notconfigured d
|
||||
where
|
||||
afile = AssociatedFile (Just file)
|
||||
-- checkMatcher will never use this, because afile is provided.
|
||||
d = return True
|
||||
|
||||
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool
|
||||
checkMatcher matcher mkey afile notpresent notconfigured d
|
||||
| isEmpty matcher = notconfigured
|
||||
| otherwise = case (mkey, afile) of
|
||||
(_, AssociatedFile (Just file)) -> go =<< fileMatchInfo file
|
||||
(Just key, _) -> go (MatchingKey key)
|
||||
_ -> return d
|
||||
_ -> d
|
||||
where
|
||||
go mi = matchMrun matcher $ \a -> a notpresent mi
|
||||
|
||||
|
|
|
@ -11,6 +11,11 @@ git-annex (6.20180808) UNRELEASED; urgency=medium
|
|||
pipe. This also avoids git buffering the whole file content in memory.
|
||||
* v6: After updating the worktree for an add/drop, update git's index,
|
||||
so git status will not show the files as modified.
|
||||
* v6: When annex.largefiles is not configured for a file, running git
|
||||
add or git commit, or otherwise using git to stage a file
|
||||
will add it to the annex if the file was in the annex before,
|
||||
and to git otherwise. This is to avoid accidental conversion.
|
||||
Note that git-annex add's behavior has not changed.
|
||||
* v6: Update associated files database when git has staged changes
|
||||
to pointer files.
|
||||
* v6: Fix some race conditions.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2015 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2015-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -18,6 +18,7 @@ import Logs.Location
|
|||
import qualified Database.Keys
|
||||
import qualified Git.BuildVersion
|
||||
import Git.FilePath
|
||||
import Git.Ref
|
||||
import Backend
|
||||
|
||||
import qualified Data.ByteString.Lazy as B
|
||||
|
@ -77,50 +78,59 @@ clean file = do
|
|||
Just k -> do
|
||||
getMoveRaceRecovery k file
|
||||
liftIO $ B.hPut stdout b
|
||||
Nothing -> ifM (shouldAnnex file)
|
||||
( do
|
||||
-- Before git 2.5, failing to consume all
|
||||
-- stdin here would cause a SIGPIPE and
|
||||
-- crash it.
|
||||
-- Newer git catches the signal and
|
||||
-- stops sending, which is much faster.
|
||||
-- (Also, git seems to forget to free memory
|
||||
-- when sending the file, so the less we
|
||||
-- let it send, the less memory it will
|
||||
-- waste.)
|
||||
if Git.BuildVersion.older "2.5"
|
||||
then B.length b `seq` return ()
|
||||
else liftIO $ hClose stdin
|
||||
-- Look up the backend that was used
|
||||
-- for this file before, so that when
|
||||
-- git re-cleans a file its backend does
|
||||
-- not change.
|
||||
currbackend <- maybe Nothing (maybeLookupBackendVariety . keyVariety)
|
||||
<$> catKeyFile file
|
||||
liftIO . emitPointer
|
||||
=<< go
|
||||
=<< (\ld -> ingest' currbackend ld Nothing norestage)
|
||||
=<< lockDown cfg file
|
||||
, liftIO $ B.hPut stdout b
|
||||
)
|
||||
Nothing -> go b =<< catKeyFile file
|
||||
stop
|
||||
where
|
||||
go (Just k, _) = do
|
||||
go b oldkey = ifM (shouldAnnex file oldkey)
|
||||
( do
|
||||
-- Before git 2.5, failing to consume all stdin here
|
||||
-- would cause a SIGPIPE and crash it.
|
||||
-- Newer git catches the signal and stops sending,
|
||||
-- which is much faster. (Also, git seems to forget
|
||||
-- to free memory when sending the file, so the
|
||||
-- less we let it send, the less memory it will waste.)
|
||||
if Git.BuildVersion.older "2.5"
|
||||
then B.length b `seq` return ()
|
||||
else liftIO $ hClose stdin
|
||||
-- Look up the backend that was used for this file
|
||||
-- before, so that when git re-cleans a file its
|
||||
-- backend does not change.
|
||||
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
|
||||
-- Can't restage associated files because git add
|
||||
-- runs this and has the index locked.
|
||||
let norestage = Restage False
|
||||
liftIO . emitPointer
|
||||
=<< postingest
|
||||
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
|
||||
=<< lockDown cfg file
|
||||
, liftIO $ B.hPut stdout b
|
||||
)
|
||||
|
||||
postingest (Just k, _) = do
|
||||
logStatus k InfoPresent
|
||||
return k
|
||||
go _ = error "could not add file to the annex"
|
||||
postingest _ = error "could not add file to the annex"
|
||||
|
||||
cfg = LockDownConfig
|
||||
{ lockingFile = False
|
||||
, hardlinkFileTmp = False
|
||||
}
|
||||
-- Can't restage associated files because git add runs this and has
|
||||
-- the index locked.
|
||||
norestage = Restage False
|
||||
|
||||
shouldAnnex :: FilePath -> Annex Bool
|
||||
shouldAnnex file = do
|
||||
-- New files are annexed as configured by annex.largefiles, with a default
|
||||
-- of annexing them.
|
||||
--
|
||||
-- If annex.largefiles is not configured for a file, and a file with its
|
||||
-- name is already in the index, preserve its annexed/not annexed state.
|
||||
-- This prevents accidental conversions when annex.largefiles is being
|
||||
-- set/unset on the fly rather than being set in gitattributes or .git/config.
|
||||
shouldAnnex :: FilePath -> Maybe Key -> Annex Bool
|
||||
shouldAnnex file moldkey = do
|
||||
matcher <- largeFilesMatcher
|
||||
checkFileMatcher matcher file
|
||||
checkFileMatcher' matcher file whenempty
|
||||
where
|
||||
whenempty = case moldkey of
|
||||
Just _ -> return True
|
||||
Nothing -> isNothing <$> catObjectMetaData (fileRef file)
|
||||
|
||||
emitPointer :: Key -> IO ()
|
||||
emitPointer = putStr . formatPointer
|
||||
|
|
|
@ -58,7 +58,7 @@ checkMap getmap mu notpresent mkey afile d = do
|
|||
m <- getmap
|
||||
case M.lookup u m of
|
||||
Nothing -> return d
|
||||
Just matcher -> checkMatcher matcher mkey afile notpresent d
|
||||
Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d)
|
||||
|
||||
preferredContentMap :: Annex (FileMatcherMap Annex)
|
||||
preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad) return
|
||||
|
|
|
@ -105,3 +105,6 @@ index 982793c..8fdffc0 100644
|
|||
yeap
|
||||
|
||||
[[!meta author=yoh]]
|
||||
|
||||
> [[done]]; clean filter defaults to preserving git/annex state of file.
|
||||
> --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 12"""
|
||||
date="2018-08-27T18:08:24Z"
|
||||
content="""
|
||||
Hmm, I think if annex.largefiles is configured, it should be honored. So,
|
||||
the ways to convert documented on
|
||||
<https://git-annex.branchable.com/tips/largefiles/> will work
|
||||
with only a minor modification.
|
||||
|
||||
If annex.largefiles is not configured, it can check if the file was annexed
|
||||
or not before, and maintain the status quo.
|
||||
|
||||
At least to start with I am only going to do it in the clean filter, so
|
||||
`git annex add` behavior won't change, it default to annexing. It may make
|
||||
sense to make `git annex add` consistent with `git add`, but that would
|
||||
also need to affect v5 repositories, probably, and I don't want to entangle
|
||||
a v6 bug fix with a v5 behavior change. Also, the risk of a
|
||||
`git annex add` with the wrong annex.largefiles temp setting
|
||||
is much smaller than the risk of forgetting to temp set annex.largefiles
|
||||
when running `git commit -a`. Also, I seem to remember another todo item
|
||||
discussing making this change to `git annex add` and would need to revisit
|
||||
the thinking in that.
|
||||
|
||||
Note that a .gitattributes file might only configure largefiles for eg,
|
||||
"*.c" and not for other files, thus implicitly accepting the default
|
||||
largefiles for the rest. Should then `Makefile` be treated as having
|
||||
largefiles configured or not? I lean toward treating it as not configured,
|
||||
because then when the user temporarily overrides largefiles to add `Makefile`
|
||||
to git, a modification won't accidentially go to the annex.
|
||||
"""]]
|
|
@ -45,3 +45,6 @@ operating system: linux x86_64
|
|||
|
||||
|
||||
[[ben]]
|
||||
|
||||
> [[done]]; clean filter defaults to preserving git/annex state of file.
|
||||
> --[[Joey]]
|
||||
|
|
|
@ -128,7 +128,7 @@ When you have a file that is currently stored in the annex, and you want to
|
|||
convert that to be stored in git, here's how to accomplish that:
|
||||
|
||||
git annex unlock file
|
||||
git add file
|
||||
git -c annex.largefiles=nothing add file
|
||||
git commit -n file
|
||||
|
||||
You can modify the file after unlocking it and before adding it to
|
||||
|
|
Loading…
Reference in a new issue