v6: avoid accidental conversion when annex.largefiles is not configured
v6: When annex.largefiles is not configured for a file, running git add or git commit, or otherwise using git to stage a file will add it to the annex if the file was in the annex before, and to git otherwise. This is to avoid accidental conversion. Note that git-annex add's behavior has not changed, for reasons explained in the added comment. Performance: No added overhead when annex.largefiles is configured. When not configured, there is an added call to catObjectMetaData, which involves a round trip through git cat-file --batch. However, the earlier catKeyFile primes the cache for it. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
b3dfcd18fe
commit
10138056dc
9 changed files with 106 additions and 44 deletions
|
@ -13,6 +13,7 @@ module Annex.CatFile (
|
||||||
catCommit,
|
catCommit,
|
||||||
catObjectDetails,
|
catObjectDetails,
|
||||||
catFileHandle,
|
catFileHandle,
|
||||||
|
catObjectMetaData,
|
||||||
catFileStop,
|
catFileStop,
|
||||||
catKey,
|
catKey,
|
||||||
catKeyFile,
|
catKeyFile,
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
module Annex.FileMatcher (
|
module Annex.FileMatcher (
|
||||||
GetFileMatcher,
|
GetFileMatcher,
|
||||||
checkFileMatcher,
|
checkFileMatcher,
|
||||||
|
checkFileMatcher',
|
||||||
checkMatcher,
|
checkMatcher,
|
||||||
matchAll,
|
matchAll,
|
||||||
preferredContentParser,
|
preferredContentParser,
|
||||||
|
@ -42,17 +43,25 @@ import qualified Data.Set as S
|
||||||
type GetFileMatcher = FilePath -> Annex (FileMatcher Annex)
|
type GetFileMatcher = FilePath -> Annex (FileMatcher Annex)
|
||||||
|
|
||||||
checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool
|
checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool
|
||||||
checkFileMatcher getmatcher file = do
|
checkFileMatcher getmatcher file = checkFileMatcher' getmatcher file (return True)
|
||||||
matcher <- getmatcher file
|
|
||||||
checkMatcher matcher Nothing (AssociatedFile (Just file)) S.empty True
|
|
||||||
|
|
||||||
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Bool -> Annex Bool
|
-- | Allows running an action when no matcher is configured for the file.
|
||||||
checkMatcher matcher mkey afile notpresent d
|
checkFileMatcher' :: GetFileMatcher -> FilePath -> Annex Bool -> Annex Bool
|
||||||
| isEmpty matcher = return d
|
checkFileMatcher' getmatcher file notconfigured = do
|
||||||
|
matcher <- getmatcher file
|
||||||
|
checkMatcher matcher Nothing afile S.empty notconfigured d
|
||||||
|
where
|
||||||
|
afile = AssociatedFile (Just file)
|
||||||
|
-- checkMatcher will never use this, because afile is provided.
|
||||||
|
d = return True
|
||||||
|
|
||||||
|
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool
|
||||||
|
checkMatcher matcher mkey afile notpresent notconfigured d
|
||||||
|
| isEmpty matcher = notconfigured
|
||||||
| otherwise = case (mkey, afile) of
|
| otherwise = case (mkey, afile) of
|
||||||
(_, AssociatedFile (Just file)) -> go =<< fileMatchInfo file
|
(_, AssociatedFile (Just file)) -> go =<< fileMatchInfo file
|
||||||
(Just key, _) -> go (MatchingKey key)
|
(Just key, _) -> go (MatchingKey key)
|
||||||
_ -> return d
|
_ -> d
|
||||||
where
|
where
|
||||||
go mi = matchMrun matcher $ \a -> a notpresent mi
|
go mi = matchMrun matcher $ \a -> a notpresent mi
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,11 @@ git-annex (6.20180808) UNRELEASED; urgency=medium
|
||||||
pipe. This also avoids git buffering the whole file content in memory.
|
pipe. This also avoids git buffering the whole file content in memory.
|
||||||
* v6: After updating the worktree for an add/drop, update git's index,
|
* v6: After updating the worktree for an add/drop, update git's index,
|
||||||
so git status will not show the files as modified.
|
so git status will not show the files as modified.
|
||||||
|
* v6: When annex.largefiles is not configured for a file, running git
|
||||||
|
add or git commit, or otherwise using git to stage a file
|
||||||
|
will add it to the annex if the file was in the annex before,
|
||||||
|
and to git otherwise. This is to avoid accidental conversion.
|
||||||
|
Note that git-annex add's behavior has not changed.
|
||||||
* v6: Update associated files database when git has staged changes
|
* v6: Update associated files database when git has staged changes
|
||||||
to pointer files.
|
to pointer files.
|
||||||
* v6: Fix some race conditions.
|
* v6: Fix some race conditions.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex command
|
{- git-annex command
|
||||||
-
|
-
|
||||||
- Copyright 2015 Joey Hess <id@joeyh.name>
|
- Copyright 2015-2018 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -18,6 +18,7 @@ import Logs.Location
|
||||||
import qualified Database.Keys
|
import qualified Database.Keys
|
||||||
import qualified Git.BuildVersion
|
import qualified Git.BuildVersion
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
|
import Git.Ref
|
||||||
import Backend
|
import Backend
|
||||||
|
|
||||||
import qualified Data.ByteString.Lazy as B
|
import qualified Data.ByteString.Lazy as B
|
||||||
|
@ -77,50 +78,59 @@ clean file = do
|
||||||
Just k -> do
|
Just k -> do
|
||||||
getMoveRaceRecovery k file
|
getMoveRaceRecovery k file
|
||||||
liftIO $ B.hPut stdout b
|
liftIO $ B.hPut stdout b
|
||||||
Nothing -> ifM (shouldAnnex file)
|
Nothing -> go b =<< catKeyFile file
|
||||||
( do
|
|
||||||
-- Before git 2.5, failing to consume all
|
|
||||||
-- stdin here would cause a SIGPIPE and
|
|
||||||
-- crash it.
|
|
||||||
-- Newer git catches the signal and
|
|
||||||
-- stops sending, which is much faster.
|
|
||||||
-- (Also, git seems to forget to free memory
|
|
||||||
-- when sending the file, so the less we
|
|
||||||
-- let it send, the less memory it will
|
|
||||||
-- waste.)
|
|
||||||
if Git.BuildVersion.older "2.5"
|
|
||||||
then B.length b `seq` return ()
|
|
||||||
else liftIO $ hClose stdin
|
|
||||||
-- Look up the backend that was used
|
|
||||||
-- for this file before, so that when
|
|
||||||
-- git re-cleans a file its backend does
|
|
||||||
-- not change.
|
|
||||||
currbackend <- maybe Nothing (maybeLookupBackendVariety . keyVariety)
|
|
||||||
<$> catKeyFile file
|
|
||||||
liftIO . emitPointer
|
|
||||||
=<< go
|
|
||||||
=<< (\ld -> ingest' currbackend ld Nothing norestage)
|
|
||||||
=<< lockDown cfg file
|
|
||||||
, liftIO $ B.hPut stdout b
|
|
||||||
)
|
|
||||||
stop
|
stop
|
||||||
where
|
where
|
||||||
go (Just k, _) = do
|
go b oldkey = ifM (shouldAnnex file oldkey)
|
||||||
|
( do
|
||||||
|
-- Before git 2.5, failing to consume all stdin here
|
||||||
|
-- would cause a SIGPIPE and crash it.
|
||||||
|
-- Newer git catches the signal and stops sending,
|
||||||
|
-- which is much faster. (Also, git seems to forget
|
||||||
|
-- to free memory when sending the file, so the
|
||||||
|
-- less we let it send, the less memory it will waste.)
|
||||||
|
if Git.BuildVersion.older "2.5"
|
||||||
|
then B.length b `seq` return ()
|
||||||
|
else liftIO $ hClose stdin
|
||||||
|
-- Look up the backend that was used for this file
|
||||||
|
-- before, so that when git re-cleans a file its
|
||||||
|
-- backend does not change.
|
||||||
|
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
|
||||||
|
-- Can't restage associated files because git add
|
||||||
|
-- runs this and has the index locked.
|
||||||
|
let norestage = Restage False
|
||||||
|
liftIO . emitPointer
|
||||||
|
=<< postingest
|
||||||
|
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
|
||||||
|
=<< lockDown cfg file
|
||||||
|
, liftIO $ B.hPut stdout b
|
||||||
|
)
|
||||||
|
|
||||||
|
postingest (Just k, _) = do
|
||||||
logStatus k InfoPresent
|
logStatus k InfoPresent
|
||||||
return k
|
return k
|
||||||
go _ = error "could not add file to the annex"
|
postingest _ = error "could not add file to the annex"
|
||||||
|
|
||||||
cfg = LockDownConfig
|
cfg = LockDownConfig
|
||||||
{ lockingFile = False
|
{ lockingFile = False
|
||||||
, hardlinkFileTmp = False
|
, hardlinkFileTmp = False
|
||||||
}
|
}
|
||||||
-- Can't restage associated files because git add runs this and has
|
|
||||||
-- the index locked.
|
|
||||||
norestage = Restage False
|
|
||||||
|
|
||||||
shouldAnnex :: FilePath -> Annex Bool
|
-- New files are annexed as configured by annex.largefiles, with a default
|
||||||
shouldAnnex file = do
|
-- of annexing them.
|
||||||
|
--
|
||||||
|
-- If annex.largefiles is not configured for a file, and a file with its
|
||||||
|
-- name is already in the index, preserve its annexed/not annexed state.
|
||||||
|
-- This prevents accidental conversions when annex.largefiles is being
|
||||||
|
-- set/unset on the fly rather than being set in gitattributes or .git/config.
|
||||||
|
shouldAnnex :: FilePath -> Maybe Key -> Annex Bool
|
||||||
|
shouldAnnex file moldkey = do
|
||||||
matcher <- largeFilesMatcher
|
matcher <- largeFilesMatcher
|
||||||
checkFileMatcher matcher file
|
checkFileMatcher' matcher file whenempty
|
||||||
|
where
|
||||||
|
whenempty = case moldkey of
|
||||||
|
Just _ -> return True
|
||||||
|
Nothing -> isNothing <$> catObjectMetaData (fileRef file)
|
||||||
|
|
||||||
emitPointer :: Key -> IO ()
|
emitPointer :: Key -> IO ()
|
||||||
emitPointer = putStr . formatPointer
|
emitPointer = putStr . formatPointer
|
||||||
|
|
|
@ -58,7 +58,7 @@ checkMap getmap mu notpresent mkey afile d = do
|
||||||
m <- getmap
|
m <- getmap
|
||||||
case M.lookup u m of
|
case M.lookup u m of
|
||||||
Nothing -> return d
|
Nothing -> return d
|
||||||
Just matcher -> checkMatcher matcher mkey afile notpresent d
|
Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d)
|
||||||
|
|
||||||
preferredContentMap :: Annex (FileMatcherMap Annex)
|
preferredContentMap :: Annex (FileMatcherMap Annex)
|
||||||
preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad) return
|
preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad) return
|
||||||
|
|
|
@ -105,3 +105,6 @@ index 982793c..8fdffc0 100644
|
||||||
yeap
|
yeap
|
||||||
|
|
||||||
[[!meta author=yoh]]
|
[[!meta author=yoh]]
|
||||||
|
|
||||||
|
> [[done]]; clean filter defaults to preserving git/annex state of file.
|
||||||
|
> --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 12"""
|
||||||
|
date="2018-08-27T18:08:24Z"
|
||||||
|
content="""
|
||||||
|
Hmm, I think if annex.largefiles is configured, it should be honored. So,
|
||||||
|
the ways to convert documented on
|
||||||
|
<https://git-annex.branchable.com/tips/largefiles/> will work
|
||||||
|
with only a minor modification.
|
||||||
|
|
||||||
|
If annex.largefiles is not configured, it can check if the file was annexed
|
||||||
|
or not before, and maintain the status quo.
|
||||||
|
|
||||||
|
At least to start with I am only going to do it in the clean filter, so
|
||||||
|
`git annex add` behavior won't change, it default to annexing. It may make
|
||||||
|
sense to make `git annex add` consistent with `git add`, but that would
|
||||||
|
also need to affect v5 repositories, probably, and I don't want to entangle
|
||||||
|
a v6 bug fix with a v5 behavior change. Also, the risk of a
|
||||||
|
`git annex add` with the wrong annex.largefiles temp setting
|
||||||
|
is much smaller than the risk of forgetting to temp set annex.largefiles
|
||||||
|
when running `git commit -a`. Also, I seem to remember another todo item
|
||||||
|
discussing making this change to `git annex add` and would need to revisit
|
||||||
|
the thinking in that.
|
||||||
|
|
||||||
|
Note that a .gitattributes file might only configure largefiles for eg,
|
||||||
|
"*.c" and not for other files, thus implicitly accepting the default
|
||||||
|
largefiles for the rest. Should then `Makefile` be treated as having
|
||||||
|
largefiles configured or not? I lean toward treating it as not configured,
|
||||||
|
because then when the user temporarily overrides largefiles to add `Makefile`
|
||||||
|
to git, a modification won't accidentially go to the annex.
|
||||||
|
"""]]
|
|
@ -45,3 +45,6 @@ operating system: linux x86_64
|
||||||
|
|
||||||
|
|
||||||
[[ben]]
|
[[ben]]
|
||||||
|
|
||||||
|
> [[done]]; clean filter defaults to preserving git/annex state of file.
|
||||||
|
> --[[Joey]]
|
||||||
|
|
|
@ -128,7 +128,7 @@ When you have a file that is currently stored in the annex, and you want to
|
||||||
convert that to be stored in git, here's how to accomplish that:
|
convert that to be stored in git, here's how to accomplish that:
|
||||||
|
|
||||||
git annex unlock file
|
git annex unlock file
|
||||||
git add file
|
git -c annex.largefiles=nothing add file
|
||||||
git commit -n file
|
git commit -n file
|
||||||
|
|
||||||
You can modify the file after unlocking it and before adding it to
|
You can modify the file after unlocking it and before adding it to
|
||||||
|
|
Loading…
Reference in a new issue