Merge branch 'delaysmudge' into v7
This commit is contained in:
commit
6ceeb4bae7
14 changed files with 236 additions and 60 deletions
|
@ -46,6 +46,7 @@ module Annex.Content (
|
|||
staleKeysPrune,
|
||||
pruneTmpWorkDirBefore,
|
||||
isUnmodified,
|
||||
isUnmodifiedCheap,
|
||||
verifyKeyContent,
|
||||
VerifyConfig(..),
|
||||
Verification(..),
|
||||
|
@ -746,25 +747,38 @@ isUnmodified :: Key -> FilePath -> Annex Bool
|
|||
isUnmodified key f = go =<< geti
|
||||
where
|
||||
go Nothing = return False
|
||||
go (Just fc) = cheapcheck fc <||> expensivecheck fc
|
||||
cheapcheck fc = anyM (compareInodeCaches fc)
|
||||
=<< Database.Keys.getInodeCaches key
|
||||
go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc
|
||||
expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f)
|
||||
( do
|
||||
liftIO $ print "content verified"
|
||||
-- The file could have been modified while it was
|
||||
-- being verified. Detect that.
|
||||
ifM (geti >>= maybe (return False) (compareInodeCaches fc))
|
||||
( do
|
||||
-- Update the InodeCache to avoid
|
||||
-- performing this expensive check again.
|
||||
liftIO $ print "update inode cache"
|
||||
Database.Keys.addInodeCaches key [fc]
|
||||
return True
|
||||
, return False
|
||||
)
|
||||
, return False
|
||||
, do
|
||||
liftIO $ print "content not verified"
|
||||
return False
|
||||
)
|
||||
geti = withTSDelta (liftIO . genInodeCache f)
|
||||
|
||||
{- Cheap check if a file contains the unmodified content of the key,
|
||||
- only checking the InodeCache of the key.
|
||||
-}
|
||||
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
|
||||
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
|
||||
=<< withTSDelta (liftIO . genInodeCache f)
|
||||
|
||||
isUnmodifiedCheap' :: Key -> InodeCache -> Annex Bool
|
||||
isUnmodifiedCheap' key fc =
|
||||
anyM (compareInodeCaches fc) =<< Database.Keys.getInodeCaches key
|
||||
|
||||
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
|
||||
- returns the file it was moved to. -}
|
||||
moveBad :: Key -> Annex FilePath
|
||||
|
|
|
@ -32,6 +32,17 @@ postReceiveHook = Git.Hook "post-receive"
|
|||
[ mkHookScript "git annex post-receive"
|
||||
]
|
||||
|
||||
postCheckoutHook :: Git.Hook
|
||||
postCheckoutHook = Git.Hook "post-checkout" smudgeHook []
|
||||
|
||||
postMergeHook :: Git.Hook
|
||||
postMergeHook = Git.Hook "post-merge" smudgeHook []
|
||||
|
||||
-- Only run git-annex smudge --update when git-annex supports it.
|
||||
-- Older versions of git-annex didn't need this hook.
|
||||
smudgeHook :: String
|
||||
smudgeHook = mkHookScript "if git annex smudge --update >/dev/null 2>&1; then git-annex smudge --update; fi"
|
||||
|
||||
preCommitAnnexHook :: Git.Hook
|
||||
preCommitAnnexHook = Git.Hook "pre-commit-annex" "" []
|
||||
|
||||
|
|
|
@ -38,7 +38,6 @@ import Logs.Location
|
|||
import qualified Annex
|
||||
import qualified Annex.Queue
|
||||
import qualified Database.Keys
|
||||
import qualified Git.Branch
|
||||
import Config
|
||||
import Utility.InodeCache
|
||||
import Annex.ReplaceFile
|
||||
|
|
|
@ -112,6 +112,9 @@ initialize' ai mversion = checkCanInitialize ai $ do
|
|||
whenM versionSupportsUnlockedPointers $ do
|
||||
configureSmudgeFilter
|
||||
scanUnlockedFiles
|
||||
unlessM isBareRepo $ do
|
||||
hookWrite postCheckoutHook
|
||||
hookWrite postMergeHook
|
||||
checkAdjustedClone >>= \case
|
||||
NeedUpgradeForAdjustedClone ->
|
||||
void $ upgrade True versionForAdjustedClone
|
||||
|
|
|
@ -38,6 +38,8 @@ module Annex.Locations (
|
|||
gitAnnexFsckDbDir,
|
||||
gitAnnexFsckDbLock,
|
||||
gitAnnexFsckResultsLog,
|
||||
gitAnnexSmudgeLog,
|
||||
gitAnnexSmudgeLock,
|
||||
gitAnnexExportDbDir,
|
||||
gitAnnexExportLock,
|
||||
gitAnnexScheduleState,
|
||||
|
@ -312,6 +314,14 @@ gitAnnexFsckDbLock u r = gitAnnexFsckDir u r </> "fsck.lck"
|
|||
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
|
||||
gitAnnexFsckResultsLog u r = gitAnnexDir r </> "fsckresults" </> fromUUID u
|
||||
|
||||
{- .git/annex/smudge.log is used to log smudges worktree files that need to
|
||||
- be updated. -}
|
||||
gitAnnexSmudgeLog :: Git.Repo -> FilePath
|
||||
gitAnnexSmudgeLog r = gitAnnexDir r </> "smudge.log"
|
||||
|
||||
gitAnnexSmudgeLock :: Git.Repo -> FilePath
|
||||
gitAnnexSmudgeLock r = gitAnnexDir r </> "smudge.lck"
|
||||
|
||||
{- .git/annex/export/uuid/ is used to store information about
|
||||
- exports to special remotes. -}
|
||||
gitAnnexExportDir :: UUID -> Git.Repo -> FilePath
|
||||
|
|
13
CHANGELOG
13
CHANGELOG
|
@ -21,6 +21,19 @@ git-annex (6.20181012) UNRELEASED; urgency=medium
|
|||
get confused about whether a locked file's content was present.
|
||||
* Fix concurrency bug that occurred on the first download from an
|
||||
exporttree remote.
|
||||
* smudge: The smudge filter no longer provides git with annexed
|
||||
file content, to avoid a git memory leak, and because that did not
|
||||
honor annex.thin. Now git annex smudge --update has to be run
|
||||
after a checkout to update unlocked files in the working tree
|
||||
with annexed file contents.
|
||||
* init, upgrade: Install git post-checkout and post-merge hooks that run
|
||||
git annex smudge --update.
|
||||
* precommit: Run git annex smudge --update, because the post-merge
|
||||
hook is not run when there is a merge conflict. So the work tree will
|
||||
be updated when a commit is made to resolve the merge conflict.
|
||||
* Note that git has no hooks run after git stash or git cherry-pick,
|
||||
so the user will have to manually run git annex smudge --update
|
||||
after such commands.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Sat, 13 Oct 2018 00:52:02 -0400
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ import Command
|
|||
import Config
|
||||
import qualified Command.Add
|
||||
import qualified Command.Fix
|
||||
import qualified Command.Smudge
|
||||
import Annex.Direct
|
||||
import Annex.Hook
|
||||
import Annex.Link
|
||||
|
@ -54,11 +55,21 @@ seek ps = lockPreCommitHook $ ifM isDirect
|
|||
flip withFilesToBeCommitted l $ \f -> commandAction $
|
||||
maybe stop (Command.Fix.start Command.Fix.FixSymlinks f)
|
||||
=<< isAnnexLink f
|
||||
-- inject unlocked files into the annex
|
||||
-- (not needed when repo version uses
|
||||
-- unlocked pointer files)
|
||||
unlessM versionSupportsUnlockedPointers $
|
||||
withFilesOldUnlockedToBeCommitted (commandAction . startInjectUnlocked) l
|
||||
ifM versionSupportsUnlockedPointers
|
||||
-- after a merge conflict or git
|
||||
-- cherry-pick or stash, pointer
|
||||
-- files in the worktree won't
|
||||
-- be populated, so populate them
|
||||
-- here
|
||||
( Command.Smudge.updateSmudged
|
||||
-- When there's a false index,
|
||||
-- restaging the files won't work.
|
||||
. Restage =<< liftIO Git.haveFalseIndex
|
||||
-- inject unlocked files into the annex
|
||||
-- (not needed when repo version uses
|
||||
-- unlocked pointer files)
|
||||
, withFilesOldUnlockedToBeCommitted (commandAction . startInjectUnlocked) l
|
||||
)
|
||||
)
|
||||
runAnnexHook preCommitAnnexHook
|
||||
-- committing changes to a view updates metadata
|
||||
|
|
|
@ -8,12 +8,12 @@
|
|||
module Command.Smudge where
|
||||
|
||||
import Command
|
||||
import qualified Annex
|
||||
import Annex.Content
|
||||
import Annex.Link
|
||||
import Annex.FileMatcher
|
||||
import Annex.Ingest
|
||||
import Annex.CatFile
|
||||
import Logs.Smudge
|
||||
import Logs.Location
|
||||
import qualified Database.Keys
|
||||
import qualified Git.BuildVersion
|
||||
|
@ -29,43 +29,46 @@ cmd = noCommit $ noMessages $
|
|||
"git smudge filter"
|
||||
paramFile (seek <$$> optParser)
|
||||
|
||||
data SmudgeOptions = SmudgeOptions
|
||||
data SmudgeOptions = UpdateOption | SmudgeOptions
|
||||
{ smudgeFile :: FilePath
|
||||
, cleanOption :: Bool
|
||||
}
|
||||
|
||||
optParser :: CmdParamsDesc -> Parser SmudgeOptions
|
||||
optParser desc = SmudgeOptions
|
||||
<$> argument str ( metavar desc )
|
||||
<*> switch ( long "clean" <> help "clean filter" )
|
||||
optParser desc = smudgeoptions <|> updateoption
|
||||
where
|
||||
smudgeoptions = SmudgeOptions
|
||||
<$> argument str ( metavar desc )
|
||||
<*> switch ( long "clean" <> help "clean filter" )
|
||||
updateoption = flag' UpdateOption
|
||||
( long "update" <> help "populate annexed worktree files" )
|
||||
|
||||
seek :: SmudgeOptions -> CommandSeek
|
||||
seek o = commandAction $
|
||||
(if cleanOption o then clean else smudge) (smudgeFile o)
|
||||
seek (SmudgeOptions f False) = commandAction (smudge f)
|
||||
seek (SmudgeOptions f True) = commandAction (clean f)
|
||||
seek UpdateOption = commandAction update
|
||||
|
||||
-- Smudge filter is fed git file content, and if it's a pointer to an
|
||||
-- available annex object, should output its content.
|
||||
-- available annex object, git expects it to output its content.
|
||||
--
|
||||
-- However, this does not do that. It outputs the pointer, and records
|
||||
-- the filename in the smudge log. Git hooks run after commands like checkout
|
||||
-- then run git annex smudge --update which populates the work tree files
|
||||
-- with annex content. This is done for several reasons:
|
||||
--
|
||||
-- * To support annex.thin
|
||||
-- * Because git currently buffers the whole object received from the
|
||||
-- smudge filter in memory, which is a problem with large files.
|
||||
smudge :: FilePath -> CommandStart
|
||||
smudge file = do
|
||||
b <- liftIO $ B.hGetContents stdin
|
||||
case parseLinkOrPointer b of
|
||||
Nothing -> liftIO $ B.putStr b
|
||||
Nothing -> noop
|
||||
Just k -> do
|
||||
Database.Keys.addAssociatedFile k =<< inRepo (toTopFilePath file)
|
||||
-- A previous unlocked checkout of the file may have
|
||||
-- led to the annex object getting modified;
|
||||
-- don't provide such modified content as it
|
||||
-- will be confusing. inAnnex will detect such
|
||||
-- modifications.
|
||||
ifM (inAnnex k)
|
||||
( do
|
||||
content <- calcRepo (gitAnnexLocation k)
|
||||
whenM (annexThin <$> Annex.getGitConfig) $
|
||||
warning $ "Not able to honor annex.thin when git is checking out " ++ file ++ " (run git annex fix to re-thin files)"
|
||||
liftIO $ B.putStr . fromMaybe b
|
||||
=<< catchMaybeIO (B.readFile content)
|
||||
, liftIO $ B.putStr b
|
||||
)
|
||||
topfile <- inRepo (toTopFilePath file)
|
||||
Database.Keys.addAssociatedFile k topfile
|
||||
void $ smudgeLog k topfile
|
||||
liftIO $ B.putStr b
|
||||
stop
|
||||
|
||||
-- Clean filter is fed file content on stdin, decides if a file
|
||||
|
@ -92,19 +95,30 @@ clean file = do
|
|||
if Git.BuildVersion.older "2.5"
|
||||
then B.length b `seq` return ()
|
||||
else liftIO $ hClose stdin
|
||||
-- Look up the backend that was used for this file
|
||||
-- before, so that when git re-cleans a file its
|
||||
-- backend does not change.
|
||||
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
|
||||
-- Can't restage associated files because git add
|
||||
-- runs this and has the index locked.
|
||||
let norestage = Restage False
|
||||
liftIO . emitPointer
|
||||
=<< postingest
|
||||
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
|
||||
=<< lockDown cfg file
|
||||
|
||||
-- Optimization when the file is already annexed
|
||||
-- and is unmodified.
|
||||
case oldkey of
|
||||
Nothing -> ingest oldkey
|
||||
Just ko -> ifM (isUnmodifiedCheap ko file)
|
||||
( liftIO $ emitPointer ko
|
||||
, ingest oldkey
|
||||
)
|
||||
, liftIO $ B.hPut stdout b
|
||||
)
|
||||
|
||||
ingest oldkey = do
|
||||
-- Look up the backend that was used for this file
|
||||
-- before, so that when git re-cleans a file its
|
||||
-- backend does not change.
|
||||
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
|
||||
-- Can't restage associated files because git add
|
||||
-- runs this and has the index locked.
|
||||
let norestage = Restage False
|
||||
liftIO . emitPointer
|
||||
=<< postingest
|
||||
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
|
||||
=<< lockDown cfg file
|
||||
|
||||
postingest (Just k, _) = do
|
||||
logStatus k InfoPresent
|
||||
|
@ -151,3 +165,19 @@ getMoveRaceRecovery k file = void $ tryNonAsync $
|
|||
populatePointerFile (Restage False) k obj file >>= \case
|
||||
Nothing -> return ()
|
||||
Just ic -> Database.Keys.addInodeCaches k [ic]
|
||||
|
||||
update :: CommandStart
|
||||
update = do
|
||||
updateSmudged (Restage True)
|
||||
stop
|
||||
|
||||
updateSmudged :: Restage -> Annex ()
|
||||
updateSmudged restage = streamSmudged $ \k topf -> do
|
||||
f <- fromRepo $ fromTopFilePath topf
|
||||
whenM (inAnnex k) $ do
|
||||
obj <- calcRepo (gitAnnexLocation k)
|
||||
unlessM (isJust <$> populatePointerFile restage k obj f) $
|
||||
liftIO (isPointerFile f) >>= \case
|
||||
Just k' | k' == k -> toplevelWarning False $
|
||||
"unable to populate worktree file " ++ f
|
||||
_ -> noop
|
||||
|
|
49
Logs/File.hs
49
Logs/File.hs
|
@ -2,26 +2,59 @@
|
|||
-
|
||||
- Copyright 2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Logs.File where
|
||||
module Logs.File (writeLogFile, appendLogFile, streamLogFile) where
|
||||
|
||||
import Annex.Common
|
||||
import Annex.Perms
|
||||
import Annex.LockFile
|
||||
import qualified Git
|
||||
import Utility.Tmp
|
||||
|
||||
-- | Writes content to a file, replacing the file atomically, and
|
||||
-- making the new file have whatever permissions the git repository is
|
||||
-- configured to use. Creates the parent directory when necessary.
|
||||
writeLogFile :: FilePath -> String -> Annex ()
|
||||
writeLogFile f c = go `catchNonAsync` \_e -> do
|
||||
-- Most of the time, the directory will exist, so this is only
|
||||
-- done if writing the file fails.
|
||||
createAnnexDirectory (parentDir f)
|
||||
go
|
||||
writeLogFile f c = createDirWhenNeeded f $ viaTmp writelog f c
|
||||
where
|
||||
go = viaTmp writelog f c
|
||||
writelog f' c' = do
|
||||
liftIO $ writeFile f' c'
|
||||
setAnnexFilePerm f'
|
||||
|
||||
-- | Appends a line to a log file, first locking it to prevent
|
||||
-- concurrent writers.
|
||||
appendLogFile :: FilePath -> (Git.Repo -> FilePath) -> String -> Annex ()
|
||||
appendLogFile f lck c = createDirWhenNeeded f $ withExclusiveLock lck $ do
|
||||
liftIO $ withFile f AppendMode $ \h -> hPutStrLn h c
|
||||
setAnnexFilePerm f
|
||||
|
||||
-- | Streams lines from a log file, and then empties the file at the end.
|
||||
--
|
||||
-- If the action is interrupted or throws an exception, the log file is
|
||||
-- left unchanged.
|
||||
--
|
||||
-- Does nothing if the log file does not exist.
|
||||
--
|
||||
-- Locking is used to prevent writes to to the log file while this
|
||||
-- is running.
|
||||
streamLogFile :: FilePath -> (Git.Repo -> FilePath) -> (String -> Annex ()) -> Annex ()
|
||||
streamLogFile f lck a = withExclusiveLock lck $ bracketOnError setup cleanup go
|
||||
where
|
||||
setup = liftIO $ tryWhenExists $ openFile f ReadMode
|
||||
cleanup Nothing = noop
|
||||
cleanup (Just h) = liftIO $ hClose h
|
||||
go Nothing = noop
|
||||
go (Just h) = do
|
||||
mapM_ a =<< liftIO (lines <$> hGetContents h)
|
||||
liftIO $ hClose h
|
||||
liftIO $ writeFile f ""
|
||||
setAnnexFilePerm f
|
||||
|
||||
createDirWhenNeeded :: FilePath -> Annex () -> Annex ()
|
||||
createDirWhenNeeded f a = a `catchNonAsync` \_e -> do
|
||||
-- Most of the time, the directory will exist, so this is only
|
||||
-- done if writing the file fails.
|
||||
createAnnexDirectory (parentDir f)
|
||||
a
|
||||
|
|
40
Logs/Smudge.hs
Normal file
40
Logs/Smudge.hs
Normal file
|
@ -0,0 +1,40 @@
|
|||
{- git-annex smudge log file
|
||||
-
|
||||
- Copyright 2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Logs.Smudge where
|
||||
|
||||
import Annex.Common
|
||||
import Git.FilePath
|
||||
import Logs.File
|
||||
|
||||
-- | Log a smudged file.
|
||||
smudgeLog :: Key -> TopFilePath -> Annex ()
|
||||
smudgeLog k f = do
|
||||
logf <- fromRepo gitAnnexSmudgeLog
|
||||
appendLogFile logf gitAnnexSmudgeLock $
|
||||
key2file k ++ " " ++ getTopFilePath f
|
||||
|
||||
-- | Streams all smudged files, and then empties the log at the end.
|
||||
--
|
||||
-- If the action is interrupted or throws an exception, the log file is
|
||||
-- left unchanged.
|
||||
--
|
||||
-- Locking is used to prevent new items being added to the log while this
|
||||
-- is running.
|
||||
streamSmudged :: (Key -> TopFilePath -> Annex ()) -> Annex ()
|
||||
streamSmudged a = do
|
||||
logf <- fromRepo gitAnnexSmudgeLog
|
||||
streamLogFile logf gitAnnexSmudgeLock $ \l ->
|
||||
case parse l of
|
||||
Nothing -> noop
|
||||
Just (k, f) -> a k f
|
||||
where
|
||||
parse l =
|
||||
let (ks, f) = separate (== ' ') l
|
||||
in do
|
||||
k <- file2key ks
|
||||
return (k, asTopFilePath f)
|
|
@ -17,6 +17,7 @@ import Annex.Direct
|
|||
import Annex.Content
|
||||
import Annex.CatFile
|
||||
import Annex.WorkTree
|
||||
import Annex.Hook
|
||||
import qualified Database.Keys
|
||||
import qualified Annex.Content.Direct as Direct
|
||||
import qualified Git
|
||||
|
@ -63,6 +64,9 @@ upgrade automatic = do
|
|||
- adjust branch. Instead, update HEAD manually. -}
|
||||
inRepo $ setHeadRef b
|
||||
configureSmudgeFilter
|
||||
unlessM isBareRepo $ do
|
||||
hookWrite postCheckoutHook
|
||||
hookWrite postMergeHook
|
||||
-- Inode sentinal file was only used in direct mode and when
|
||||
-- locking down files as they were added. In v6, it's used more
|
||||
-- extensively, so make sure it exists, since old repos that didn't
|
||||
|
|
|
@ -6,11 +6,13 @@ git-annex smudge - git filter driver for git-annex
|
|||
|
||||
git annex smudge [--clean] file
|
||||
|
||||
git annex smudge --update
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
This command lets git-annex be used as a git filter driver which lets
|
||||
annexed files in the git repository to be unlocked at all times, instead
|
||||
of being symlinks.
|
||||
annexed files in the git repository to be unlocked, instead
|
||||
of being symlinks, and lets `git add` store files in the annex.
|
||||
|
||||
When adding a file with `git add`, the annex.largefiles config is
|
||||
consulted to decide if a given file should be added to git as-is,
|
||||
|
@ -32,6 +34,16 @@ contents:
|
|||
* filter=annex
|
||||
.* !filter
|
||||
|
||||
The smudge filter does not provide git with the content of annexed files,
|
||||
because that would be slow and triggers memory leaks in git. Instead,
|
||||
it records which worktree files need to be updated, and
|
||||
`git annex smudge --update` later updates the work tree to contain
|
||||
the content. That is run by several git hooks, including post-checkout
|
||||
and post-merge. However, a few git commands, notably `git stash` and
|
||||
`git cherry-pick`, do not run any hooks, so after using those commands
|
||||
you can manually run `git annex smudge --update` to update the working
|
||||
tree.
|
||||
|
||||
# SEE ALSO
|
||||
|
||||
[[git-annex]](1)
|
||||
|
|
|
@ -1032,11 +1032,6 @@ Here are all the supported configuration settings.
|
|||
After setting (or unsetting) this, you should run `git annex fix` to
|
||||
fix up the annexed files in the work tree to be hard links (or copies).
|
||||
|
||||
Note that `annex.thin` is not honored when git updates an annexed file
|
||||
in the working tree. So when `git checkout` or `git merge` updates the
|
||||
working tree, a second copy of annexed files will result. You can run
|
||||
`git-annex fix` to fix up the hard links after running such git commands.
|
||||
|
||||
* `annex.delayadd`
|
||||
|
||||
Makes the watch and assistant commands delay for the specified number of
|
||||
|
|
|
@ -880,6 +880,7 @@ Executable git-annex
|
|||
Logs.Schedule
|
||||
Logs.SingleValue
|
||||
Logs.SingleValue.Pure
|
||||
Logs.Smudge
|
||||
Logs.TimeStamp
|
||||
Logs.Transfer
|
||||
Logs.Transitions
|
||||
|
|
Loading…
Reference in a new issue